LCOV - code coverage report
Current view: top level - basic - fd-util.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 259 443 58.5 %
Date: 2019-08-22 15:41:25 Functions: 19 23 82.6 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <errno.h>
       4             : #include <fcntl.h>
       5             : #include <sys/resource.h>
       6             : #include <sys/socket.h>
       7             : #include <sys/stat.h>
       8             : #include <unistd.h>
       9             : 
      10             : #include "alloc-util.h"
      11             : #include "copy.h"
      12             : #include "dirent-util.h"
      13             : #include "fd-util.h"
      14             : #include "fileio.h"
      15             : #include "fs-util.h"
      16             : #include "io-util.h"
      17             : #include "macro.h"
      18             : #include "memfd-util.h"
      19             : #include "missing.h"
      20             : #include "parse-util.h"
      21             : #include "path-util.h"
      22             : #include "process-util.h"
      23             : #include "socket-util.h"
      24             : #include "stdio-util.h"
      25             : #include "util.h"
      26             : #include "tmpfile-util.h"
      27             : 
      28             : /* The maximum number of iterations in the loop to close descriptors in the fallback case
      29             :  * when /proc/self/fd/ is inaccessible. */
      30             : #define MAX_FD_LOOP_LIMIT (1024*1024)
      31             : 
      32      182907 : int close_nointr(int fd) {
      33      182907 :         assert(fd >= 0);
      34             : 
      35      182907 :         if (close(fd) >= 0)
      36      182902 :                 return 0;
      37             : 
      38             :         /*
      39             :          * Just ignore EINTR; a retry loop is the wrong thing to do on
      40             :          * Linux.
      41             :          *
      42             :          * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
      43             :          * https://bugzilla.gnome.org/show_bug.cgi?id=682819
      44             :          * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
      45             :          * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
      46             :          */
      47           5 :         if (errno == EINTR)
      48           0 :                 return 0;
      49             : 
      50           5 :         return -errno;
      51             : }
      52             : 
      53      367664 : int safe_close(int fd) {
      54             : 
      55             :         /*
      56             :          * Like close_nointr() but cannot fail. Guarantees errno is
      57             :          * unchanged. Is a NOP with negative fds passed, and returns
      58             :          * -1, so that it can be used in this syntax:
      59             :          *
      60             :          * fd = safe_close(fd);
      61             :          */
      62             : 
      63      367664 :         if (fd >= 0) {
      64      182885 :                 PROTECT_ERRNO;
      65             : 
      66             :                 /* The kernel might return pretty much any error code
      67             :                  * via close(), but the fd will be closed anyway. The
      68             :                  * only condition we want to check for here is whether
      69             :                  * the fd was invalid at all... */
      70             : 
      71      182885 :                 assert_se(close_nointr(fd) != -EBADF);
      72             :         }
      73             : 
      74      367664 :         return -1;
      75             : }
      76             : 
      77         384 : void safe_close_pair(int p[static 2]) {
      78         384 :         assert(p);
      79             : 
      80         384 :         if (p[0] == p[1]) {
      81             :                 /* Special case pairs which use the same fd in both
      82             :                  * directions... */
      83         333 :                 p[0] = p[1] = safe_close(p[0]);
      84         333 :                 return;
      85             :         }
      86             : 
      87          51 :         p[0] = safe_close(p[0]);
      88          51 :         p[1] = safe_close(p[1]);
      89             : }
      90             : 
      91         202 : void close_many(const int fds[], size_t n_fd) {
      92             :         size_t i;
      93             : 
      94         202 :         assert(fds || n_fd <= 0);
      95             : 
      96         218 :         for (i = 0; i < n_fd; i++)
      97          16 :                 safe_close(fds[i]);
      98         202 : }
      99             : 
     100       14608 : int fclose_nointr(FILE *f) {
     101       14608 :         assert(f);
     102             : 
     103             :         /* Same as close_nointr(), but for fclose() */
     104             : 
     105       14608 :         if (fclose(f) == 0)
     106       14608 :                 return 0;
     107             : 
     108           0 :         if (errno == EINTR)
     109           0 :                 return 0;
     110             : 
     111           0 :         return -errno;
     112             : }
     113             : 
     114       35792 : FILE* safe_fclose(FILE *f) {
     115             : 
     116             :         /* Same as safe_close(), but for fclose() */
     117             : 
     118       35792 :         if (f) {
     119       14608 :                 PROTECT_ERRNO;
     120             : 
     121       14608 :                 assert_se(fclose_nointr(f) != -EBADF);
     122             :         }
     123             : 
     124       35792 :         return NULL;
     125             : }
     126             : 
     127           0 : DIR* safe_closedir(DIR *d) {
     128             : 
     129           0 :         if (d) {
     130           0 :                 PROTECT_ERRNO;
     131             : 
     132           0 :                 assert_se(closedir(d) >= 0 || errno != EBADF);
     133             :         }
     134             : 
     135           0 :         return NULL;
     136             : }
     137             : 
     138        9873 : int fd_nonblock(int fd, bool nonblock) {
     139             :         int flags, nflags;
     140             : 
     141        9873 :         assert(fd >= 0);
     142             : 
     143        9873 :         flags = fcntl(fd, F_GETFL, 0);
     144        9873 :         if (flags < 0)
     145           0 :                 return -errno;
     146             : 
     147        9873 :         if (nonblock)
     148          24 :                 nflags = flags | O_NONBLOCK;
     149             :         else
     150        9849 :                 nflags = flags & ~O_NONBLOCK;
     151             : 
     152        9873 :         if (nflags == flags)
     153           4 :                 return 0;
     154             : 
     155        9869 :         if (fcntl(fd, F_SETFL, nflags) < 0)
     156           0 :                 return -errno;
     157             : 
     158        9869 :         return 0;
     159             : }
     160             : 
     161          53 : int fd_cloexec(int fd, bool cloexec) {
     162             :         int flags, nflags;
     163             : 
     164          53 :         assert(fd >= 0);
     165             : 
     166          53 :         flags = fcntl(fd, F_GETFD, 0);
     167          53 :         if (flags < 0)
     168           0 :                 return -errno;
     169             : 
     170          53 :         if (cloexec)
     171          20 :                 nflags = flags | FD_CLOEXEC;
     172             :         else
     173          33 :                 nflags = flags & ~FD_CLOEXEC;
     174             : 
     175          53 :         if (nflags == flags)
     176          35 :                 return 0;
     177             : 
     178          18 :         if (fcntl(fd, F_SETFD, nflags) < 0)
     179           0 :                 return -errno;
     180             : 
     181          18 :         return 0;
     182             : }
     183             : 
     184           2 : _pure_ static bool fd_in_set(int fd, const int fdset[], size_t n_fdset) {
     185             :         size_t i;
     186             : 
     187           2 :         assert(n_fdset == 0 || fdset);
     188             : 
     189           3 :         for (i = 0; i < n_fdset; i++)
     190           2 :                 if (fdset[i] == fd)
     191           1 :                         return true;
     192             : 
     193           1 :         return false;
     194             : }
     195             : 
     196           0 : static int get_max_fd(void) {
     197             :         struct rlimit rl;
     198             :         rlim_t m;
     199             : 
     200             :         /* Return the highest possible fd, based on RLIMIT_NOFILE, but enforcing FD_SETSIZE-1 as lower boundary
     201             :          * and INT_MAX as upper boundary. */
     202             : 
     203           0 :         if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
     204           0 :                 return -errno;
     205             : 
     206           0 :         m = MAX(rl.rlim_cur, rl.rlim_max);
     207           0 :         if (m < FD_SETSIZE) /* Let's always cover at least 1024 fds */
     208           0 :                 return FD_SETSIZE-1;
     209             : 
     210           0 :         if (m == RLIM_INFINITY || m > INT_MAX) /* Saturate on overflow. After all fds are "int", hence can
     211             :                                                 * never be above INT_MAX */
     212           0 :                 return INT_MAX;
     213             : 
     214           0 :         return (int) (m - 1);
     215             : }
     216             : 
     217           1 : int close_all_fds(const int except[], size_t n_except) {
     218           1 :         _cleanup_closedir_ DIR *d = NULL;
     219             :         struct dirent *de;
     220           1 :         int r = 0;
     221             : 
     222           1 :         assert(n_except == 0 || except);
     223             : 
     224           1 :         d = opendir("/proc/self/fd");
     225           1 :         if (!d) {
     226             :                 int fd, max_fd;
     227             : 
     228             :                 /* When /proc isn't available (for example in chroots) the fallback is brute forcing through
     229             :                  * the fd table */
     230             : 
     231           0 :                 max_fd = get_max_fd();
     232           0 :                 if (max_fd < 0)
     233           0 :                         return max_fd;
     234             : 
     235             :                 /* Refuse to loop over too many elements. It's better to fail immediately than to
     236             :                  * spin the CPU for a long time. */
     237           0 :                 if (max_fd > MAX_FD_LOOP_LIMIT)
     238           0 :                         return log_debug_errno(SYNTHETIC_ERRNO(EPERM),
     239             :                                                "/proc/self/fd is inaccessible. Refusing to loop over %d potential fds.",
     240             :                                                max_fd);
     241             : 
     242           0 :                 for (fd = 3; fd >= 0; fd = fd < max_fd ? fd + 1 : -1) {
     243             :                         int q;
     244             : 
     245           0 :                         if (fd_in_set(fd, except, n_except))
     246           0 :                                 continue;
     247             : 
     248           0 :                         q = close_nointr(fd);
     249           0 :                         if (q < 0 && q != -EBADF && r >= 0)
     250           0 :                                 r = q;
     251             :                 }
     252             : 
     253           0 :                 return r;
     254             :         }
     255             : 
     256           9 :         FOREACH_DIRENT(de, d, return -errno) {
     257           6 :                 int fd = -1, q;
     258             : 
     259           6 :                 if (safe_atoi(de->d_name, &fd) < 0)
     260             :                         /* Let's better ignore this, just in case */
     261           5 :                         continue;
     262             : 
     263           6 :                 if (fd < 3)
     264           3 :                         continue;
     265             : 
     266           3 :                 if (fd == dirfd(d))
     267           1 :                         continue;
     268             : 
     269           2 :                 if (fd_in_set(fd, except, n_except))
     270           1 :                         continue;
     271             : 
     272           1 :                 q = close_nointr(fd);
     273           1 :                 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
     274           0 :                         r = q;
     275             :         }
     276             : 
     277           1 :         return r;
     278             : }
     279             : 
     280          20 : int same_fd(int a, int b) {
     281             :         struct stat sta, stb;
     282             :         pid_t pid;
     283             :         int r, fa, fb;
     284             : 
     285          20 :         assert(a >= 0);
     286          20 :         assert(b >= 0);
     287             : 
     288             :         /* Compares two file descriptors. Note that semantics are
     289             :          * quite different depending on whether we have kcmp() or we
     290             :          * don't. If we have kcmp() this will only return true for
     291             :          * dup()ed file descriptors, but not otherwise. If we don't
     292             :          * have kcmp() this will also return true for two fds of the same
     293             :          * file, created by separate open() calls. Since we use this
     294             :          * call mostly for filtering out duplicates in the fd store
     295             :          * this difference hopefully doesn't matter too much. */
     296             : 
     297          20 :         if (a == b)
     298           4 :                 return true;
     299             : 
     300             :         /* Try to use kcmp() if we have it. */
     301          16 :         pid = getpid_cached();
     302          16 :         r = kcmp(pid, pid, KCMP_FILE, a, b);
     303          16 :         if (r == 0)
     304           6 :                 return true;
     305          10 :         if (r > 0)
     306          10 :                 return false;
     307           0 :         if (!IN_SET(errno, ENOSYS, EACCES, EPERM))
     308           0 :                 return -errno;
     309             : 
     310             :         /* We don't have kcmp(), use fstat() instead. */
     311           0 :         if (fstat(a, &sta) < 0)
     312           0 :                 return -errno;
     313             : 
     314           0 :         if (fstat(b, &stb) < 0)
     315           0 :                 return -errno;
     316             : 
     317           0 :         if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
     318           0 :                 return false;
     319             : 
     320             :         /* We consider all device fds different, since two device fds
     321             :          * might refer to quite different device contexts even though
     322             :          * they share the same inode and backing dev_t. */
     323             : 
     324           0 :         if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
     325           0 :                 return false;
     326             : 
     327           0 :         if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
     328           0 :                 return false;
     329             : 
     330             :         /* The fds refer to the same inode on disk, let's also check
     331             :          * if they have the same fd flags. This is useful to
     332             :          * distinguish the read and write side of a pipe created with
     333             :          * pipe(). */
     334           0 :         fa = fcntl(a, F_GETFL);
     335           0 :         if (fa < 0)
     336           0 :                 return -errno;
     337             : 
     338           0 :         fb = fcntl(b, F_GETFL);
     339           0 :         if (fb < 0)
     340           0 :                 return -errno;
     341             : 
     342           0 :         return fa == fb;
     343             : }
     344             : 
     345           2 : void cmsg_close_all(struct msghdr *mh) {
     346             :         struct cmsghdr *cmsg;
     347             : 
     348           2 :         assert(mh);
     349             : 
     350           2 :         CMSG_FOREACH(cmsg, mh)
     351           0 :                 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
     352           0 :                         close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
     353           2 : }
     354             : 
     355           0 : bool fdname_is_valid(const char *s) {
     356             :         const char *p;
     357             : 
     358             :         /* Validates a name for $LISTEN_FDNAMES. We basically allow
     359             :          * everything ASCII that's not a control character. Also, as
     360             :          * a special exception, the ":" character is not allowed, as we
     361             :          * use that as field separator in $LISTEN_FDNAMES.
     362             :          *
     363             :          * Note that the empty string is explicitly allowed
     364             :          * here. However, we limit the length of the names to 255
     365             :          * characters. */
     366             : 
     367           0 :         if (!s)
     368           0 :                 return false;
     369             : 
     370           0 :         for (p = s; *p; p++) {
     371           0 :                 if (*p < ' ')
     372           0 :                         return false;
     373           0 :                 if (*p >= 127)
     374           0 :                         return false;
     375           0 :                 if (*p == ':')
     376           0 :                         return false;
     377             :         }
     378             : 
     379           0 :         return p - s < 256;
     380             : }
     381             : 
     382          29 : int fd_get_path(int fd, char **ret) {
     383             :         char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
     384             :         int r;
     385             : 
     386          29 :         xsprintf(procfs_path, "/proc/self/fd/%i", fd);
     387          29 :         r = readlink_malloc(procfs_path, ret);
     388          29 :         if (r == -ENOENT) {
     389             :                 /* ENOENT can mean two things: that the fd does not exist or that /proc is not mounted. Let's make
     390             :                  * things debuggable and distinguish the two. */
     391             : 
     392           0 :                 if (access("/proc/self/fd/", F_OK) < 0)
     393             :                         /* /proc is not available or not set up properly, we're most likely in some chroot
     394             :                          * environment. */
     395           0 :                         return errno == ENOENT ? -EOPNOTSUPP : -errno;
     396             : 
     397           0 :                 return -EBADF; /* The directory exists, hence it's the fd that doesn't. */
     398             :         }
     399             : 
     400          29 :         return r;
     401             : }
     402             : 
     403           0 : int move_fd(int from, int to, int cloexec) {
     404             :         int r;
     405             : 
     406             :         /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
     407             :          * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
     408             :          * off, if it is > 0 it is turned on. */
     409             : 
     410           0 :         if (from < 0)
     411           0 :                 return -EBADF;
     412           0 :         if (to < 0)
     413           0 :                 return -EBADF;
     414             : 
     415           0 :         if (from == to) {
     416             : 
     417           0 :                 if (cloexec >= 0) {
     418           0 :                         r = fd_cloexec(to, cloexec);
     419           0 :                         if (r < 0)
     420           0 :                                 return r;
     421             :                 }
     422             : 
     423           0 :                 return to;
     424             :         }
     425             : 
     426           0 :         if (cloexec < 0) {
     427             :                 int fl;
     428             : 
     429           0 :                 fl = fcntl(from, F_GETFD, 0);
     430           0 :                 if (fl < 0)
     431           0 :                         return -errno;
     432             : 
     433           0 :                 cloexec = !!(fl & FD_CLOEXEC);
     434             :         }
     435             : 
     436           0 :         r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
     437           0 :         if (r < 0)
     438           0 :                 return -errno;
     439             : 
     440           0 :         assert(r == to);
     441             : 
     442           0 :         safe_close(from);
     443             : 
     444           0 :         return to;
     445             : }
     446             : 
     447          29 : int acquire_data_fd(const void *data, size_t size, unsigned flags) {
     448             : 
     449          29 :         _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
     450          29 :         char pattern[] = "/dev/shm/data-fd-XXXXXX";
     451          29 :         _cleanup_close_ int fd = -1;
     452          29 :         int isz = 0, r;
     453             :         ssize_t n;
     454             :         off_t f;
     455             : 
     456          29 :         assert(data || size == 0);
     457             : 
     458             :         /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
     459             :          * complex than I wish it was. But here's why:
     460             :          *
     461             :          * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
     462             :          *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
     463             :          *
     464             :          * b) Then, we try classic pipes. They are the second best option, as we can close the writing side, retaining
     465             :          *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
     466             :          *    clients can only bump their size to a system-wide limit, which might be quite low.
     467             :          *
     468             :          * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
     469             :          *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
     470             :          *    /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
     471             :          *
     472             :          * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
     473             :          *
     474             :          * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
     475             :          * figure. */
     476             : 
     477          29 :         if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
     478             :                 /* As a special case, return /dev/null if we have been called for an empty data block */
     479           4 :                 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
     480           4 :                 if (r < 0)
     481           0 :                         return -errno;
     482             : 
     483           4 :                 return r;
     484             :         }
     485             : 
     486          25 :         if ((flags & ACQUIRE_NO_MEMFD) == 0) {
     487          12 :                 fd = memfd_new("data-fd");
     488          12 :                 if (fd < 0)
     489           0 :                         goto try_pipe;
     490             : 
     491          12 :                 n = write(fd, data, size);
     492          12 :                 if (n < 0)
     493           0 :                         return -errno;
     494          12 :                 if ((size_t) n != size)
     495           0 :                         return -EIO;
     496             : 
     497          12 :                 f = lseek(fd, 0, SEEK_SET);
     498          12 :                 if (f != 0)
     499           0 :                         return -errno;
     500             : 
     501          12 :                 r = memfd_set_sealed(fd);
     502          12 :                 if (r < 0)
     503           0 :                         return r;
     504             : 
     505          12 :                 return TAKE_FD(fd);
     506             :         }
     507             : 
     508          13 : try_pipe:
     509          13 :         if ((flags & ACQUIRE_NO_PIPE) == 0) {
     510           5 :                 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
     511           0 :                         return -errno;
     512             : 
     513           5 :                 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
     514           5 :                 if (isz < 0)
     515           0 :                         return -errno;
     516             : 
     517           5 :                 if ((size_t) isz < size) {
     518           2 :                         isz = (int) size;
     519           2 :                         if (isz < 0 || (size_t) isz != size)
     520           0 :                                 return -E2BIG;
     521             : 
     522             :                         /* Try to bump the pipe size */
     523           2 :                         (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
     524             : 
     525             :                         /* See if that worked */
     526           2 :                         isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
     527           2 :                         if (isz < 0)
     528           0 :                                 return -errno;
     529             : 
     530           2 :                         if ((size_t) isz < size)
     531           0 :                                 goto try_dev_shm;
     532             :                 }
     533             : 
     534           5 :                 n = write(pipefds[1], data, size);
     535           5 :                 if (n < 0)
     536           0 :                         return -errno;
     537           5 :                 if ((size_t) n != size)
     538           0 :                         return -EIO;
     539             : 
     540           5 :                 (void) fd_nonblock(pipefds[0], false);
     541             : 
     542           5 :                 return TAKE_FD(pipefds[0]);
     543             :         }
     544             : 
     545           8 : try_dev_shm:
     546           8 :         if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
     547           5 :                 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
     548           5 :                 if (fd < 0)
     549           0 :                         goto try_dev_shm_without_o_tmpfile;
     550             : 
     551           5 :                 n = write(fd, data, size);
     552           5 :                 if (n < 0)
     553           0 :                         return -errno;
     554           5 :                 if ((size_t) n != size)
     555           0 :                         return -EIO;
     556             : 
     557             :                 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
     558           5 :                 return fd_reopen(fd, O_RDONLY|O_CLOEXEC);
     559             :         }
     560             : 
     561           3 : try_dev_shm_without_o_tmpfile:
     562           3 :         if ((flags & ACQUIRE_NO_REGULAR) == 0) {
     563           3 :                 fd = mkostemp_safe(pattern);
     564           3 :                 if (fd < 0)
     565           0 :                         return fd;
     566             : 
     567           3 :                 n = write(fd, data, size);
     568           3 :                 if (n < 0) {
     569           0 :                         r = -errno;
     570           0 :                         goto unlink_and_return;
     571             :                 }
     572           3 :                 if ((size_t) n != size) {
     573           0 :                         r = -EIO;
     574           0 :                         goto unlink_and_return;
     575             :                 }
     576             : 
     577             :                 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
     578           3 :                 r = open(pattern, O_RDONLY|O_CLOEXEC);
     579           3 :                 if (r < 0)
     580           0 :                         r = -errno;
     581             : 
     582           3 :         unlink_and_return:
     583           3 :                 (void) unlink(pattern);
     584           3 :                 return r;
     585             :         }
     586             : 
     587           0 :         return -EOPNOTSUPP;
     588             : }
     589             : 
     590             : /* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
     591             : #define DATA_FD_MEMORY_LIMIT (64U*1024U)
     592             : 
     593             : /* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's larger than that use /var/tmp instead. */
     594             : #define DATA_FD_TMP_LIMIT (1024U*1024U)
     595             : 
     596           3 : int fd_duplicate_data_fd(int fd) {
     597             : 
     598           3 :         _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
     599           3 :         _cleanup_free_ void *remains = NULL;
     600           3 :         size_t remains_size = 0;
     601             :         const char *td;
     602             :         struct stat st;
     603             :         int r;
     604             : 
     605             :         /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
     606             :          * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
     607             :          * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
     608             :          * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
     609             :          * /var/tmp. */
     610             :         /* Returns the new fd on success, and a negative errno-style error code on failure. The source fd is never closed here. */
     611           3 :         if (fstat(fd, &st) < 0)
     612           0 :                 return -errno;
     613             : 
     614             :         /* For now, let's only accept regular files, sockets, pipes and char devices */
     615           3 :         if (S_ISDIR(st.st_mode))
     616           0 :                 return -EISDIR;
     617           3 :         if (S_ISLNK(st.st_mode))
     618           0 :                 return -ELOOP;
     619           3 :         if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
     620           0 :                 return -EBADFD;
     621             : 
     622             :         /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
     623             :          * that we use the reported regular file size only as a hint, given that there are plenty special files in
     624             :          * /proc and /sys which report a zero file size but can be read from. */
     625             : 
     626           3 :         if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
     627             : 
     628             :                 /* Try a memfd first */
     629           3 :                 copy_fd = memfd_new("data-fd");
     630           3 :                 if (copy_fd >= 0) {
     631             :                         off_t f;
     632             : 
     633           3 :                         r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
     634           3 :                         if (r < 0)
     635           0 :                                 return r;
     636             :                         /* Rewind the memfd: it is either returned as is, or read back from the start by the fallbacks below */
     637           3 :                         f = lseek(copy_fd, 0, SEEK_SET);
     638           3 :                         if (f != 0)
     639           0 :                                 return -errno;
     640             : 
     641           3 :                         if (r == 0) {
     642             :                                 /* Did it fit into the limit? If so, we are done. */
     643           2 :                                 r = memfd_set_sealed(copy_fd);
     644           2 :                                 if (r < 0)
     645           0 :                                         return r;
     646             : 
     647           2 :                                 return TAKE_FD(copy_fd);
     648             :                         }
     649             : 
     650             :                         /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
     651             : 
     652             :                 } else {
     653           0 :                         _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
     654             :                         int isz;
     655             : 
     656             :                         /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
     657             :                          * than block indefinitely when we hit the pipe size limit */
     658             : 
     659           0 :                         if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
     660           0 :                                 return -errno;
     661             : 
     662           0 :                         isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
     663           0 :                         if (isz < 0)
     664           0 :                                 return -errno;
     665             : 
     666             :                         /* Try to enlarge the pipe size if necessary */
     667           0 :                         if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
     668             : 
     669           0 :                                 (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
     670             : 
     671           0 :                                 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
     672           0 :                                 if (isz < 0)
     673           0 :                                         return -errno;
     674             :                         }
     675             : 
     676           0 :                         if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
     677             : 
     678           0 :                                 r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size, NULL, NULL);
     679           0 :                                 if (r < 0 && r != -EAGAIN)
     680           0 :                                         return r; /* If we get EAGAIN it could be because of the source or because of
     681             :                                                    * the destination fd, we can't know, as sendfile() and friends won't
     682             :                                                    * tell us. Hence, treat this as reason to fall back, just to be
     683             :                                                    * sure. */
     684           0 :                                 if (r == 0) {
     685             :                                         /* Everything fit in, yay! */
     686           0 :                                         (void) fd_nonblock(pipefds[0], false);
     687             : 
     688           0 :                                         return TAKE_FD(pipefds[0]);
     689             :                                 }
     690             : 
     691             :                                 /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
     692             :                                  * when writing the new file we incorporate this first. */
     693           0 :                                 copy_fd = TAKE_FD(pipefds[0]);
     694             :                         }
     695             :                 }
     696             :         }
     697             : 
     698             :         /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
     699           1 :         if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
     700           1 :             (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
     701             :                 off_t f;
     702             : 
     703           1 :                 tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
     704           1 :                 if (tmp_fd < 0)
     705           0 :                         return tmp_fd;
     706             : 
     707           1 :                 if (copy_fd >= 0) {
     708             :                         /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
     709             :                          * temporary file first. */
     710             : 
     711           1 :                         r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
     712           1 :                         if (r < 0)
     713           0 :                                 return r;
     714             : 
     715           1 :                         assert(r == 0);
     716             :                 }
     717             : 
     718           1 :                 if (remains_size > 0) {
     719             :                         /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
     720             :                          * failed copy operation, let's flush them out next. */
     721             : 
     722           0 :                         r = loop_write(tmp_fd, remains, remains_size, false);
     723           0 :                         if (r < 0)
     724           0 :                                 return r;
     725             :                 }
     726             :                 /* Copy what is left of the source, capped at DATA_FD_TMP_LIMIT minus the memfd/pipe budget and the flushed remainder */
     727           1 :                 r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
     728           1 :                 if (r < 0)
     729           0 :                         return r;
     730           1 :                 if (r == 0)
     731           0 :                         goto finish;  /* Yay, it fit in */
     732             : 
     733             :                 /* It didn't fit in. Rewind the /tmp file, so that what we already wrote to it is copied over below */
     734           1 :                 f = lseek(tmp_fd, 0, SEEK_SET);
     735           1 :                 if (f != 0)
     736           0 :                         return -errno;
     737             :                 /* From here on the /tmp file takes over the role of the intermediate copy; the old memfd/pipe is dropped */
     738           1 :                 safe_close(copy_fd);
     739           1 :                 copy_fd = TAKE_FD(tmp_fd);
     740             : 
     741           1 :                 remains = mfree(remains);
     742           1 :                 remains_size = 0;
     743             :         }
     744             : 
     745             :         /* As last fallback use /var/tmp */
     746           1 :         r = var_tmp_dir(&td);
     747           1 :         if (r < 0)
     748           0 :                 return r;
     749             : 
     750           1 :         tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
     751           1 :         if (tmp_fd < 0)
     752           0 :                 return tmp_fd;
     753             : 
     754           1 :         if (copy_fd >= 0) {
     755             :                 /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, then copy this
     756             :                  * into the temporary file first. */
     757           1 :                 r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
     758           1 :                 if (r < 0)
     759           0 :                         return r;
     760             : 
     761           1 :                 assert(r == 0);
     762             :         }
     763             : 
     764           1 :         if (remains_size > 0) {
     765             :                 /* Then, copy in any read but not yet written bytes. */
     766           0 :                 r = loop_write(tmp_fd, remains, remains_size, false);
     767           0 :                 if (r < 0)
     768           0 :                         return r;
     769             :         }
     770             : 
     771             :         /* Copy in the rest */
     772           1 :         r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
     773           1 :         if (r < 0)
     774           0 :                 return r;
     775             : 
     776           1 :         assert(r == 0);
     777             : 
     778           1 : finish:
     779             :         /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
     780             :          * file again). The O_RDWR tmp_fd itself is released by its _cleanup_close_ attribute on return. */
     781             : 
     782           1 :         return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
     783             : }
     784             : 
     785         270 : int fd_move_above_stdio(int fd) {
     786             :         int flags, copy;
     787         270 :         PROTECT_ERRNO;
     788             : 
     789             :         /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
     790             :          * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
     791             :          * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
     792             :          * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
     793             :          * stdin/stdout/stderr of unrelated code.
     794             :          *
     795             :          * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
     796             :          * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
     797             :          * been closed before.
     798             :          *
     799             :          * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
     800             :          * error we simply return the original file descriptor, and we do not touch errno. */
     801             :         /* Returns the fd the caller shall use from now on: the new fd > 2 if moved (the old one is closed then), otherwise the fd passed in. */
     802         270 :         if (fd < 0 || fd > 2)
     803         269 :                 return fd;
     804             : 
     805           1 :         flags = fcntl(fd, F_GETFD, 0);
     806           1 :         if (flags < 0)
     807           0 :                 return fd;
     808             :         /* Duplicate to the lowest free fd >= 3, carrying over the close-on-exec state of the original */
     809           1 :         if (flags & FD_CLOEXEC)
     810           0 :                 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
     811             :         else
     812           1 :                 copy = fcntl(fd, F_DUPFD, 3);
     813           1 :         if (copy < 0)
     814           0 :                 return fd;
     815             : 
     816           1 :         assert(copy > 2);
     817             :         /* Only now that the duplicate exists release the original; a close() failure is deliberately ignored */
     818           1 :         (void) close(fd);
     819           1 :         return copy;
     820             : }
     821             : 
     822          16 : int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
     823             : 
     824          16 :         int fd[3] = { /* Put together an array of fds we work on */
     825             :                 original_input_fd,
     826             :                 original_output_fd,
     827             :                 original_error_fd
     828             :         };
     829             : 
     830             :         int r, i,
     831          16 :                 null_fd = -1,                /* if we open /dev/null, we store the fd to it here */
     832          16 :                 copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
     833             :         bool null_readable, null_writable;
     834             : 
     835             :         /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
     836             :          * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
     837             :          * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
     838             :          * on.
     839             :          *
     840             :          * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
     841             :          * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
     842             :          *
     843             :          * Note that when this function fails stdin/stdout/stderr might remain half set up!
     844             :          *
     845             :          * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
     846             :          * stdin/stdout/stderr). */
     847             :         /* Returns 0 on success, and a negative errno-style error code on failure. */
     848          16 :         null_readable = original_input_fd < 0;
     849          16 :         null_writable = original_output_fd < 0 || original_error_fd < 0;
     850             : 
     851             :         /* First step, open /dev/null once, if we need it */
     852          16 :         if (null_readable || null_writable) {
     853             : 
     854             :                 /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
     855           0 :                 null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
     856           0 :                                              null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
     857           0 :                 if (null_fd < 0) {
     858           0 :                         r = -errno;
     859           0 :                         goto finish;
     860             :                 }
     861             : 
     862             :                 /* If this fd is in the 0…2 range, let's move it out of it */
     863           0 :                 if (null_fd < 3) {
     864             :                         int copy;
     865             :                         /* NOTE(review): if this fcntl() fails, null_fd is still < 3 at 'finish'; safe_close_above_stdio() presumably leaves it open — confirm */
     866           0 :                         copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
     867           0 :                         if (copy < 0) {
     868           0 :                                 r = -errno;
     869           0 :                                 goto finish;
     870             :                         }
     871             : 
     872           0 :                         safe_close(null_fd);
     873           0 :                         null_fd = copy;
     874             :                 }
     875             :         }
     876             : 
     877             :         /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
     878          64 :         for (i = 0; i < 3; i++) {
     879             : 
     880          48 :                 if (fd[i] < 0)
     881           0 :                         fd[i] = null_fd;        /* A negative parameter means: connect this one to /dev/null */
     882          48 :                 else if (fd[i] != i && fd[i] < 3) {
     883             :                         /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
     884           0 :                         copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
     885           0 :                         if (copy_fd[i] < 0) {
     886           0 :                                 r = -errno;
     887           0 :                                 goto finish;
     888             :                         }
     889             : 
     890           0 :                         fd[i] = copy_fd[i];
     891             :                 }
     892             :         }
     893             : 
     894             :         /* At this point we now have the fds to use in fd[]. Each entry is either already at its final place
     895             :          * (fd[i] == i) or is above the stdio range, so that we have freedom to move it around. Let's now move
     896             :          * them to the right places. This is the point of no return. */
     897          64 :         for (i = 0; i < 3; i++) {
     898             : 
     899          48 :                 if (fd[i] == i) {
     900             : 
     901             :                         /* fd is already in place, but let's make sure O_CLOEXEC is off */
     902          32 :                         r = fd_cloexec(i, false);
     903          32 :                         if (r < 0)
     904           0 :                                 goto finish;
     905             : 
     906             :                 } else {
     907          16 :                         assert(fd[i] > 2);
     908             : 
     909          16 :                         if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
     910           0 :                                 r = -errno;
     911           0 :                                 goto finish;
     912             :                         }
     913             :                 }
     914             :         }
     915             : 
     916          16 :         r = 0;
     917             : 
     918          16 : finish:
     919             :         /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
     920             :          * fd passed in multiple times. */
     921          16 :         safe_close_above_stdio(original_input_fd);
     922          16 :         if (original_output_fd != original_input_fd)
     923          16 :                 safe_close_above_stdio(original_output_fd);
     924          16 :         if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
     925          16 :                 safe_close_above_stdio(original_error_fd);
     926             : 
     927             :         /* Close the copies we moved > 2 */
     928          64 :         for (i = 0; i < 3; i++)
     929          48 :                 safe_close(copy_fd[i]);
     930             : 
     931             :         /* Close our null fd, if it's > 2 */
     932          16 :         safe_close_above_stdio(null_fd);
     933             : 
     934          16 :         return r;
     935             : }
     936             : 
     937          31 : int fd_reopen(int fd, int flags) {
     938             :         char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
     939             :         int new_fd;
     940             : 
     941             :         /* Reopens the specified fd with new flags. This is useful for converting an O_PATH fd into a regular one, or to
     942             :          * turn O_RDWR fds into O_RDONLY fds.
     943             :          *
     944             :          * This doesn't work on sockets (since they cannot be open()ed, ever).
     945             :          *
     946             :          * This implicitly resets the file read index to 0. */
     947             :         /* Returns the new fd on success, -errno on failure. 'flags' is passed to open() unchanged; the original fd is not closed here. */
     948          31 :         xsprintf(procfs_path, "/proc/self/fd/%i", fd);
     949          31 :         new_fd = open(procfs_path, flags);
     950          31 :         if (new_fd < 0)
     951           0 :                 return -errno;
     952             : 
     953          31 :         return new_fd;
     954             : }
     955             : 
     956           1 : int read_nr_open(void) {
     957           1 :         _cleanup_free_ char *nr_open = NULL;
     958             :         int r;
     959             : 
     960             :         /* Returns the kernel's current fd limit, either by reading it from /proc/sys if that works, or using the
     961             :          * hard-coded default compiled-in value of current kernels (1M) if not. This call will never fail. */
     962             :         /* Both failure cases below (read error, parse error) are only reported via log_debug_errno() and fall through to the default. */
     963           1 :         r = read_one_line_file("/proc/sys/fs/nr_open", &nr_open);
     964           1 :         if (r < 0)
     965           0 :                 log_debug_errno(r, "Failed to read /proc/sys/fs/nr_open, ignoring: %m");
     966             :         else {
     967             :                 int v;
     968             : 
     969           1 :                 r = safe_atoi(nr_open, &v);
     970           1 :                 if (r < 0)
     971           0 :                         log_debug_errno(r, "Failed to parse /proc/sys/fs/nr_open value '%s', ignoring: %m", nr_open);
     972             :                 else
     973           1 :                         return v;
     974             :         }
     975             : 
     976             :         /* If we fail, fallback to the hard-coded kernel limit of 1024 * 1024. */
     977           0 :         return 1024 * 1024;
     978             : }

Generated by: LCOV version 1.14