LCOV - code coverage report
Current view: top level - nspawn - nspawn-mount.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 4 670 0.6 %
Date: 2019-08-22 15:41:25 Functions: 1 29 3.4 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <sys/mount.h>
       4             : #include <linux/magic.h>
       5             : 
       6             : #include "alloc-util.h"
       7             : #include "escape.h"
       8             : #include "fd-util.h"
       9             : #include "format-util.h"
      10             : #include "fs-util.h"
      11             : #include "label.h"
      12             : #include "mkdir.h"
      13             : #include "mount-util.h"
      14             : #include "mountpoint-util.h"
      15             : #include "nspawn-mount.h"
      16             : #include "parse-util.h"
      17             : #include "path-util.h"
      18             : #include "rm-rf.h"
      19             : #include "set.h"
      20             : #include "sort-util.h"
      21             : #include "stat-util.h"
      22             : #include "string-util.h"
      23             : #include "strv.h"
      24             : #include "tmpfile-util.h"
      25             : #include "user-util.h"
      26             : 
      27           0 : CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
      28             :         CustomMount *c, *ret;
      29             : 
      30           0 :         assert(l);
      31           0 :         assert(n);
      32           0 :         assert(t >= 0);
      33           0 :         assert(t < _CUSTOM_MOUNT_TYPE_MAX);
      34             : 
      35           0 :         c = reallocarray(*l, *n + 1, sizeof(CustomMount));
      36           0 :         if (!c)
      37           0 :                 return NULL;
      38             : 
      39           0 :         *l = c;
      40           0 :         ret = *l + *n;
      41           0 :         (*n)++;
      42             : 
      43           0 :         *ret = (CustomMount) { .type = t };
      44             : 
      45           0 :         return ret;
      46             : }
      47             : 
      48           4 : void custom_mount_free_all(CustomMount *l, size_t n) {
      49             :         size_t i;
      50             : 
      51           4 :         for (i = 0; i < n; i++) {
      52           0 :                 CustomMount *m = l + i;
      53             : 
      54           0 :                 free(m->source);
      55           0 :                 free(m->destination);
      56           0 :                 free(m->options);
      57             : 
      58           0 :                 if (m->work_dir) {
      59           0 :                         (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
      60           0 :                         free(m->work_dir);
      61             :                 }
      62             : 
      63           0 :                 if (m->rm_rf_tmpdir) {
      64           0 :                         (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
      65           0 :                         free(m->rm_rf_tmpdir);
      66             :                 }
      67             : 
      68           0 :                 strv_free(m->lower);
      69           0 :                 free(m->type_argument);
      70             :         }
      71             : 
      72           4 :         free(l);
      73           4 : }
      74             : 
      75           0 : static int custom_mount_compare(const CustomMount *a, const CustomMount *b) {
      76             :         int r;
      77             : 
      78           0 :         r = path_compare(a->destination, b->destination);
      79           0 :         if (r != 0)
      80           0 :                 return r;
      81             : 
      82           0 :         return CMP(a->type, b->type);
      83             : }
      84             : 
      85           0 : static bool source_path_is_valid(const char *p) {
      86           0 :         assert(p);
      87             : 
      88           0 :         if (*p == '+')
      89           0 :                 p++;
      90             : 
      91           0 :         return path_is_absolute(p);
      92             : }
      93             : 
      94           0 : static char *resolve_source_path(const char *dest, const char *source) {
      95             : 
      96           0 :         if (!source)
      97           0 :                 return NULL;
      98             : 
      99           0 :         if (source[0] == '+')
     100           0 :                 return path_join(dest, source + 1);
     101             : 
     102           0 :         return strdup(source);
     103             : }
     104             : 
     105           0 : int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
     106             :         size_t i;
     107             :         int r;
     108             : 
     109             :         /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the
     110             :          * parent process, so that we know the temporary directories to remove on exit before we fork off the
     111             :          * children. */
     112             : 
     113           0 :         assert(l || n == 0);
     114             : 
     115             :         /* Order the custom mounts, and make sure we have a working directory */
     116           0 :         typesafe_qsort(l, n, custom_mount_compare);
     117             : 
     118           0 :         for (i = 0; i < n; i++) {
     119           0 :                 CustomMount *m = l + i;
     120             : 
     121             :                 /* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
     122             :                  * already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
     123             :                  * particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
     124             :                  * the inner child, not the outer one. Determine this here. */
     125           0 :                 m->in_userns = path_startswith(m->destination, "/proc");
     126             : 
     127           0 :                 if (m->type == CUSTOM_MOUNT_BIND) {
     128           0 :                         if (m->source) {
     129             :                                 char *s;
     130             : 
     131           0 :                                 s = resolve_source_path(dest, m->source);
     132           0 :                                 if (!s)
     133           0 :                                         return log_oom();
     134             : 
     135           0 :                                 free_and_replace(m->source, s);
     136             :                         } else {
     137             :                                 /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
     138             : 
     139           0 :                                 m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX");
     140           0 :                                 if (!m->rm_rf_tmpdir)
     141           0 :                                         return log_oom();
     142             : 
     143           0 :                                 if (!mkdtemp(m->rm_rf_tmpdir)) {
     144           0 :                                         m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir);
     145           0 :                                         return log_error_errno(errno, "Failed to acquire temporary directory: %m");
     146             :                                 }
     147             : 
     148           0 :                                 m->source = path_join(m->rm_rf_tmpdir, "src");
     149           0 :                                 if (!m->source)
     150           0 :                                         return log_oom();
     151             : 
     152           0 :                                 if (mkdir(m->source, 0755) < 0)
     153           0 :                                         return log_error_errno(errno, "Failed to create %s: %m", m->source);
     154             :                         }
     155             :                 }
     156             : 
     157           0 :                 if (m->type == CUSTOM_MOUNT_OVERLAY) {
     158             :                         char **j;
     159             : 
     160           0 :                         STRV_FOREACH(j, m->lower) {
     161             :                                 char *s;
     162             : 
     163           0 :                                 s = resolve_source_path(dest, *j);
     164           0 :                                 if (!s)
     165           0 :                                         return log_oom();
     166             : 
     167           0 :                                 free_and_replace(*j, s);
     168             :                         }
     169             : 
     170           0 :                         if (m->work_dir) {
     171             :                                 char *s;
     172             : 
     173           0 :                                 s = resolve_source_path(dest, m->work_dir);
     174           0 :                                 if (!s)
     175           0 :                                         return log_oom();
     176             : 
     177           0 :                                 free_and_replace(m->work_dir, s);
     178             :                         } else {
     179           0 :                                 assert(m->source);
     180             : 
     181           0 :                                 r = tempfn_random(m->source, NULL, &m->work_dir);
     182           0 :                                 if (r < 0)
     183           0 :                                         return log_error_errno(r, "Failed to acquire working directory: %m");
     184             :                         }
     185             : 
     186           0 :                         (void) mkdir_label(m->work_dir, 0700);
     187             :                 }
     188             :         }
     189             : 
     190           0 :         return 0;
     191             : }
     192             : 
     193           0 : int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
     194           0 :         _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
     195           0 :         const char *p = s;
     196             :         CustomMount *m;
     197             :         int r;
     198             : 
     199           0 :         assert(l);
     200           0 :         assert(n);
     201             : 
     202           0 :         r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
     203           0 :         if (r < 0)
     204           0 :                 return r;
     205           0 :         if (r == 0)
     206           0 :                 return -EINVAL;
     207           0 :         if (r == 1) {
     208           0 :                 destination = strdup(source[0] == '+' ? source+1 : source);
     209           0 :                 if (!destination)
     210           0 :                         return -ENOMEM;
     211             :         }
     212           0 :         if (r == 2 && !isempty(p)) {
     213           0 :                 opts = strdup(p);
     214           0 :                 if (!opts)
     215           0 :                         return -ENOMEM;
     216             :         }
     217             : 
     218           0 :         if (isempty(source))
     219           0 :                 source = mfree(source);
     220           0 :         else if (!source_path_is_valid(source))
     221           0 :                 return -EINVAL;
     222             : 
     223           0 :         if (!path_is_absolute(destination))
     224           0 :                 return -EINVAL;
     225           0 :         if (empty_or_root(destination))
     226           0 :                 return -EINVAL;
     227             : 
     228           0 :         m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
     229           0 :         if (!m)
     230           0 :                 return -ENOMEM;
     231             : 
     232           0 :         m->source = TAKE_PTR(source);
     233           0 :         m->destination = TAKE_PTR(destination);
     234           0 :         m->read_only = read_only;
     235           0 :         m->options = TAKE_PTR(opts);
     236             : 
     237           0 :         return 0;
     238             : }
     239             : 
     240           0 : int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
     241           0 :         _cleanup_free_ char *path = NULL, *opts = NULL;
     242           0 :         const char *p = s;
     243             :         CustomMount *m;
     244             :         int r;
     245             : 
     246           0 :         assert(l);
     247           0 :         assert(n);
     248           0 :         assert(s);
     249             : 
     250           0 :         r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
     251           0 :         if (r < 0)
     252           0 :                 return r;
     253           0 :         if (r == 0)
     254           0 :                 return -EINVAL;
     255             : 
     256           0 :         if (isempty(p))
     257           0 :                 opts = strdup("mode=0755");
     258             :         else
     259           0 :                 opts = strdup(p);
     260           0 :         if (!opts)
     261           0 :                 return -ENOMEM;
     262             : 
     263           0 :         if (!path_is_absolute(path))
     264           0 :                 return -EINVAL;
     265           0 :         if (empty_or_root(path))
     266           0 :                 return -EINVAL;
     267             : 
     268           0 :         m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
     269           0 :         if (!m)
     270           0 :                 return -ENOMEM;
     271             : 
     272           0 :         m->destination = TAKE_PTR(path);
     273           0 :         m->options = TAKE_PTR(opts);
     274             : 
     275           0 :         return 0;
     276             : }
     277             : 
     278           0 : int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
     279           0 :         _cleanup_free_ char *upper = NULL, *destination = NULL;
     280           0 :         _cleanup_strv_free_ char **lower = NULL;
     281             :         CustomMount *m;
     282             :         int k;
     283             : 
     284           0 :         k = strv_split_extract(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
     285           0 :         if (k < 0)
     286           0 :                 return k;
     287           0 :         if (k < 2)
     288           0 :                 return -EADDRNOTAVAIL;
     289           0 :         if (k == 2) {
     290             :                 /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
     291             :                  * we'll also define the destination mount point the same as the upper. */
     292             : 
     293           0 :                 if (!source_path_is_valid(lower[0]) ||
     294           0 :                     !source_path_is_valid(lower[1]))
     295           0 :                         return -EINVAL;
     296             : 
     297           0 :                 upper = TAKE_PTR(lower[1]);
     298             : 
     299           0 :                 destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
     300           0 :                 if (!destination)
     301           0 :                         return -ENOMEM;
     302             :         } else {
     303             :                 char **i;
     304             : 
     305             :                 /* If more than two parameters are specified, the last one is the destination, the second to last one
     306             :                  * the "upper", and all before that the "lower" directories. */
     307             : 
     308           0 :                 destination = lower[k - 1];
     309           0 :                 upper = TAKE_PTR(lower[k - 2]);
     310             : 
     311           0 :                 STRV_FOREACH(i, lower)
     312           0 :                         if (!source_path_is_valid(*i))
     313           0 :                                 return -EINVAL;
     314             : 
     315             :                 /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
     316             :                  * in /var/tmp */
     317           0 :                 if (isempty(upper))
     318           0 :                         upper = mfree(upper);
     319           0 :                 else if (!source_path_is_valid(upper))
     320           0 :                         return -EINVAL;
     321             : 
     322           0 :                 if (!path_is_absolute(destination))
     323           0 :                         return -EINVAL;
     324             :         }
     325             : 
     326           0 :         if (empty_or_root(destination))
     327           0 :                 return -EINVAL;
     328             : 
     329           0 :         m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
     330           0 :         if (!m)
     331           0 :                 return -ENOMEM;
     332             : 
     333           0 :         m->destination = TAKE_PTR(destination);
     334           0 :         m->source = TAKE_PTR(upper);
     335           0 :         m->lower = TAKE_PTR(lower);
     336           0 :         m->read_only = read_only;
     337             : 
     338           0 :         return 0;
     339             : }
     340             : 
     341           0 : int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
     342           0 :         _cleanup_free_ char *path = NULL;
     343             :         CustomMount *m;
     344             : 
     345           0 :         assert(l);
     346           0 :         assert(n);
     347           0 :         assert(s);
     348             : 
     349           0 :         if (!path_is_absolute(s))
     350           0 :                 return -EINVAL;
     351             : 
     352           0 :         path = strdup(s);
     353           0 :         if (!path)
     354           0 :                 return -ENOMEM;
     355             : 
     356           0 :         m = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
     357           0 :         if (!m)
     358           0 :                 return -ENOMEM;
     359             : 
     360           0 :         m->destination = TAKE_PTR(path);
     361           0 :         return 0;
     362             : }
     363             : 
     364           0 : int tmpfs_patch_options(
     365             :                 const char *options,
     366             :                 uid_t uid_shift,
     367             :                 const char *selinux_apifs_context,
     368             :                 char **ret) {
     369             : 
     370           0 :         char *buf = NULL;
     371             : 
     372           0 :         if (uid_shift != UID_INVALID) {
     373           0 :                 if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT,
     374             :                              strempty(options), options ? "," : "",
     375             :                              uid_shift, uid_shift) < 0)
     376           0 :                         return -ENOMEM;
     377             : 
     378           0 :                 options = buf;
     379             :         }
     380             : 
     381             : #if HAVE_SELINUX
     382           0 :         if (selinux_apifs_context) {
     383             :                 char *t;
     384             : 
     385           0 :                 t = strjoin(strempty(options), options ? "," : "",
     386             :                             "context=\"", selinux_apifs_context, "\"");
     387           0 :                 free(buf);
     388           0 :                 if (!t)
     389           0 :                         return -ENOMEM;
     390             : 
     391           0 :                 buf = t;
     392             :         }
     393             : #endif
     394             : 
     395           0 :         if (!buf && options) {
     396           0 :                 buf = strdup(options);
     397           0 :                 if (!buf)
     398           0 :                         return -ENOMEM;
     399             :         }
     400           0 :         *ret = buf;
     401             : 
     402           0 :         return !!buf;
     403             : }
     404             : 
     405           0 : int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
     406             :         const char *full, *top, *x;
     407             :         int r;
     408           0 :         unsigned long extra_flags = 0;
     409             : 
     410           0 :         top = prefix_roota(dest, "/sys");
     411           0 :         r = path_is_fs_type(top, SYSFS_MAGIC);
     412           0 :         if (r < 0)
     413           0 :                 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
     414             :         /* /sys might already be mounted as sysfs by the outer child in the
     415             :          * !netns case. In this case, it's all good. Don't touch it because we
     416             :          * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
     417             :          */
     418           0 :         if (r > 0)
     419           0 :                 return 0;
     420             : 
     421           0 :         full = prefix_roota(top, "/full");
     422             : 
     423           0 :         (void) mkdir(full, 0755);
     424             : 
     425           0 :         if (mount_settings & MOUNT_APPLY_APIVFS_RO)
     426           0 :                 extra_flags |= MS_RDONLY;
     427             : 
     428           0 :         r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
     429             :                           MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
     430           0 :         if (r < 0)
     431           0 :                 return r;
     432             : 
     433           0 :         FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
     434           0 :                 _cleanup_free_ char *from = NULL, *to = NULL;
     435             : 
     436           0 :                 from = path_join(full, x);
     437           0 :                 if (!from)
     438           0 :                         return log_oom();
     439             : 
     440           0 :                 to = path_join(top, x);
     441           0 :                 if (!to)
     442           0 :                         return log_oom();
     443             : 
     444           0 :                 (void) mkdir(to, 0755);
     445             : 
     446           0 :                 r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
     447           0 :                 if (r < 0)
     448           0 :                         return r;
     449             : 
     450           0 :                 r = mount_verbose(LOG_ERR, NULL, to, NULL,
     451             :                                   MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
     452           0 :                 if (r < 0)
     453           0 :                         return r;
     454             :         }
     455             : 
     456           0 :         r = umount_verbose(full);
     457           0 :         if (r < 0)
     458           0 :                 return r;
     459             : 
     460           0 :         if (rmdir(full) < 0)
     461           0 :                 return log_error_errno(errno, "Failed to remove %s: %m", full);
     462             : 
     463             :         /* Create mountpoint for cgroups. Otherwise we are not allowed since we
     464             :          * remount /sys read-only.
     465             :          */
     466           0 :         x = prefix_roota(top, "/fs/cgroup");
     467           0 :         (void) mkdir_p(x, 0755);
     468             : 
     469           0 :         return mount_verbose(LOG_ERR, NULL, top, NULL,
     470             :                              MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
     471             : }
     472             : 
     473           0 : static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) {
     474             :         int r;
     475             : 
     476           0 :         assert(path);
     477             : 
     478           0 :         r = mkdir_errno_wrapper(path, mode);
     479           0 :         if (r < 0 && r != -EEXIST)
     480           0 :                 return r;
     481             : 
     482           0 :         if (uid_shift == UID_INVALID)
     483           0 :                 return 0;
     484             : 
     485           0 :         if (lchown(path, uid_shift, uid_shift) < 0)
     486           0 :                 return -errno;
     487             : 
     488           0 :         return 0;
     489             : }
     490             : 
     491           0 : static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
     492             :         const char *p, *e;
     493             :         int r;
     494             : 
     495           0 :         assert(path);
     496             : 
     497           0 :         if (prefix && !path_startswith(path, prefix))
     498           0 :                 return -ENOTDIR;
     499             : 
     500             :         /* create every parent directory in the path, except the last component */
     501           0 :         p = path + strspn(path, "/");
     502           0 :         for (;;) {
     503           0 :                 char t[strlen(path) + 1];
     504             : 
     505           0 :                 e = p + strcspn(p, "/");
     506           0 :                 p = e + strspn(e, "/");
     507             : 
     508             :                 /* Is this the last component? If so, then we're done */
     509           0 :                 if (*p == 0)
     510           0 :                         break;
     511             : 
     512           0 :                 memcpy(t, path, e - path);
     513           0 :                 t[e-path] = 0;
     514             : 
     515           0 :                 if (prefix && path_startswith(prefix, t))
     516           0 :                         continue;
     517             : 
     518           0 :                 r = mkdir_userns(t, mode, uid_shift);
     519           0 :                 if (r < 0)
     520           0 :                         return r;
     521             :         }
     522             : 
     523           0 :         return mkdir_userns(path, mode, uid_shift);
     524             : }
     525             : 
     526           0 : int mount_all(const char *dest,
     527             :               MountSettingsMask mount_settings,
     528             :               uid_t uid_shift,
     529             :               const char *selinux_apifs_context) {
     530             : 
     531             : #define PROC_INACCESSIBLE_REG(path)                                     \
     532             :         { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
     533             :           MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
     534             :         { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
     535             :           MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
     536             : 
     537             : #define PROC_READ_ONLY(path)                                            \
     538             :         { (path), (path), NULL, NULL, MS_BIND,                          \
     539             :           MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
     540             :         { NULL,   (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
     541             :           MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
     542             : 
     543             :         typedef struct MountPoint {
     544             :                 const char *what;
     545             :                 const char *where;
     546             :                 const char *type;
     547             :                 const char *options;
     548             :                 unsigned long flags;
     549             :                 MountSettingsMask mount_settings;
     550             :         } MountPoint;
     551             : 
     552             :         static const MountPoint mount_table[] = {
     553             :                 /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */
     554             :                 { "proc",            "/proc",           "proc",  NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV,
     555             :                   MOUNT_FATAL|MOUNT_IN_USERNS },
     556             : 
     557             :                 { "/proc/sys",       "/proc/sys",       NULL,    NULL,        MS_BIND,
     558             :                   MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* Bind mount first ... */
     559             : 
     560             :                 { "/proc/sys/net",   "/proc/sys/net",   NULL,    NULL,        MS_BIND,
     561             :                   MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
     562             : 
     563             :                 { NULL,              "/proc/sys",       NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
     564             :                   MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO },                          /* ... then, make it r/o */
     565             : 
     566             :                 /* Make these files inaccessible to container payloads: they potentially leak information about kernel
     567             :                  * internals or the host's execution environment to the container */
     568             :                 PROC_INACCESSIBLE_REG("/proc/kallsyms"),
     569             :                 PROC_INACCESSIBLE_REG("/proc/kcore"),
     570             :                 PROC_INACCESSIBLE_REG("/proc/keys"),
     571             :                 PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
     572             :                 PROC_INACCESSIBLE_REG("/proc/timer_list"),
     573             : 
     574             :                 /* Make these directories read-only to container payloads: they show hardware information, and in some
     575             :                  * cases contain tunables the container really shouldn't have access to. */
     576             :                 PROC_READ_ONLY("/proc/acpi"),
     577             :                 PROC_READ_ONLY("/proc/apm"),
     578             :                 PROC_READ_ONLY("/proc/asound"),
     579             :                 PROC_READ_ONLY("/proc/bus"),
     580             :                 PROC_READ_ONLY("/proc/fs"),
     581             :                 PROC_READ_ONLY("/proc/irq"),
     582             :                 PROC_READ_ONLY("/proc/scsi"),
     583             : 
     584             :                 { "mqueue",          "/dev/mqueue",     "mqueue", NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV,
     585             :                   MOUNT_IN_USERNS },
     586             : 
     587             :                 /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
     588             :                 { "tmpfs",           "/tmp",            "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
     589             :                   MOUNT_FATAL|MOUNT_APPLY_TMPFS_TMP },
     590             :                 { "tmpfs",           "/sys",            "tmpfs", "mode=555",  MS_NOSUID|MS_NOEXEC|MS_NODEV,
     591             :                   MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS },
     592             :                 { "sysfs",           "/sys",            "sysfs", NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
     593             :                   MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO },    /* skipped if above was mounted */
     594             :                 { "sysfs",           "/sys",            "sysfs", NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV,
     595             :                   MOUNT_FATAL },                          /* skipped if above was mounted */
     596             :                 { "tmpfs",           "/dev",            "tmpfs", "mode=755",  MS_NOSUID|MS_STRICTATIME,
     597             :                   MOUNT_FATAL },
     598             :                 { "tmpfs",           "/dev/shm",        "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
     599             :                   MOUNT_FATAL },
     600             :                 { "tmpfs",           "/run",            "tmpfs", "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,
     601             :                   MOUNT_FATAL },
     602             : 
     603             : #if HAVE_SELINUX
     604             :                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL,    NULL,        MS_BIND,
     605             :                   0 },  /* Bind mount first */
     606             :                 { NULL,              "/sys/fs/selinux", NULL,    NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
     607             :                   0 },  /* Then, make it r/o */
     608             : #endif
     609             :         };
     610             : 
     611           0 :         bool use_userns = (mount_settings & MOUNT_USE_USERNS);
     612           0 :         bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
     613           0 :         bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
     614           0 :         bool in_userns = (mount_settings & MOUNT_IN_USERNS);
     615           0 :         bool tmpfs_tmp = (mount_settings & MOUNT_APPLY_TMPFS_TMP);
     616             :         size_t k;
     617             :         int r;
     618             : 
     619           0 :         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
     620           0 :                 _cleanup_free_ char *where = NULL, *options = NULL;
     621             :                 const char *o;
     622           0 :                 bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
     623             : 
     624           0 :                 if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
     625           0 :                         continue;
     626             : 
     627           0 :                 if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS))
     628           0 :                         continue;
     629             : 
     630           0 :                 if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO))
     631           0 :                         continue;
     632             : 
     633           0 :                 if (!tmpfs_tmp && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_TMPFS_TMP))
     634           0 :                         continue;
     635             : 
     636           0 :                 r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where);
     637           0 :                 if (r < 0)
     638           0 :                         return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
     639             : 
     640             :                 /* Skip this entry if it is not a remount. */
     641           0 :                 if (mount_table[k].what) {
     642           0 :                         r = path_is_mount_point(where, NULL, 0);
     643           0 :                         if (r < 0 && r != -ENOENT)
     644           0 :                                 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
     645           0 :                         if (r > 0)
     646           0 :                                 continue;
     647             :                 }
     648             : 
     649           0 :                 r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
     650           0 :                 if (r < 0 && r != -EEXIST) {
     651           0 :                         if (fatal && r != -EROFS)
     652           0 :                                 return log_error_errno(r, "Failed to create directory %s: %m", where);
     653             : 
     654           0 :                         log_debug_errno(r, "Failed to create directory %s: %m", where);
     655             :                         /* If we failed mkdir() or chown() due to the root
     656             :                          * directory being read only, attempt to mount this fs
     657             :                          * anyway and let mount_verbose log any errors */
     658           0 :                         if (r != -EROFS)
     659           0 :                                 continue;
     660             :                 }
     661             : 
     662           0 :                 o = mount_table[k].options;
     663           0 :                 if (streq_ptr(mount_table[k].type, "tmpfs")) {
     664           0 :                         r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
     665           0 :                         if (r < 0)
     666           0 :                                 return log_oom();
     667           0 :                         if (r > 0)
     668           0 :                                 o = options;
     669             :                 }
     670             : 
     671           0 :                 r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
     672             :                                   mount_table[k].what,
     673             :                                   where,
     674             :                                   mount_table[k].type,
     675             :                                   mount_table[k].flags,
     676             :                                   o);
     677           0 :                 if (r < 0 && fatal)
     678           0 :                         return r;
     679             :         }
     680             : 
     681           0 :         return 0;
     682             : }
     683             : 
     684           0 : static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
     685           0 :         const char *p = options;
     686           0 :         unsigned long flags = *mount_flags;
     687           0 :         char *opts = NULL;
     688             :         int r;
     689             : 
     690           0 :         assert(options);
     691             : 
     692           0 :         for (;;) {
     693           0 :                 _cleanup_free_ char *word = NULL;
     694             : 
     695           0 :                 r = extract_first_word(&p, &word, ",", 0);
     696           0 :                 if (r < 0)
     697           0 :                         return log_error_errno(r, "Failed to extract mount option: %m");
     698           0 :                 if (r == 0)
     699           0 :                         break;
     700             : 
     701           0 :                 if (streq(word, "rbind"))
     702           0 :                         flags |= MS_REC;
     703           0 :                 else if (streq(word, "norbind"))
     704           0 :                         flags &= ~MS_REC;
     705             :                 else {
     706           0 :                         log_error("Invalid bind mount option: %s", word);
     707           0 :                         return -EINVAL;
     708             :                 }
     709             :         }
     710             : 
     711           0 :         *mount_flags = flags;
     712             :         /* in the future mount_opts will hold string options for mount(2) */
     713           0 :         *mount_opts = opts;
     714             : 
     715           0 :         return 0;
     716             : }
     717             : 
     718           0 : static int mount_bind(const char *dest, CustomMount *m) {
     719           0 :         _cleanup_free_ char *mount_opts = NULL, *where = NULL;
     720           0 :         unsigned long mount_flags = MS_BIND | MS_REC;
     721             :         struct stat source_st, dest_st;
     722             :         int r;
     723             : 
     724           0 :         assert(dest);
     725           0 :         assert(m);
     726             : 
     727           0 :         if (m->options) {
     728           0 :                 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
     729           0 :                 if (r < 0)
     730           0 :                         return r;
     731             :         }
     732             : 
     733           0 :         if (stat(m->source, &source_st) < 0)
     734           0 :                 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
     735             : 
     736           0 :         r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
     737           0 :         if (r < 0)
     738           0 :                 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
     739           0 :         if (r > 0) { /* Path exists already? */
     740             : 
     741           0 :                 if (stat(where, &dest_st) < 0)
     742           0 :                         return log_error_errno(errno, "Failed to stat %s: %m", where);
     743             : 
     744           0 :                 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode))
     745           0 :                         return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
     746             :                                                "Cannot bind mount directory %s on file %s.",
     747             :                                                m->source, where);
     748             : 
     749           0 :                 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode))
     750           0 :                         return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
     751             :                                                "Cannot bind mount file %s on directory %s.",
     752             :                                                m->source, where);
     753             : 
     754             :         } else { /* Path doesn't exist yet? */
     755           0 :                 r = mkdir_parents_label(where, 0755);
     756           0 :                 if (r < 0)
     757           0 :                         return log_error_errno(r, "Failed to make parents of %s: %m", where);
     758             : 
     759             :                 /* Create the mount point. Any non-directory file can be
     760             :                 * mounted on any non-directory file (regular, fifo, socket,
     761             :                 * char, block).
     762             :                 */
     763           0 :                 if (S_ISDIR(source_st.st_mode))
     764           0 :                         r = mkdir_label(where, 0755);
     765             :                 else
     766           0 :                         r = touch(where);
     767           0 :                 if (r < 0)
     768           0 :                         return log_error_errno(r, "Failed to create mount point %s: %m", where);
     769             :         }
     770             : 
     771           0 :         r = mount_verbose(LOG_ERR, m->source, where, NULL, mount_flags, mount_opts);
     772           0 :         if (r < 0)
     773           0 :                 return r;
     774             : 
     775           0 :         if (m->read_only) {
     776           0 :                 r = bind_remount_recursive(where, MS_RDONLY, MS_RDONLY, NULL);
     777           0 :                 if (r < 0)
     778           0 :                         return log_error_errno(r, "Read-only bind mount failed: %m");
     779             :         }
     780             : 
     781           0 :         return 0;
     782             : }
     783             : 
     784           0 : static int mount_tmpfs(
     785             :                 const char *dest,
     786             :                 CustomMount *m,
     787             :                 bool userns, uid_t uid_shift, uid_t uid_range,
     788             :                 const char *selinux_apifs_context) {
     789             : 
     790             :         const char *options;
     791           0 :         _cleanup_free_ char *buf = NULL, *where = NULL;
     792             :         int r;
     793             : 
     794           0 :         assert(dest);
     795           0 :         assert(m);
     796             : 
     797           0 :         r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
     798           0 :         if (r < 0)
     799           0 :                 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
     800           0 :         if (r == 0) { /* Doesn't exist yet? */
     801           0 :                 r = mkdir_p_label(where, 0755);
     802           0 :                 if (r < 0)
     803           0 :                         return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
     804             :         }
     805             : 
     806           0 :         r = tmpfs_patch_options(m->options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
     807           0 :         if (r < 0)
     808           0 :                 return log_oom();
     809           0 :         options = r > 0 ? buf : m->options;
     810             : 
     811           0 :         return mount_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options);
     812             : }
     813             : 
     814           0 : static char *joined_and_escaped_lower_dirs(char **lower) {
     815           0 :         _cleanup_strv_free_ char **sv = NULL;
     816             : 
     817           0 :         sv = strv_copy(lower);
     818           0 :         if (!sv)
     819           0 :                 return NULL;
     820             : 
     821           0 :         strv_reverse(sv);
     822             : 
     823           0 :         if (!strv_shell_escape(sv, ",:"))
     824           0 :                 return NULL;
     825             : 
     826           0 :         return strv_join(sv, ":");
     827             : }
     828             : 
     829           0 : static int mount_overlay(const char *dest, CustomMount *m) {
     830           0 :         _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
     831             :         const char *options;
     832             :         int r;
     833             : 
     834           0 :         assert(dest);
     835           0 :         assert(m);
     836             : 
     837           0 :         r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
     838           0 :         if (r < 0)
     839           0 :                 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
     840           0 :         if (r == 0) { /* Doesn't exist yet? */
     841           0 :                 r = mkdir_label(where, 0755);
     842           0 :                 if (r < 0)
     843           0 :                         return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
     844             :         }
     845             : 
     846           0 :         (void) mkdir_p_label(m->source, 0755);
     847             : 
     848           0 :         lower = joined_and_escaped_lower_dirs(m->lower);
     849           0 :         if (!lower)
     850           0 :                 return log_oom();
     851             : 
     852           0 :         escaped_source = shell_escape(m->source, ",:");
     853           0 :         if (!escaped_source)
     854           0 :                 return log_oom();
     855             : 
     856           0 :         if (m->read_only)
     857           0 :                 options = strjoina("lowerdir=", escaped_source, ":", lower);
     858             :         else {
     859           0 :                 _cleanup_free_ char *escaped_work_dir = NULL;
     860             : 
     861           0 :                 escaped_work_dir = shell_escape(m->work_dir, ",:");
     862           0 :                 if (!escaped_work_dir)
     863           0 :                         return log_oom();
     864             : 
     865           0 :                 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
     866             :         }
     867             : 
     868           0 :         return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
     869             : }
     870             : 
     871           0 : static int mount_inaccessible(const char *dest, CustomMount *m) {
     872           0 :         _cleanup_free_ char *where = NULL;
     873             :         const char *source;
     874             :         struct stat st;
     875             :         int r;
     876             : 
     877           0 :         assert(dest);
     878           0 :         assert(m);
     879             : 
     880           0 :         r = chase_symlinks_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st);
     881           0 :         if (r < 0) {
     882           0 :                 log_full_errno(m->graceful ? LOG_DEBUG : LOG_ERR, r, "Failed to resolve %s/%s: %m", dest, m->destination);
     883           0 :                 return m->graceful ? 0 : r;
     884             :         }
     885             : 
     886           0 :         assert_se(source = mode_to_inaccessible_node(st.st_mode));
     887             : 
     888           0 :         r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
     889           0 :         if (r < 0)
     890           0 :                 return m->graceful ? 0 : r;
     891             : 
     892           0 :         r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
     893           0 :         if (r < 0) {
     894           0 :                 umount_verbose(where);
     895           0 :                 return m->graceful ? 0 : r;
     896             :         }
     897             : 
     898           0 :         return 0;
     899             : }
     900             : 
     901           0 : static int mount_arbitrary(const char *dest, CustomMount *m) {
     902           0 :         _cleanup_free_ char *where = NULL;
     903             :         int r;
     904             : 
     905           0 :         assert(dest);
     906           0 :         assert(m);
     907             : 
     908           0 :         r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
     909           0 :         if (r < 0)
     910           0 :                 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
     911           0 :         if (r == 0) { /* Doesn't exist yet? */
     912           0 :                 r = mkdir_p_label(where, 0755);
     913           0 :                 if (r < 0)
     914           0 :                         return log_error_errno(r, "Creating mount point for mount %s failed: %m", where);
     915             :         }
     916             : 
     917           0 :         return mount_verbose(LOG_ERR, m->source, where, m->type_argument, 0, m->options);
     918             : }
     919             : 
     920           0 : int mount_custom(
     921             :                 const char *dest,
     922             :                 CustomMount *mounts, size_t n,
     923             :                 bool userns, uid_t uid_shift, uid_t uid_range,
     924             :                 const char *selinux_apifs_context,
     925             :                 bool in_userns) {
     926             : 
     927             :         size_t i;
     928             :         int r;
     929             : 
     930           0 :         assert(dest);
     931             : 
     932           0 :         for (i = 0; i < n; i++) {
     933           0 :                 CustomMount *m = mounts + i;
     934             : 
     935           0 :                 if (m->in_userns != in_userns)
     936           0 :                         continue;
     937             : 
     938           0 :                 switch (m->type) {
     939             : 
     940           0 :                 case CUSTOM_MOUNT_BIND:
     941           0 :                         r = mount_bind(dest, m);
     942           0 :                         break;
     943             : 
     944           0 :                 case CUSTOM_MOUNT_TMPFS:
     945           0 :                         r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
     946           0 :                         break;
     947             : 
     948           0 :                 case CUSTOM_MOUNT_OVERLAY:
     949           0 :                         r = mount_overlay(dest, m);
     950           0 :                         break;
     951             : 
     952           0 :                 case CUSTOM_MOUNT_INACCESSIBLE:
     953           0 :                         r = mount_inaccessible(dest, m);
     954           0 :                         break;
     955             : 
     956           0 :                 case CUSTOM_MOUNT_ARBITRARY:
     957           0 :                         r = mount_arbitrary(dest, m);
     958           0 :                         break;
     959             : 
     960           0 :                 default:
     961           0 :                         assert_not_reached("Unknown custom mount type");
     962             :                 }
     963             : 
     964           0 :                 if (r < 0)
     965           0 :                         return r;
     966             :         }
     967             : 
     968           0 :         return 0;
     969             : }
     970             : 
     971           0 : static int setup_volatile_state(
     972             :                 const char *directory,
     973             :                 bool userns, uid_t uid_shift, uid_t uid_range,
     974             :                 const char *selinux_apifs_context) {
     975             : 
     976           0 :         _cleanup_free_ char *buf = NULL;
     977             :         const char *p, *options;
     978             :         int r;
     979             : 
     980           0 :         assert(directory);
     981             : 
     982             :         /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
     983             : 
     984           0 :         r = bind_remount_recursive(directory, MS_RDONLY, MS_RDONLY, NULL);
     985           0 :         if (r < 0)
     986           0 :                 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
     987             : 
     988           0 :         p = prefix_roota(directory, "/var");
     989           0 :         r = mkdir(p, 0755);
     990           0 :         if (r < 0 && errno != EEXIST)
     991           0 :                 return log_error_errno(errno, "Failed to create %s: %m", directory);
     992             : 
     993           0 :         options = "mode=755";
     994           0 :         r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
     995           0 :         if (r < 0)
     996           0 :                 return log_oom();
     997           0 :         if (r > 0)
     998           0 :                 options = buf;
     999             : 
    1000           0 :         return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
    1001             : }
    1002             : 
    1003           0 : static int setup_volatile_yes(
    1004             :                 const char *directory,
    1005             :                 bool userns, uid_t uid_shift, uid_t uid_range,
    1006             :                 const char *selinux_apifs_context) {
    1007             : 
    1008           0 :         bool tmpfs_mounted = false, bind_mounted = false;
    1009           0 :         char template[] = "/tmp/nspawn-volatile-XXXXXX";
    1010           0 :         _cleanup_free_ char *buf = NULL, *bindir = NULL;
    1011             :         const char *f, *t, *options;
    1012             :         struct stat st;
    1013             :         int r;
    1014             : 
    1015           0 :         assert(directory);
    1016             : 
    1017             :         /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
    1018             :          * that read-only. Before we start setting this up let's validate if the image has the /usr merge
    1019             :          * implemented, and let's output a friendly log message if it hasn't. */
    1020             : 
    1021           0 :         bindir = path_join(directory, "/bin");
    1022           0 :         if (!bindir)
    1023           0 :                 return log_oom();
    1024           0 :         if (lstat(bindir, &st) < 0) {
    1025           0 :                 if (errno != ENOENT)
    1026           0 :                         return log_error_errno(errno, "Failed to stat /bin directory below image: %m");
    1027             : 
    1028             :                 /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
    1029             :                  * rest. */
    1030           0 :         } else if (S_ISDIR(st.st_mode))
    1031           0 :                 return log_error_errno(SYNTHETIC_ERRNO(EISDIR),
    1032             :                                        "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
    1033             :                                        "Please work with your distribution and help them adopt the merged /usr scheme.");
    1034           0 :         else if (!S_ISLNK(st.st_mode))
    1035           0 :                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
    1036             :                                        "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
    1037             : 
    1038           0 :         if (!mkdtemp(template))
    1039           0 :                 return log_error_errno(errno, "Failed to create temporary directory: %m");
    1040             : 
    1041           0 :         options = "mode=755";
    1042           0 :         r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
    1043           0 :         if (r < 0)
    1044           0 :                 goto fail;
    1045           0 :         if (r > 0)
    1046           0 :                 options = buf;
    1047             : 
    1048           0 :         r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
    1049           0 :         if (r < 0)
    1050           0 :                 goto fail;
    1051             : 
    1052           0 :         tmpfs_mounted = true;
    1053             : 
    1054           0 :         f = prefix_roota(directory, "/usr");
    1055           0 :         t = prefix_roota(template, "/usr");
    1056             : 
    1057           0 :         r = mkdir(t, 0755);
    1058           0 :         if (r < 0 && errno != EEXIST) {
    1059           0 :                 r = log_error_errno(errno, "Failed to create %s: %m", t);
    1060           0 :                 goto fail;
    1061             :         }
    1062             : 
    1063           0 :         r = mount_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
    1064           0 :         if (r < 0)
    1065           0 :                 goto fail;
    1066             : 
    1067           0 :         bind_mounted = true;
    1068             : 
    1069           0 :         r = bind_remount_recursive(t, MS_RDONLY, MS_RDONLY, NULL);
    1070           0 :         if (r < 0) {
    1071           0 :                 log_error_errno(r, "Failed to remount %s read-only: %m", t);
    1072           0 :                 goto fail;
    1073             :         }
    1074             : 
    1075           0 :         r = mount_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
    1076           0 :         if (r < 0)
    1077           0 :                 goto fail;
    1078             : 
    1079           0 :         (void) rmdir(template);
    1080             : 
    1081           0 :         return 0;
    1082             : 
    1083           0 : fail:
    1084           0 :         if (bind_mounted)
    1085           0 :                 (void) umount_verbose(t);
    1086             : 
    1087           0 :         if (tmpfs_mounted)
    1088           0 :                 (void) umount_verbose(template);
    1089           0 :         (void) rmdir(template);
    1090           0 :         return r;
    1091             : }
    1092             : /* Implements --volatile=overlay: overmounts 'directory' with an overlayfs whose lower layer is 'directory'
                        * itself and whose upper and work directories live on a tmpfs mounted on a mkdtemp() directory below /tmp.
                        *
                        * directory: path to overmount; must not be NULL (asserted).
                        * uid_shift: handed to tmpfs_patch_options() as UID_INVALID when it is 0, otherwise unchanged.
                        * selinux_apifs_context: handed to tmpfs_patch_options() unchanged.
                        * userns, uid_range: accepted but not referenced anywhere in this body.
                        *
                        * Returns the result of the final overlay mount_verbose() call, or the error from the first step that
                        * failed. Once mkdtemp() has succeeded, every path (success included) goes through the cleanup at the
                        * "finish" label, which unmounts the tmpfs from the temporary path and removes that directory.
                        *
                        * NOTE(review): a tmpfs_patch_options() failure and the -ENOMEM path after shell_escape() jump to cleanup
                        * without a log call in this function; whether those helpers log on their own is not visible here. */
    1093           0 : static int setup_volatile_overlay(
    1094             :                 const char *directory,
    1095             :                 bool userns, uid_t uid_shift, uid_t uid_range,
    1096             :                 const char *selinux_apifs_context) {
    1097             : 
    1098           0 :         _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL;
    1099           0 :         char template[] = "/tmp/nspawn-volatile-XXXXXX";
    1100             :         const char *upper, *work, *options;
    1101           0 :         bool tmpfs_mounted = false;
    1102             :         int r;
    1103             : 
    1104           0 :         assert(directory);
    1105             : 
    1106             :         /* --volatile=overlay means we mount an overlayfs to the root dir. */
    1107             : 
    1108           0 :         if (!mkdtemp(template))
    1109           0 :                 return log_error_errno(errno, "Failed to create temporary directory: %m");
    1110             :         /* Start from a fixed option string; a positive return from tmpfs_patch_options() means 'buf' holds the string to use instead. */
    1111           0 :         options = "mode=755";
    1112           0 :         r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
    1113           0 :         if (r < 0)
    1114           0 :                 goto finish;
    1115           0 :         if (r > 0)
    1116           0 :                 options = buf;
    1117             :         /* Mount a tmpfs on the temporary directory; it will hold the overlay's upper and work directories. */
    1118           0 :         r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
    1119           0 :         if (r < 0)
    1120           0 :                 goto finish;
    1121             : 
    1122           0 :         tmpfs_mounted = true;
    1123             :         /* Both paths are built below the template path, i.e. inside the tmpfs mounted just above. */
    1124           0 :         upper = strjoina(template, "/upper");
    1125           0 :         work = strjoina(template, "/work");
    1126             : 
    1127           0 :         if (mkdir(upper, 0755) < 0) {
    1128           0 :                 r = log_error_errno(errno, "Failed to create %s: %m", upper);
    1129           0 :                 goto finish;
    1130             :         }
    1131           0 :         if (mkdir(work, 0755) < 0) {
    1132           0 :                 r = log_error_errno(errno, "Failed to create %s: %m", work);
    1133           0 :                 goto finish;
    1134             :         }
    1135             : 
    1136             :         /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
    1137             :          * that the kernel allows us to do that without going through some mount point rearrangements. */
    1138             :         /* The paths are passed through shell_escape() with "," and ":" before being joined into the option string (presumably because both separate fields in overlayfs options; confirm). */
    1139           0 :         escaped_directory = shell_escape(directory, ",:");
    1140           0 :         escaped_upper = shell_escape(upper, ",:");
    1141           0 :         escaped_work = shell_escape(work, ",:");
    1142           0 :         if (!escaped_directory || !escaped_upper || !escaped_work) {
    1143           0 :                 r = -ENOMEM;
    1144           0 :                 goto finish;
    1145             :         }
    1146             : 
    1147           0 :         options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work);
    1148           0 :         r = mount_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options);
    1149             :         /* No return before the label: the success path falls through into the same cleanup as the error paths. NOTE(review): presumably the overlay keeps using the tmpfs contents after this unmount; confirm. */
    1150           0 : finish:
    1151           0 :         if (tmpfs_mounted)
    1152           0 :                 (void) umount_verbose(template);
    1153             : 
    1154           0 :         (void) rmdir(template);
    1155           0 :         return r;
    1156             : }
    1157             : /* Dispatches on 'mode' to setup_volatile_yes(), setup_volatile_state() or setup_volatile_overlay(), forwarding
                        * all other arguments unchanged, and returns that routine's result. Any other mode value does nothing and
                        * returns 0. */
    1158           0 : int setup_volatile_mode(
    1159             :                 const char *directory,
    1160             :                 VolatileMode mode,
    1161             :                 bool userns, uid_t uid_shift, uid_t uid_range,
    1162             :                 const char *selinux_apifs_context) {
    1163             : 
    1164           0 :         switch (mode) {
    1165             : 
    1166           0 :         case VOLATILE_YES:
    1167           0 :                 return setup_volatile_yes(directory, userns, uid_shift, uid_range, selinux_apifs_context);
    1168             : 
    1169           0 :         case VOLATILE_STATE:
    1170           0 :                 return setup_volatile_state(directory, userns, uid_shift, uid_range, selinux_apifs_context);
    1171             : 
    1172           0 :         case VOLATILE_OVERLAY:
    1173           0 :                 return setup_volatile_overlay(directory, userns, uid_shift, uid_range, selinux_apifs_context);
    1174             : 
    1175           0 :         default:
    1176           0 :                 return 0;
    1177             :         }
    1178             : }
    1179             : 
    1180             : /* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL.
                        *
                        * Parses 's' into a new-root path and an optional old-root path. The first word is extracted with
                        * extract_first_word() using ":" as separator; whatever remains after it, if non-empty, is duplicated
                        * verbatim as the old-root path (so any further ':' stays part of it). Both parts must satisfy
                        * path_is_absolute().
                        *
                        * Returns 0 on success; -EINVAL if no first word was extracted or if either part is not absolute; -ENOMEM if
                        * strdup() fails; otherwise the negative result of extract_first_word().
                        *
                        * The output pointers are only written through free_and_replace() after every check has passed, so they are
                        * left untouched on failure. The temporaries are declared _cleanup_free_. */
    1181           0 : int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
    1182           0 :         _cleanup_free_ char *root_new = NULL, *root_old = NULL;
    1183           0 :         const char *p = s;
    1184             :         int r;
    1185             : 
    1186           0 :         assert(pivot_root_new);
    1187           0 :         assert(pivot_root_old);
    1188             : 
    1189           0 :         r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
    1190           0 :         if (r < 0)
    1191           0 :                 return r;
    1192           0 :         if (r == 0)
    1193           0 :                 return -EINVAL;
    1194             :         /* 'p' now points at the rest of the input after the first word; an empty rest means no old-root path was given. */
    1195           0 :         if (isempty(p))
    1196           0 :                 root_old = NULL;
    1197             :         else {
    1198           0 :                 root_old = strdup(p);
    1199           0 :                 if (!root_old)
    1200           0 :                         return -ENOMEM;
    1201             :         }
    1202             : 
    1203           0 :         if (!path_is_absolute(root_new))
    1204           0 :                 return -EINVAL;
    1205           0 :         if (root_old && !path_is_absolute(root_old))
    1206           0 :                 return -EINVAL;
    1207             :         /* All validation passed: only now are the caller's pointers replaced. */
    1208           0 :         free_and_replace(*pivot_root_new, root_new);
    1209           0 :         free_and_replace(*pivot_root_old, root_old);
    1210             : 
    1211           0 :         return 0;
    1212             : }
    1213             : /* Makes directory/pivot_root_new the mount at 'directory', using a bind mount followed by MS_MOVE mounts. If
                        * pivot_root_old is set, the previous 'directory' mount ends up at pivot_root_old below the new root.
                        *
                        * directory: must not be NULL (asserted).
                        * pivot_root_new: path relative to 'directory' to become the new root; if NULL the function returns 0 at once.
                        * pivot_root_old: optional path inside the new root that receives the previous root; may be NULL.
                        *
                        * Returns the result of the last mount_verbose() call attempted, or the value of log_oom() /
                        * log_error_errno() when path_join() / mkdtemp() fail.
                        *
                        * NOTE(review): mounts already bound or moved are not rolled back when a later step fails; the "done" label
                        * only removes the temporary directory, and the rmdir() result is ignored. */
    1214           0 : int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
    1215           0 :         _cleanup_free_ char *directory_pivot_root_new = NULL;
    1216           0 :         _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
    1217           0 :         char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
    1218           0 :         bool remove_pivot_tmp = false;
    1219             :         int r;
    1220             : 
    1221           0 :         assert(directory);
    1222             : 
    1223           0 :         if (!pivot_root_new)
    1224           0 :                 return 0;
    1225             : 
    1226             :         /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
    1227             :          * If pivot_root_old is NULL, the existing / disappears.
    1228             :          * This requires a temporary directory, pivot_tmp, which is
    1229             :          * not a child of either.
    1230             :          *
    1231             :          * This is typically used for OSTree-style containers, where
    1232             :          * the root partition contains several sysroots which could be
    1233             :          * run. Normally, one would be chosen by the bootloader and
    1234             :          * pivoted to / by initramfs.
    1235             :          *
    1236             :          * For example, for an OSTree deployment, pivot_root_new
    1237             :          * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
    1238             :          * code doesn’t do the /var mount which OSTree expects: use
    1239             :          * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
    1240             :          *
    1241             :          * So in the OSTree case, we’ll end up with something like:
    1242             :          *  - directory = /tmp/nspawn-root-123456
    1243             :          *  - pivot_root_new = /ostree/deploy/os/deploy/123abc
    1244             :          *  - pivot_root_old = /sysroot
    1245             :          *  - directory_pivot_root_new =
    1246             :          *       /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
    1247             :          *  - pivot_tmp = /tmp/nspawn-pivot-123456
    1248             :          *  - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
    1249             :          *
    1250             :          * Requires all file systems at directory and below to be mounted
    1251             :          * MS_PRIVATE or MS_SLAVE so they can be moved.
    1252             :          */
    1253           0 :         directory_pivot_root_new = path_join(directory, pivot_root_new);
    1254           0 :         if (!directory_pivot_root_new)
    1255           0 :                 return log_oom();
    1256             : 
    1257             :         /* Remount directory_pivot_root_new to make it movable. */
    1258           0 :         r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
    1259           0 :         if (r < 0)
    1260           0 :                 goto done;
    1261             :         /* With an old-root target, three moves via pivot_tmp swap the two mounts: new root to pivot_tmp, old root below it, then pivot_tmp onto 'directory'. Without one, a single move suffices. */
    1262           0 :         if (pivot_root_old) {
    1263           0 :                 if (!mkdtemp(pivot_tmp)) {
    1264           0 :                         r = log_error_errno(errno, "Failed to create temporary directory: %m");
    1265           0 :                         goto done;
    1266             :                 }
    1267             : 
    1268           0 :                 remove_pivot_tmp = true;
    1269           0 :                 pivot_tmp_pivot_root_old = path_join(pivot_tmp, pivot_root_old);
    1270           0 :                 if (!pivot_tmp_pivot_root_old) {
    1271           0 :                         r = log_oom();
    1272           0 :                         goto done;
    1273             :                 }
    1274             : 
    1275           0 :                 r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
    1276           0 :                 if (r < 0)
    1277           0 :                         goto done;
    1278             : 
    1279           0 :                 r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
    1280           0 :                 if (r < 0)
    1281           0 :                         goto done;
    1282             : 
    1283           0 :                 r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
    1284           0 :                 if (r < 0)
    1285           0 :                         goto done;
    1286             :         } else {
    1287           0 :                 r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
    1288           0 :                 if (r < 0)
    1289           0 :                         goto done;
    1290             :         }
    1291             :         /* Success and failure both end up here; pivot_tmp is removed only if mkdtemp() actually created it. */
    1292           0 : done:
    1293           0 :         if (remove_pivot_tmp)
    1294           0 :                 (void) rmdir(pivot_tmp);
    1295             : 
    1296           0 :         return r;
    1297             : }

Generated by: LCOV version 1.14