Line data Source code
1 : /* SPDX-License-Identifier: LGPL-2.1+ */
2 :
3 : #include <errno.h>
4 : #include <stdlib.h>
5 : #include <string.h>
6 : #include <sys/mount.h>
7 : #include <sys/stat.h>
8 : #include <sys/statvfs.h>
9 : #include <unistd.h>
10 :
11 : #include "alloc-util.h"
12 : #include "extract-word.h"
13 : #include "fd-util.h"
14 : #include "fileio.h"
15 : #include "fs-util.h"
16 : #include "hashmap.h"
17 : #include "libmount-util.h"
18 : #include "mount-util.h"
19 : #include "mountpoint-util.h"
20 : #include "parse-util.h"
21 : #include "path-util.h"
22 : #include "set.h"
23 : #include "stdio-util.h"
24 : #include "string-util.h"
25 : #include "strv.h"
26 :
27 0 : int umount_recursive(const char *prefix, int flags) {
28 0 : int n = 0, r;
29 : bool again;
30 :
31 : /* Try to umount everything recursively below a
32 : * directory. Also, take care of stacked mounts, and keep
33 : * unmounting them until they are gone. */
34 :
35 : do {
36 0 : _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
37 0 : _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
38 :
39 0 : again = false;
40 :
41 0 : r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
42 0 : if (r < 0)
43 0 : return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
44 :
45 0 : for (;;) {
46 : struct libmnt_fs *fs;
47 : const char *path;
48 :
49 0 : r = mnt_table_next_fs(table, iter, &fs);
50 0 : if (r == 1)
51 0 : break;
52 0 : if (r < 0)
53 0 : return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
54 :
55 0 : path = mnt_fs_get_target(fs);
56 0 : if (!path)
57 0 : continue;
58 :
59 0 : if (!path_startswith(path, prefix))
60 0 : continue;
61 :
62 0 : if (umount2(path, flags) < 0) {
63 0 : r = log_debug_errno(errno, "Failed to umount %s: %m", path);
64 0 : continue;
65 : }
66 :
67 0 : log_debug("Successfully unmounted %s", path);
68 :
69 0 : again = true;
70 0 : n++;
71 :
72 0 : break;
73 : }
74 :
75 0 : } while (again);
76 :
77 0 : return n;
78 : }
79 :
80 : /* Get the mount flags for the mountpoint at "path" from "table" */
81 0 : static int get_mount_flags(const char *path, unsigned long *flags, struct libmnt_table *table) {
82 0 : struct statvfs buf = {};
83 0 : struct libmnt_fs *fs = NULL;
84 0 : const char *opts = NULL;
85 0 : int r = 0;
86 :
87 0 : fs = mnt_table_find_target(table, path, MNT_ITER_FORWARD);
88 0 : if (fs == NULL) {
89 0 : log_warning("Could not find '%s' in mount table", path);
90 0 : goto fallback;
91 : }
92 :
93 0 : opts = mnt_fs_get_vfs_options(fs);
94 0 : r = mnt_optstr_get_flags(opts, flags, mnt_get_builtin_optmap(MNT_LINUX_MAP));
95 0 : if (r != 0) {
96 0 : log_warning_errno(r, "Could not get flags for '%s': %m", path);
97 0 : goto fallback;
98 : }
99 :
100 : /* relatime is default and trying to set it in an unprivileged container causes EPERM */
101 0 : *flags &= ~MS_RELATIME;
102 0 : return 0;
103 :
104 0 : fallback:
105 0 : if (statvfs(path, &buf) < 0)
106 0 : return -errno;
107 :
108 0 : *flags = buf.f_flag;
109 0 : return 0;
110 : }
111 :
112 : /* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
113 : * for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
114 0 : int bind_remount_recursive_with_mountinfo(
115 : const char *prefix,
116 : unsigned long new_flags,
117 : unsigned long flags_mask,
118 : char **blacklist,
119 : FILE *proc_self_mountinfo) {
120 :
121 0 : _cleanup_set_free_free_ Set *done = NULL;
122 0 : _cleanup_free_ char *cleaned = NULL;
123 : int r;
124 :
125 0 : assert(proc_self_mountinfo);
126 :
127 : /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
128 : * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
129 : * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
130 : * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
131 : * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
132 : * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
133 : * future submounts that have been triggered via autofs.
134 : *
135 : * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
136 : * remount operation. Note that we'll ignore the blacklist for the top-level path. */
137 :
138 0 : cleaned = strdup(prefix);
139 0 : if (!cleaned)
140 0 : return -ENOMEM;
141 :
142 0 : path_simplify(cleaned, false);
143 :
144 0 : done = set_new(&path_hash_ops);
145 0 : if (!done)
146 0 : return -ENOMEM;
147 :
148 0 : for (;;) {
149 0 : _cleanup_set_free_free_ Set *todo = NULL;
150 0 : _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
151 0 : _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
152 0 : bool top_autofs = false;
153 : char *x;
154 : unsigned long orig_flags;
155 :
156 0 : todo = set_new(&path_hash_ops);
157 0 : if (!todo)
158 0 : return -ENOMEM;
159 :
160 0 : rewind(proc_self_mountinfo);
161 :
162 0 : r = libmount_parse("/proc/self/mountinfo", proc_self_mountinfo, &table, &iter);
163 0 : if (r < 0)
164 0 : return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
165 :
166 0 : for (;;) {
167 : struct libmnt_fs *fs;
168 : const char *path, *type;
169 :
170 0 : r = mnt_table_next_fs(table, iter, &fs);
171 0 : if (r == 1)
172 0 : break;
173 0 : if (r < 0)
174 0 : return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
175 :
176 0 : path = mnt_fs_get_target(fs);
177 0 : type = mnt_fs_get_fstype(fs);
178 0 : if (!path || !type)
179 0 : continue;
180 :
181 0 : if (!path_startswith(path, cleaned))
182 0 : continue;
183 :
184 : /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount
185 : * we shall operate on. */
186 0 : if (!path_equal(path, cleaned)) {
187 0 : bool blacklisted = false;
188 : char **i;
189 :
190 0 : STRV_FOREACH(i, blacklist) {
191 0 : if (path_equal(*i, cleaned))
192 0 : continue;
193 :
194 0 : if (!path_startswith(*i, cleaned))
195 0 : continue;
196 :
197 0 : if (path_startswith(path, *i)) {
198 0 : blacklisted = true;
199 0 : log_debug("Not remounting %s blacklisted by %s, called for %s",
200 : path, *i, cleaned);
201 0 : break;
202 : }
203 : }
204 0 : if (blacklisted)
205 0 : continue;
206 : }
207 :
208 : /* Let's ignore autofs mounts. If they aren't
209 : * triggered yet, we want to avoid triggering
210 : * them, as we don't make any guarantees for
211 : * future submounts anyway. If they are
212 : * already triggered, then we will find
213 : * another entry for this. */
214 0 : if (streq(type, "autofs")) {
215 0 : top_autofs = top_autofs || path_equal(path, cleaned);
216 0 : continue;
217 : }
218 :
219 0 : if (!set_contains(done, path)) {
220 0 : r = set_put_strdup(todo, path);
221 0 : if (r < 0)
222 0 : return r;
223 : }
224 : }
225 :
226 : /* If we have no submounts to process anymore and if
227 : * the root is either already done, or an autofs, we
228 : * are done */
229 0 : if (set_isempty(todo) &&
230 0 : (top_autofs || set_contains(done, cleaned)))
231 0 : return 0;
232 :
233 0 : if (!set_contains(done, cleaned) &&
234 0 : !set_contains(todo, cleaned)) {
235 : /* The prefix directory itself is not yet a mount, make it one. */
236 0 : if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
237 0 : return -errno;
238 :
239 0 : orig_flags = 0;
240 0 : (void) get_mount_flags(cleaned, &orig_flags, table);
241 0 : orig_flags &= ~MS_RDONLY;
242 :
243 0 : if (mount(NULL, cleaned, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
244 0 : return -errno;
245 :
246 0 : log_debug("Made top-level directory %s a mount point.", prefix);
247 :
248 0 : r = set_put_strdup(done, cleaned);
249 0 : if (r < 0)
250 0 : return r;
251 : }
252 :
253 0 : while ((x = set_steal_first(todo))) {
254 :
255 0 : r = set_consume(done, x);
256 0 : if (IN_SET(r, 0, -EEXIST))
257 0 : continue;
258 0 : if (r < 0)
259 0 : return r;
260 :
261 : /* Deal with mount points that are obstructed by a later mount */
262 0 : r = path_is_mount_point(x, NULL, 0);
263 0 : if (IN_SET(r, 0, -ENOENT))
264 0 : continue;
265 0 : if (IN_SET(r, -EACCES, -EPERM)) {
266 : /* Even if root user invoke this, submounts under private FUSE or NFS mount points
267 : * may not be acceessed. E.g.,
268 : *
269 : * $ bindfs --no-allow-other ~/mnt/mnt ~/mnt/mnt
270 : * $ bindfs --no-allow-other ~/mnt ~/mnt
271 : *
272 : * Then, root user cannot access the mount point ~/mnt/mnt.
273 : * In such cases, the submounts are ignored, as we have no way to manage them. */
274 0 : log_debug_errno(r, "Failed to determine '%s' is mount point or not, ignoring: %m", x);
275 0 : continue;
276 : }
277 0 : if (r < 0)
278 0 : return r;
279 :
280 : /* Try to reuse the original flag set */
281 0 : orig_flags = 0;
282 0 : (void) get_mount_flags(x, &orig_flags, table);
283 0 : orig_flags &= ~MS_RDONLY;
284 :
285 0 : if (mount(NULL, x, NULL, (orig_flags & ~flags_mask)|MS_BIND|MS_REMOUNT|new_flags, NULL) < 0)
286 0 : return -errno;
287 :
288 0 : log_debug("Remounted %s read-only.", x);
289 : }
290 : }
291 : }
292 :
293 0 : int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **blacklist) {
294 0 : _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
295 : int r;
296 :
297 0 : r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
298 0 : if (r < 0)
299 0 : return r;
300 :
301 0 : return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, blacklist, proc_self_mountinfo);
302 : }
303 :
/* Moves the mount at 'path' onto "/" and makes it the root directory of the calling process.
 *
 * The steps are executed strictly in this order and processing stops at the first failure:
 * chdir(path), mount(MS_MOVE) of 'path' onto "/", chroot("."), chdir("/").
 *
 * Returns 0 on success, -errno of the failing call otherwise. */
int mount_move_root(const char *path) {
        assert(path);

        /* The short-circuit evaluation guarantees that errno belongs to the first call that failed. */
        if (chdir(path) < 0 ||
            mount(path, "/", NULL, MS_MOVE, NULL) < 0 ||
            chroot(".") < 0 ||
            chdir("/") < 0)
                return -errno;

        return 0;
}
321 :
/* Calls umount2() on 'path' over and over again, so that all mounts stacked on this mount point are
 * removed.
 *
 * The loop ends at the first failing umount2() call. If that failure is EINVAL the return value tells
 * whether at least one unmount succeeded before (1) or not (0). Any other failure is returned as
 * -errno. */
int repeat_unmount(const char *path, int flags) {
        bool unmounted_any = false;

        assert(path);

        while (umount2(path, flags) >= 0)
                unmounted_any = true;

        if (errno != EINVAL)
                return -errno;

        return unmounted_any;
}
342 :
/* Maps the file type encoded in 'mode' to the path of the matching "inaccessible" file node below
 * /run/systemd/inaccessible/.
 *
 * These nodes are created during early boot by PID 1. In some cases we lacked the privs to create the
 * character and block devices (maybe because we run in an userns environment, or miss CAP_SYS_MKNOD, or
 * run with a devices policy that excludes device nodes with major and minor of 0), but that's fine, in
 * that case we use an AF_UNIX file node instead, which is not the same, but close enough for most uses.
 * And most importantly, the kernel allows bind mounts from socket nodes to any non-directory file nodes,
 * and that's the most important thing that matters.
 *
 * Returns a pointer to a constant string, or NULL if the file type is none of regular file, directory,
 * character device, block device, FIFO or socket. */
const char* mode_to_inaccessible_node(mode_t mode) {
        const mode_t kind = mode & S_IFMT;
        const char *device_node;

        /* Node types that always map to a fixed path */
        if (kind == S_IFREG)
                return "/run/systemd/inaccessible/reg";
        if (kind == S_IFDIR)
                return "/run/systemd/inaccessible/dir";
        if (kind == S_IFIFO)
                return "/run/systemd/inaccessible/fifo";
        if (kind == S_IFSOCK)
                return "/run/systemd/inaccessible/sock";

        /* Device nodes: only usable if the node actually exists */
        if (kind == S_IFCHR)
                device_node = "/run/systemd/inaccessible/chr";
        else if (kind == S_IFBLK)
                device_node = "/run/systemd/inaccessible/blk";
        else
                return NULL;

        if (access(device_node, F_OK) == 0)
                return device_node;

        /* Device node is missing, fall back to the socket node */
        return "/run/systemd/inaccessible/sock";
}
376 :
377 : #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
378 0 : static char* mount_flags_to_string(long unsigned flags) {
379 : char *x;
380 0 : _cleanup_free_ char *y = NULL;
381 : long unsigned overflow;
382 :
383 0 : overflow = flags & ~(MS_RDONLY |
384 : MS_NOSUID |
385 : MS_NODEV |
386 : MS_NOEXEC |
387 : MS_SYNCHRONOUS |
388 : MS_REMOUNT |
389 : MS_MANDLOCK |
390 : MS_DIRSYNC |
391 : MS_NOATIME |
392 : MS_NODIRATIME |
393 : MS_BIND |
394 : MS_MOVE |
395 : MS_REC |
396 : MS_SILENT |
397 : MS_POSIXACL |
398 : MS_UNBINDABLE |
399 : MS_PRIVATE |
400 : MS_SLAVE |
401 : MS_SHARED |
402 : MS_RELATIME |
403 : MS_KERNMOUNT |
404 : MS_I_VERSION |
405 : MS_STRICTATIME |
406 : MS_LAZYTIME);
407 :
408 0 : if (flags == 0 || overflow != 0)
409 0 : if (asprintf(&y, "%lx", overflow) < 0)
410 0 : return NULL;
411 :
412 0 : x = strjoin(FLAG(MS_RDONLY),
413 : FLAG(MS_NOSUID),
414 : FLAG(MS_NODEV),
415 : FLAG(MS_NOEXEC),
416 : FLAG(MS_SYNCHRONOUS),
417 : FLAG(MS_REMOUNT),
418 : FLAG(MS_MANDLOCK),
419 : FLAG(MS_DIRSYNC),
420 : FLAG(MS_NOATIME),
421 : FLAG(MS_NODIRATIME),
422 : FLAG(MS_BIND),
423 : FLAG(MS_MOVE),
424 : FLAG(MS_REC),
425 : FLAG(MS_SILENT),
426 : FLAG(MS_POSIXACL),
427 : FLAG(MS_UNBINDABLE),
428 : FLAG(MS_PRIVATE),
429 : FLAG(MS_SLAVE),
430 : FLAG(MS_SHARED),
431 : FLAG(MS_RELATIME),
432 : FLAG(MS_KERNMOUNT),
433 : FLAG(MS_I_VERSION),
434 : FLAG(MS_STRICTATIME),
435 : FLAG(MS_LAZYTIME),
436 : y);
437 0 : if (!x)
438 0 : return NULL;
439 0 : if (!y)
440 0 : x[strlen(x) - 1] = '\0'; /* truncate the last | */
441 0 : return x;
442 : }
443 :
444 0 : int mount_verbose(
445 : int error_log_level,
446 : const char *what,
447 : const char *where,
448 : const char *type,
449 : unsigned long flags,
450 : const char *options) {
451 :
452 0 : _cleanup_free_ char *fl = NULL, *o = NULL;
453 : unsigned long f;
454 : int r;
455 :
456 0 : r = mount_option_mangle(options, flags, &f, &o);
457 0 : if (r < 0)
458 0 : return log_full_errno(error_log_level, r,
459 : "Failed to mangle mount options %s: %m",
460 : strempty(options));
461 :
462 0 : fl = mount_flags_to_string(f);
463 :
464 0 : if ((f & MS_REMOUNT) && !what && !type)
465 0 : log_debug("Remounting %s (%s \"%s\")...",
466 : where, strnull(fl), strempty(o));
467 0 : else if (!what && !type)
468 0 : log_debug("Mounting %s (%s \"%s\")...",
469 : where, strnull(fl), strempty(o));
470 0 : else if ((f & MS_BIND) && !type)
471 0 : log_debug("Bind-mounting %s on %s (%s \"%s\")...",
472 : what, where, strnull(fl), strempty(o));
473 0 : else if (f & MS_MOVE)
474 0 : log_debug("Moving mount %s → %s (%s \"%s\")...",
475 : what, where, strnull(fl), strempty(o));
476 : else
477 0 : log_debug("Mounting %s on %s (%s \"%s\")...",
478 : strna(type), where, strnull(fl), strempty(o));
479 0 : if (mount(what, where, type, f, o) < 0)
480 0 : return log_full_errno(error_log_level, errno,
481 : "Failed to mount %s (type %s) on %s (%s \"%s\"): %m",
482 : strna(what), strna(type), where, strnull(fl), strempty(o));
483 0 : return 0;
484 : }
485 :
/* Wrapper around umount() that announces the operation at debug level and logs a failure at error
 * level. Returns 0 on success, otherwise the value of the error log call. */
int umount_verbose(const char *what) {
        log_debug("Umounting %s...", what);

        if (umount(what) >= 0)
                return 0;

        return log_error_errno(errno, "Failed to unmount %s: %m", what);
}
492 :
493 27 : int mount_option_mangle(
494 : const char *options,
495 : unsigned long mount_flags,
496 : unsigned long *ret_mount_flags,
497 : char **ret_remaining_options) {
498 :
499 : const struct libmnt_optmap *map;
500 27 : _cleanup_free_ char *ret = NULL;
501 : const char *p;
502 : int r;
503 :
504 : /* This extracts mount flags from the mount options, and store
505 : * non-mount-flag options to '*ret_remaining_options'.
506 : * E.g.,
507 : * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
508 : * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
509 : * "size=1630748k,mode=700,uid=1000,gid=1000".
510 : * See more examples in test-mount-utils.c.
511 : *
512 : * Note that if 'options' does not contain any non-mount-flag options,
513 : * then '*ret_remaining_options' is set to NULL instead of empty string.
514 : * Note that this does not check validity of options stored in
515 : * '*ret_remaining_options'.
516 : * Note that if 'options' is NULL, then this just copies 'mount_flags'
517 : * to '*ret_mount_flags'. */
518 :
519 27 : assert(ret_mount_flags);
520 27 : assert(ret_remaining_options);
521 :
522 27 : map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
523 27 : if (!map)
524 0 : return -EINVAL;
525 :
526 27 : p = options;
527 137 : for (;;) {
528 164 : _cleanup_free_ char *word = NULL;
529 : const struct libmnt_optmap *ent;
530 :
531 164 : r = extract_first_word(&p, &word, ",", EXTRACT_UNQUOTE);
532 164 : if (r < 0)
533 1 : return r;
534 163 : if (r == 0)
535 26 : break;
536 :
537 2797 : for (ent = map; ent->name; ent++) {
538 : /* All entries in MNT_LINUX_MAP do not take any argument.
539 : * Thus, ent->name does not contain "=" or "[=]". */
540 2750 : if (!streq(word, ent->name))
541 2660 : continue;
542 :
543 90 : if (!(ent->mask & MNT_INVERT))
544 50 : mount_flags |= ent->id;
545 40 : else if (mount_flags & ent->id)
546 7 : mount_flags ^= ent->id;
547 :
548 90 : break;
549 : }
550 :
551 : /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
552 137 : if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
553 0 : return -ENOMEM;
554 : }
555 :
556 26 : *ret_mount_flags = mount_flags;
557 26 : *ret_remaining_options = TAKE_PTR(ret);
558 :
559 26 : return 0;
560 : }
|