File: | build-scan/../src/core/namespace.c |
Warning: | line 639, column 9 Value stored to 'u' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
2 | |
3 | #include <errno(*__errno_location ()).h> |
4 | #include <sched.h> |
5 | #include <stdio.h> |
6 | #include <string.h> |
7 | #include <sys/mount.h> |
8 | #include <sys/stat.h> |
9 | #include <unistd.h> |
10 | #include <linux1/fs.h> |
11 | |
12 | #include "alloc-util.h" |
13 | #include "base-filesystem.h" |
14 | #include "dev-setup.h" |
15 | #include "fd-util.h" |
16 | #include "fs-util.h" |
17 | #include "label.h" |
18 | #include "loop-util.h" |
19 | #include "loopback-setup.h" |
20 | #include "missing.h" |
21 | #include "mkdir.h" |
22 | #include "mount-util.h" |
23 | #include "namespace.h" |
24 | #include "path-util.h" |
25 | #include "selinux-util.h" |
26 | #include "socket-util.h" |
27 | #include "stat-util.h" |
28 | #include "string-table.h" |
29 | #include "string-util.h" |
30 | #include "strv.h" |
31 | #include "umask-util.h" |
32 | #include "user-util.h" |
33 | #include "util.h" |
34 | |
/* Mount flags for the tmpfs instance that mount_private_dev() mounts as the new /dev: no setuid binaries, no
 * executables, strict access time updates. */
#define DEV_MOUNT_OPTIONS (MS_NOSUID|MS_NOEXEC|MS_STRICTATIME)
36 | |
/* What kind of mount operation a MountEntry requests.
 *
 * The declaration order is significant: this is ordered by priority! mount_path_compare() sorts entries for
 * the same path by ascending value, and drop_duplicates() keeps the first of them. */
typedef enum MountMode {
        INACCESSIBLE,           /* treated as read-only; masks everything below it (see drop_inaccessible()) */
        BIND_MOUNT,             /* non-recursive bind mount */
        BIND_MOUNT_RECURSIVE,   /* recursive bind mount */
        PRIVATE_TMP,
        PRIVATE_DEV,
        BIND_DEV,
        EMPTY_DIR,              /* uses MountEntry.flags */
        SYSFS,
        PROCFS,
        READONLY,
        READWRITE,
        TMPFS,                  /* uses MountEntry.flags */
} MountMode;
52 | |
53 | typedef struct MountEntry { |
54 | const char *path_const; /* Memory allocated on stack or static */ |
55 | MountMode mode:5; |
56 | bool_Bool ignore:1; /* Ignore if path does not exist? */ |
57 | bool_Bool has_prefix:1; /* Already is prefixed by the root dir? */ |
58 | bool_Bool read_only:1; /* Shall this mount point be read-only? */ |
59 | bool_Bool applied:1; /* Already applied */ |
60 | char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */ |
61 | const char *source_const; /* The source path, for bind mounts */ |
62 | char *source_malloc; |
63 | const char *options_const;/* Mount options for tmpfs */ |
64 | char *options_malloc; |
65 | unsigned long flags; /* Mount flags used by EMPTY_DIR and TMPFS. Do not include MS_RDONLY here, but please use read_only. */ |
66 | unsigned n_followed; |
67 | } MountEntry; |
68 | |
69 | /* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted |
70 | * something there already. These mounts are hence overridden by any other explicitly configured mounts. */ |
71 | static const MountEntry apivfs_table[] = { |
72 | { "/proc", PROCFS, false0 }, |
73 | { "/dev", BIND_DEV, false0 }, |
74 | { "/sys", SYSFS, false0 }, |
75 | }; |
76 | |
77 | /* ProtectKernelTunables= option and the related filesystem APIs */ |
78 | static const MountEntry protect_kernel_tunables_table[] = { |
79 | { "/proc/acpi", READONLY, true1 }, |
80 | { "/proc/apm", READONLY, true1 }, /* Obsolete API, there's no point in permitting access to this, ever */ |
81 | { "/proc/asound", READONLY, true1 }, |
82 | { "/proc/bus", READONLY, true1 }, |
83 | { "/proc/fs", READONLY, true1 }, |
84 | { "/proc/irq", READONLY, true1 }, |
85 | { "/proc/kallsyms", INACCESSIBLE, true1 }, |
86 | { "/proc/kcore", INACCESSIBLE, true1 }, |
87 | { "/proc/latency_stats", READONLY, true1 }, |
88 | { "/proc/mtrr", READONLY, true1 }, |
89 | { "/proc/scsi", READONLY, true1 }, |
90 | { "/proc/sys", READONLY, false0 }, |
91 | { "/proc/sysrq-trigger", READONLY, true1 }, |
92 | { "/proc/timer_stats", READONLY, true1 }, |
93 | { "/sys", READONLY, false0 }, |
94 | { "/sys/fs/bpf", READONLY, true1 }, |
95 | { "/sys/fs/cgroup", READWRITE, false0 }, /* READONLY is set by ProtectControlGroups= option */ |
96 | { "/sys/fs/selinux", READWRITE, true1 }, |
97 | { "/sys/kernel/debug", READONLY, true1 }, |
98 | { "/sys/kernel/tracing", READONLY, true1 }, |
99 | }; |
100 | |
101 | /* ProtectKernelModules= option */ |
102 | static const MountEntry protect_kernel_modules_table[] = { |
103 | #if HAVE_SPLIT_USR0 |
104 | { "/lib/modules", INACCESSIBLE, true1 }, |
105 | #endif |
106 | { "/usr/lib/modules", INACCESSIBLE, true1 }, |
107 | }; |
108 | |
109 | /* |
110 | * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of |
111 | * system should be protected by ProtectSystem= |
112 | */ |
113 | static const MountEntry protect_home_read_only_table[] = { |
114 | { "/home", READONLY, true1 }, |
115 | { "/run/user", READONLY, true1 }, |
116 | { "/root", READONLY, true1 }, |
117 | }; |
118 | |
119 | /* ProtectHome=tmpfs table */ |
120 | static const MountEntry protect_home_tmpfs_table[] = { |
121 | { "/home", TMPFS, true1, .read_only = true1, .options_const = "mode=0755", .flags = MS_NODEV4|MS_STRICTATIME(1<<24) }, |
122 | { "/run/user", TMPFS, true1, .read_only = true1, .options_const = "mode=0755", .flags = MS_NODEV4|MS_STRICTATIME(1<<24) }, |
123 | { "/root", TMPFS, true1, .read_only = true1, .options_const = "mode=0700", .flags = MS_NODEV4|MS_STRICTATIME(1<<24) }, |
124 | }; |
125 | |
126 | /* ProtectHome=yes table */ |
127 | static const MountEntry protect_home_yes_table[] = { |
128 | { "/home", INACCESSIBLE, true1 }, |
129 | { "/run/user", INACCESSIBLE, true1 }, |
130 | { "/root", INACCESSIBLE, true1 }, |
131 | }; |
132 | |
133 | /* ProtectSystem=yes table */ |
134 | static const MountEntry protect_system_yes_table[] = { |
135 | { "/usr", READONLY, false0 }, |
136 | { "/boot", READONLY, true1 }, |
137 | { "/efi", READONLY, true1 }, |
138 | #if HAVE_SPLIT_USR0 |
139 | { "/lib", READONLY, true1 }, |
140 | { "/lib64", READONLY, true1 }, |
141 | { "/bin", READONLY, true1 }, |
142 | # if HAVE_SPLIT_BIN1 |
143 | { "/sbin", READONLY, true1 }, |
144 | # endif |
145 | #endif |
146 | }; |
147 | |
148 | /* ProtectSystem=full includes ProtectSystem=yes */ |
149 | static const MountEntry protect_system_full_table[] = { |
150 | { "/usr", READONLY, false0 }, |
151 | { "/boot", READONLY, true1 }, |
152 | { "/efi", READONLY, true1 }, |
153 | { "/etc", READONLY, false0 }, |
154 | #if HAVE_SPLIT_USR0 |
155 | { "/lib", READONLY, true1 }, |
156 | { "/lib64", READONLY, true1 }, |
157 | { "/bin", READONLY, true1 }, |
158 | # if HAVE_SPLIT_BIN1 |
159 | { "/sbin", READONLY, true1 }, |
160 | # endif |
161 | #endif |
162 | }; |
163 | |
164 | /* |
165 | * ProtectSystem=strict table. In this strict mode, we mount everything |
166 | * read-only, except for /proc, /dev, /sys which are the kernel API VFS, |
167 | * which are left writable, but PrivateDevices= + ProtectKernelTunables= |
168 | * protect those, and these options should be fully orthogonal. |
169 | * (And of course /home and friends are also left writable, as ProtectHome= |
170 | * shall manage those, orthogonally). |
171 | */ |
172 | static const MountEntry protect_system_strict_table[] = { |
173 | { "/", READONLY, false0 }, |
174 | { "/proc", READWRITE, false0 }, /* ProtectKernelTunables= */ |
175 | { "/sys", READWRITE, false0 }, /* ProtectKernelTunables= */ |
176 | { "/dev", READWRITE, false0 }, /* PrivateDevices= */ |
177 | { "/home", READWRITE, true1 }, /* ProtectHome= */ |
178 | { "/run/user", READWRITE, true1 }, /* ProtectHome= */ |
179 | { "/root", READWRITE, true1 }, /* ProtectHome= */ |
180 | }; |
181 | |
182 | static const char *mount_entry_path(const MountEntry *p) { |
183 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 183, __PRETTY_FUNCTION__ ); } while (0); |
184 | |
185 | /* Returns the path of this bind mount. If the malloc()-allocated ->path_buffer field is set we return that, |
186 | * otherwise the stack/static ->path field is returned. */ |
187 | |
188 | return p->path_malloc ?: p->path_const; |
189 | } |
190 | |
191 | static bool_Bool mount_entry_read_only(const MountEntry *p) { |
192 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 192, __PRETTY_FUNCTION__ ); } while (0); |
193 | |
194 | return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE)({ _Bool _found = 0; static __attribute__ ((unused)) char _static_assert__macros_need_to_be_extended [20 - sizeof((int[]){READONLY, INACCESSIBLE})/sizeof(int)]; switch (p->mode) { case READONLY: case INACCESSIBLE: _found = 1; break ; default: break; } _found; }); |
195 | } |
196 | |
197 | static const char *mount_entry_source(const MountEntry *p) { |
198 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 198, __PRETTY_FUNCTION__ ); } while (0); |
199 | |
200 | return p->source_malloc ?: p->source_const; |
201 | } |
202 | |
203 | static const char *mount_entry_options(const MountEntry *p) { |
204 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 204, __PRETTY_FUNCTION__ ); } while (0); |
205 | |
206 | return p->options_malloc ?: p->options_const; |
207 | } |
208 | |
209 | static void mount_entry_done(MountEntry *p) { |
210 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 210, __PRETTY_FUNCTION__ ); } while (0); |
211 | |
212 | p->path_malloc = mfree(p->path_malloc); |
213 | p->source_malloc = mfree(p->source_malloc); |
214 | p->options_malloc = mfree(p->options_malloc); |
215 | } |
216 | |
217 | static int append_access_mounts(MountEntry **p, char **strv, MountMode mode, bool_Bool forcibly_require_prefix) { |
218 | char **i; |
219 | |
220 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 220, __PRETTY_FUNCTION__ ); } while (0); |
221 | |
222 | /* Adds a list of user-supplied READWRITE/READONLY/INACCESSIBLE entries */ |
223 | |
224 | STRV_FOREACH(i, strv)for ((i) = (strv); (i) && *(i); (i)++) { |
225 | bool_Bool ignore = false0, needs_prefix = false0; |
226 | const char *e = *i; |
227 | |
228 | /* Look for any prefixes */ |
229 | if (startswith(e, "-")) { |
230 | e++; |
231 | ignore = true1; |
232 | } |
233 | if (startswith(e, "+")) { |
234 | e++; |
235 | needs_prefix = true1; |
236 | } |
237 | |
238 | if (!path_is_absolute(e)) |
239 | return -EINVAL22; |
240 | |
241 | *((*p)++) = (MountEntry) { |
242 | .path_const = e, |
243 | .mode = mode, |
244 | .ignore = ignore, |
245 | .has_prefix = !needs_prefix && !forcibly_require_prefix, |
246 | }; |
247 | } |
248 | |
249 | return 0; |
250 | } |
251 | |
252 | static int append_empty_dir_mounts(MountEntry **p, char **strv) { |
253 | char **i; |
254 | |
255 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 255, __PRETTY_FUNCTION__ ); } while (0); |
256 | |
257 | /* Adds tmpfs mounts to provide readable but empty directories. This is primarily used to implement the |
258 | * "/private/" boundary directories for DynamicUser=1. */ |
259 | |
260 | STRV_FOREACH(i, strv)for ((i) = (strv); (i) && *(i); (i)++) { |
261 | |
262 | *((*p)++) = (MountEntry) { |
263 | .path_const = *i, |
264 | .mode = EMPTY_DIR, |
265 | .ignore = false0, |
266 | .has_prefix = false0, |
267 | .read_only = true1, |
268 | .options_const = "mode=755", |
269 | .flags = MS_NOSUID2|MS_NOEXEC8|MS_NODEV4|MS_STRICTATIME(1<<24), |
270 | }; |
271 | } |
272 | |
273 | return 0; |
274 | } |
275 | |
276 | static int append_bind_mounts(MountEntry **p, const BindMount *binds, size_t n) { |
277 | size_t i; |
278 | |
279 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 279, __PRETTY_FUNCTION__ ); } while (0); |
280 | |
281 | for (i = 0; i < n; i++) { |
282 | const BindMount *b = binds + i; |
283 | |
284 | *((*p)++) = (MountEntry) { |
285 | .path_const = b->destination, |
286 | .mode = b->recursive ? BIND_MOUNT_RECURSIVE : BIND_MOUNT, |
287 | .read_only = b->read_only, |
288 | .source_const = b->source, |
289 | .ignore = b->ignore_enoent, |
290 | }; |
291 | } |
292 | |
293 | return 0; |
294 | } |
295 | |
296 | static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) { |
297 | size_t i; |
298 | int r; |
299 | |
300 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 300, __PRETTY_FUNCTION__ ); } while (0); |
301 | |
302 | for (i = 0; i < n; i++) { |
303 | const TemporaryFileSystem *t = tmpfs + i; |
304 | _cleanup_free___attribute__((cleanup(freep))) char *o = NULL((void*)0), *str = NULL((void*)0); |
305 | unsigned long flags = MS_NODEV4|MS_STRICTATIME(1<<24); |
306 | bool_Bool ro = false0; |
307 | |
308 | if (!path_is_absolute(t->path)) |
309 | return -EINVAL22; |
310 | |
311 | if (!isempty(t->options)) { |
312 | str = strjoin("mode=0755,", t->options)strjoin_real(("mode=0755,"), t->options, ((void*)0)); |
313 | if (!str) |
314 | return -ENOMEM12; |
315 | |
316 | r = mount_option_mangle(str, MS_NODEV4|MS_STRICTATIME(1<<24), &flags, &o); |
317 | if (r < 0) |
318 | return r; |
319 | |
320 | ro = flags & MS_RDONLY1; |
321 | if (ro) |
322 | flags ^= MS_RDONLY1; |
323 | } |
324 | |
325 | *((*p)++) = (MountEntry) { |
326 | .path_const = t->path, |
327 | .mode = TMPFS, |
328 | .read_only = ro, |
329 | .options_malloc = o, |
330 | .flags = flags, |
331 | }; |
332 | |
333 | o = NULL((void*)0); |
334 | } |
335 | |
336 | return 0; |
337 | } |
338 | |
339 | static int append_static_mounts(MountEntry **p, const MountEntry *mounts, size_t n, bool_Bool ignore_protect) { |
340 | size_t i; |
341 | |
342 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 342, __PRETTY_FUNCTION__ ); } while (0); |
343 | assert(mounts)do { if ((__builtin_expect(!!(!(mounts)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("mounts"), "../src/core/namespace.c", 343 , __PRETTY_FUNCTION__); } while (0); |
344 | |
345 | /* Adds a list of static pre-defined entries */ |
346 | |
347 | for (i = 0; i < n; i++) |
348 | *((*p)++) = (MountEntry) { |
349 | .path_const = mount_entry_path(mounts+i), |
350 | .mode = mounts[i].mode, |
351 | .ignore = mounts[i].ignore || ignore_protect, |
352 | }; |
353 | |
354 | return 0; |
355 | } |
356 | |
357 | static int append_protect_home(MountEntry **p, ProtectHome protect_home, bool_Bool ignore_protect) { |
358 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 358, __PRETTY_FUNCTION__ ); } while (0); |
359 | |
360 | switch (protect_home) { |
361 | |
362 | case PROTECT_HOME_NO: |
363 | return 0; |
364 | |
365 | case PROTECT_HOME_READ_ONLY: |
366 | return append_static_mounts(p, protect_home_read_only_table, ELEMENTSOF(protect_home_read_only_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_read_only_table), typeof(&*(protect_home_read_only_table ))), sizeof(protect_home_read_only_table)/sizeof((protect_home_read_only_table )[0]), ((void)0))), ignore_protect); |
367 | |
368 | case PROTECT_HOME_TMPFS: |
369 | return append_static_mounts(p, protect_home_tmpfs_table, ELEMENTSOF(protect_home_tmpfs_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_tmpfs_table), typeof(&*(protect_home_tmpfs_table ))), sizeof(protect_home_tmpfs_table)/sizeof((protect_home_tmpfs_table )[0]), ((void)0))), ignore_protect); |
370 | |
371 | case PROTECT_HOME_YES: |
372 | return append_static_mounts(p, protect_home_yes_table, ELEMENTSOF(protect_home_yes_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_yes_table), typeof(&*(protect_home_yes_table ))), sizeof(protect_home_yes_table)/sizeof((protect_home_yes_table )[0]), ((void)0))), ignore_protect); |
373 | |
374 | default: |
375 | assert_not_reached("Unexpected ProtectHome= value")do { log_assert_failed_unreachable_realm(LOG_REALM_SYSTEMD, ( "Unexpected ProtectHome= value"), "../src/core/namespace.c", 375 , __PRETTY_FUNCTION__); } while (0); |
376 | } |
377 | } |
378 | |
379 | static int append_protect_system(MountEntry **p, ProtectSystem protect_system, bool_Bool ignore_protect) { |
380 | assert(p)do { if ((__builtin_expect(!!(!(p)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("p"), "../src/core/namespace.c", 380, __PRETTY_FUNCTION__ ); } while (0); |
381 | |
382 | switch (protect_system) { |
383 | |
384 | case PROTECT_SYSTEM_NO: |
385 | return 0; |
386 | |
387 | case PROTECT_SYSTEM_STRICT: |
388 | return append_static_mounts(p, protect_system_strict_table, ELEMENTSOF(protect_system_strict_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_strict_table), typeof(&*(protect_system_strict_table ))), sizeof(protect_system_strict_table)/sizeof((protect_system_strict_table )[0]), ((void)0))), ignore_protect); |
389 | |
390 | case PROTECT_SYSTEM_YES: |
391 | return append_static_mounts(p, protect_system_yes_table, ELEMENTSOF(protect_system_yes_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_yes_table), typeof(&*(protect_system_yes_table ))), sizeof(protect_system_yes_table)/sizeof((protect_system_yes_table )[0]), ((void)0))), ignore_protect); |
392 | |
393 | case PROTECT_SYSTEM_FULL: |
394 | return append_static_mounts(p, protect_system_full_table, ELEMENTSOF(protect_system_full_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_full_table), typeof(&*(protect_system_full_table ))), sizeof(protect_system_full_table)/sizeof((protect_system_full_table )[0]), ((void)0))), ignore_protect); |
395 | |
396 | default: |
397 | assert_not_reached("Unexpected ProtectSystem= value")do { log_assert_failed_unreachable_realm(LOG_REALM_SYSTEMD, ( "Unexpected ProtectSystem= value"), "../src/core/namespace.c" , 397, __PRETTY_FUNCTION__); } while (0); |
398 | } |
399 | } |
400 | |
401 | static int mount_path_compare(const void *a, const void *b) { |
402 | const MountEntry *p = a, *q = b; |
403 | int d; |
404 | |
405 | /* If the paths are not equal, then order prefixes first */ |
406 | d = path_compare(mount_entry_path(p), mount_entry_path(q)); |
407 | if (d != 0) |
408 | return d; |
409 | |
410 | /* If the paths are equal, check the mode */ |
411 | if (p->mode < q->mode) |
412 | return -1; |
413 | if (p->mode > q->mode) |
414 | return 1; |
415 | |
416 | return 0; |
417 | } |
418 | |
419 | static int prefix_where_needed(MountEntry *m, size_t n, const char *root_directory) { |
420 | size_t i; |
421 | |
422 | /* Prefixes all paths in the bind mount table with the root directory if it is specified and the entry needs |
423 | * that. */ |
424 | |
425 | if (!root_directory) |
426 | return 0; |
427 | |
428 | for (i = 0; i < n; i++) { |
429 | char *s; |
430 | |
431 | if (m[i].has_prefix) |
432 | continue; |
433 | |
434 | s = prefix_root(root_directory, mount_entry_path(m+i)); |
435 | if (!s) |
436 | return -ENOMEM12; |
437 | |
438 | free_and_replace(m[i].path_malloc, s)({ free(m[i].path_malloc); (m[i].path_malloc) = (s); (s) = (( void*)0); 0; }); |
439 | m[i].has_prefix = true1; |
440 | } |
441 | |
442 | return 0; |
443 | } |
444 | |
445 | static void drop_duplicates(MountEntry *m, size_t *n) { |
446 | MountEntry *f, *t, *previous; |
447 | |
448 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 448, __PRETTY_FUNCTION__ ); } while (0); |
449 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 449, __PRETTY_FUNCTION__ ); } while (0); |
450 | |
451 | /* Drops duplicate entries. Expects that the array is properly ordered already. */ |
452 | |
453 | for (f = m, t = m, previous = NULL((void*)0); f < m + *n; f++) { |
454 | |
455 | /* The first one wins (which is the one with the more restrictive mode), see mount_path_compare() |
456 | * above. Note that we only drop duplicates that haven't been mounted yet. */ |
457 | if (previous && |
458 | path_equal(mount_entry_path(f), mount_entry_path(previous)) && |
459 | !f->applied && !previous->applied) { |
460 | log_debug("%s is duplicate.", mount_entry_path(f))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 460, __func__, "%s is duplicate." , mount_entry_path(f)) : -abs(_e); }); |
461 | previous->read_only = previous->read_only || mount_entry_read_only(f); /* Propagate the read-only flag to the remaining entry */ |
462 | mount_entry_done(f); |
463 | continue; |
464 | } |
465 | |
466 | *t = *f; |
467 | previous = t; |
468 | t++; |
469 | } |
470 | |
471 | *n = t - m; |
472 | } |
473 | |
474 | static void drop_inaccessible(MountEntry *m, size_t *n) { |
475 | MountEntry *f, *t; |
476 | const char *clear = NULL((void*)0); |
477 | |
478 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 478, __PRETTY_FUNCTION__ ); } while (0); |
479 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 479, __PRETTY_FUNCTION__ ); } while (0); |
480 | |
481 | /* Drops all entries obstructed by another entry further up the tree. Expects that the array is properly |
482 | * ordered already. */ |
483 | |
484 | for (f = m, t = m; f < m + *n; f++) { |
485 | |
486 | /* If we found a path set for INACCESSIBLE earlier, and this entry has it as prefix we should drop |
487 | * it, as inaccessible paths really should drop the entire subtree. */ |
488 | if (clear && path_startswith(mount_entry_path(f), clear)) { |
489 | log_debug("%s is masked by %s.", mount_entry_path(f), clear)({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 489, __func__, "%s is masked by %s." , mount_entry_path(f), clear) : -abs(_e); }); |
490 | mount_entry_done(f); |
491 | continue; |
492 | } |
493 | |
494 | clear = f->mode == INACCESSIBLE ? mount_entry_path(f) : NULL((void*)0); |
495 | |
496 | *t = *f; |
497 | t++; |
498 | } |
499 | |
500 | *n = t - m; |
501 | } |
502 | |
503 | static void drop_nop(MountEntry *m, size_t *n) { |
504 | MountEntry *f, *t; |
505 | |
506 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 506, __PRETTY_FUNCTION__ ); } while (0); |
507 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 507, __PRETTY_FUNCTION__ ); } while (0); |
508 | |
509 | /* Drops all entries which have an immediate parent that has the same type, as they are redundant. Assumes the |
510 | * list is ordered by prefixes. */ |
511 | |
512 | for (f = m, t = m; f < m + *n; f++) { |
513 | |
514 | /* Only suppress such subtrees for READONLY and READWRITE entries */ |
515 | if (IN_SET(f->mode, READONLY, READWRITE)({ _Bool _found = 0; static __attribute__ ((unused)) char _static_assert__macros_need_to_be_extended [20 - sizeof((int[]){READONLY, READWRITE})/sizeof(int)]; switch (f->mode) { case READONLY: case READWRITE: _found = 1; break ; default: break; } _found; })) { |
516 | MountEntry *p; |
517 | bool_Bool found = false0; |
518 | |
519 | /* Now let's find the first parent of the entry we are looking at. */ |
520 | for (p = t-1; p >= m; p--) { |
521 | if (path_startswith(mount_entry_path(f), mount_entry_path(p))) { |
522 | found = true1; |
523 | break; |
524 | } |
525 | } |
526 | |
527 | /* We found it, let's see if it's the same mode, if so, we can drop this entry */ |
528 | if (found && p->mode == f->mode) { |
529 | log_debug("%s is redundant by %s", mount_entry_path(f), mount_entry_path(p))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 529, __func__, "%s is redundant by %s" , mount_entry_path(f), mount_entry_path(p)) : -abs(_e); }); |
530 | mount_entry_done(f); |
531 | continue; |
532 | } |
533 | } |
534 | |
535 | *t = *f; |
536 | t++; |
537 | } |
538 | |
539 | *n = t - m; |
540 | } |
541 | |
542 | static void drop_outside_root(const char *root_directory, MountEntry *m, size_t *n) { |
543 | MountEntry *f, *t; |
544 | |
545 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 545, __PRETTY_FUNCTION__ ); } while (0); |
546 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 546, __PRETTY_FUNCTION__ ); } while (0); |
547 | |
548 | /* Nothing to do */ |
549 | if (!root_directory) |
550 | return; |
551 | |
552 | /* Drops all mounts that are outside of the root directory. */ |
553 | |
554 | for (f = m, t = m; f < m + *n; f++) { |
555 | |
556 | if (!path_startswith(mount_entry_path(f), root_directory)) { |
557 | log_debug("%s is outside of root directory.", mount_entry_path(f))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 557, __func__, "%s is outside of root directory." , mount_entry_path(f)) : -abs(_e); }); |
558 | mount_entry_done(f); |
559 | continue; |
560 | } |
561 | |
562 | *t = *f; |
563 | t++; |
564 | } |
565 | |
566 | *n = t - m; |
567 | } |
568 | |
569 | static int clone_device_node(const char *d, const char *temporary_mount, bool_Bool *make_devnode) { |
570 | const char *dn; |
571 | struct stat st; |
572 | int r; |
573 | |
574 | if (stat(d, &st) < 0) { |
575 | if (errno(*__errno_location ()) == ENOENT2) |
576 | return -ENXIO6; |
577 | return -errno(*__errno_location ()); |
578 | } |
579 | |
580 | if (!S_ISBLK(st.st_mode)((((st.st_mode)) & 0170000) == (0060000)) && |
581 | !S_ISCHR(st.st_mode)((((st.st_mode)) & 0170000) == (0020000))) |
582 | return -EINVAL22; |
583 | |
584 | if (st.st_rdev == 0) |
585 | return -ENXIO6; |
586 | |
587 | dn = strjoina(temporary_mount, d)({ const char *_appendees_[] = { temporary_mount, d }; char * _d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
588 | |
589 | if (*make_devnode) { |
590 | mac_selinux_create_file_prepare(d, st.st_mode); |
591 | r = mknod(dn, st.st_mode, st.st_rdev); |
592 | mac_selinux_create_file_clear(); |
593 | |
594 | if (r == 0) |
595 | return 0; |
596 | if (errno(*__errno_location ()) != EPERM1) |
597 | return log_debug_errno(errno, "mknod failed for %s: %m", d)({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 597, __func__ , "mknod failed for %s: %m", d) : -abs(_e); }); |
598 | |
599 | *make_devnode = false0; |
600 | } |
601 | |
602 | /* We're about to fallback to bind-mounting the device |
603 | * node. So create a dummy bind-mount target. */ |
604 | mac_selinux_create_file_prepare(d, 0); |
605 | r = mknod(dn, S_IFREG0100000, 0); |
606 | mac_selinux_create_file_clear(); |
607 | |
608 | if (r < 0 && errno(*__errno_location ()) != EEXIST17) |
609 | return log_debug_errno(errno, "mknod fallback failed for %s: %m", d)({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 609, __func__ , "mknod fallback failed for %s: %m", d) : -abs(_e); }); |
610 | |
611 | /* Fallback to bind-mounting: |
612 | * The assumption here is that all used device nodes carry standard |
613 | * properties. Specifically, the devices nodes we bind-mount should |
614 | * either be owned by root:root or root:tty (e.g. /dev/tty, /dev/ptmx) |
615 | * and should not carry ACLs. */ |
616 | if (mount(d, dn, NULL((void*)0), MS_BIND4096, NULL((void*)0)) < 0) |
617 | return log_debug_errno(errno, "mount failed for %s: %m", d)({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 617, __func__ , "mount failed for %s: %m", d) : -abs(_e); }); |
618 | |
619 | return 0; |
620 | } |
621 | |
622 | static int mount_private_dev(MountEntry *m) { |
623 | static const char devnodes[] = |
624 | "/dev/null\0" |
625 | "/dev/zero\0" |
626 | "/dev/full\0" |
627 | "/dev/random\0" |
628 | "/dev/urandom\0" |
629 | "/dev/tty\0"; |
630 | |
631 | char temporary_mount[] = "/tmp/namespace-dev-XXXXXX"; |
632 | const char *d, *dev = NULL((void*)0), *devpts = NULL((void*)0), *devshm = NULL((void*)0), *devhugepages = NULL((void*)0), *devmqueue = NULL((void*)0), *devlog = NULL((void*)0), *devptmx = NULL((void*)0); |
633 | bool_Bool can_mknod = true1; |
634 | _cleanup_umask___attribute__((cleanup(umaskp))) mode_t u; |
635 | int r; |
636 | |
637 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 637, __PRETTY_FUNCTION__ ); } while (0); |
638 | |
639 | u = umask(0000); |
Value stored to 'u' is never read | |
640 | |
641 | if (!mkdtemp(temporary_mount)) |
642 | return -errno(*__errno_location ()); |
643 | |
644 | dev = strjoina(temporary_mount, "/dev")({ const char *_appendees_[] = { temporary_mount, "/dev" }; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
645 | (void) mkdir(dev, 0755); |
646 | if (mount("tmpfs", dev, "tmpfs", DEV_MOUNT_OPTIONS(2|(1<<24)|8), "mode=755") < 0) { |
647 | r = -errno(*__errno_location ()); |
648 | goto fail; |
649 | } |
650 | |
651 | devpts = strjoina(temporary_mount, "/dev/pts")({ const char *_appendees_[] = { temporary_mount, "/dev/pts" } ; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0 ; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
652 | (void) mkdir(devpts, 0755); |
653 | if (mount("/dev/pts", devpts, NULL((void*)0), MS_BIND4096, NULL((void*)0)) < 0) { |
654 | r = -errno(*__errno_location ()); |
655 | goto fail; |
656 | } |
657 | |
658 | /* /dev/ptmx can either be a device node or a symlink to /dev/pts/ptmx |
659 | * when /dev/ptmx is a device node, /dev/pts/ptmx has 000 permissions making it inaccessible |
660 | * thus, in that case make a clone |
661 | * |
662 | * in nspawn and other containers it will be a symlink, in that case make it a symlink |
663 | */ |
664 | r = is_symlink("/dev/ptmx"); |
665 | if (r < 0) |
666 | goto fail; |
667 | if (r > 0) { |
668 | devptmx = strjoina(temporary_mount, "/dev/ptmx")({ const char *_appendees_[] = { temporary_mount, "/dev/ptmx" }; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
669 | if (symlink("pts/ptmx", devptmx) < 0) { |
670 | r = -errno(*__errno_location ()); |
671 | goto fail; |
672 | } |
673 | } else { |
674 | r = clone_device_node("/dev/ptmx", temporary_mount, &can_mknod); |
675 | if (r < 0) |
676 | goto fail; |
677 | } |
678 | |
679 | devshm = strjoina(temporary_mount, "/dev/shm")({ const char *_appendees_[] = { temporary_mount, "/dev/shm" } ; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0 ; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
680 | (void) mkdir(devshm, 0755); |
681 | r = mount("/dev/shm", devshm, NULL((void*)0), MS_BIND4096, NULL((void*)0)); |
682 | if (r < 0) { |
683 | r = -errno(*__errno_location ()); |
684 | goto fail; |
685 | } |
686 | |
687 | devmqueue = strjoina(temporary_mount, "/dev/mqueue")({ const char *_appendees_[] = { temporary_mount, "/dev/mqueue" }; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
688 | (void) mkdir(devmqueue, 0755); |
689 | (void) mount("/dev/mqueue", devmqueue, NULL((void*)0), MS_BIND4096, NULL((void*)0)); |
690 | |
691 | devhugepages = strjoina(temporary_mount, "/dev/hugepages")({ const char *_appendees_[] = { temporary_mount, "/dev/hugepages" }; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
692 | (void) mkdir(devhugepages, 0755); |
693 | (void) mount("/dev/hugepages", devhugepages, NULL((void*)0), MS_BIND4096, NULL((void*)0)); |
694 | |
695 | devlog = strjoina(temporary_mount, "/dev/log")({ const char *_appendees_[] = { temporary_mount, "/dev/log" } ; char *_d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0 ; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
696 | (void) symlink("/run/systemd/journal/dev-log", devlog); |
697 | |
698 | NULSTR_FOREACH(d, devnodes)for ((d) = (devnodes); (d) && *(d); (d) = strchr((d), 0)+1) { |
699 | r = clone_device_node(d, temporary_mount, &can_mknod); |
700 | /* ENXIO means the *source* is not a device file, skip creation in that case */ |
701 | if (r < 0 && r != -ENXIO6) |
702 | goto fail; |
703 | } |
704 | |
705 | dev_setup(temporary_mount, UID_INVALID((uid_t) -1), GID_INVALID((gid_t) -1)); |
706 | |
707 | /* Create the /dev directory if missing. It is more likely to be |
708 | * missing when the service is started with RootDirectory. This is |
709 | * consistent with mount units creating the mount points when missing. |
710 | */ |
711 | (void) mkdir_p_label(mount_entry_path(m), 0755); |
712 | |
713 | /* Unmount everything in old /dev */ |
714 | umount_recursive(mount_entry_path(m), 0); |
715 | if (mount(dev, mount_entry_path(m), NULL((void*)0), MS_MOVE8192, NULL((void*)0)) < 0) { |
716 | r = -errno(*__errno_location ()); |
717 | goto fail; |
718 | } |
719 | |
720 | rmdir(dev); |
721 | rmdir(temporary_mount); |
722 | |
723 | return 0; |
724 | |
725 | fail: |
726 | if (devpts) |
727 | umount(devpts); |
728 | |
729 | if (devshm) |
730 | umount(devshm); |
731 | |
732 | if (devhugepages) |
733 | umount(devhugepages); |
734 | |
735 | if (devmqueue) |
736 | umount(devmqueue); |
737 | |
738 | umount(dev); |
739 | rmdir(dev); |
740 | rmdir(temporary_mount); |
741 | |
742 | return r; |
743 | } |
744 | |
745 | static int mount_bind_dev(const MountEntry *m) { |
746 | int r; |
747 | |
748 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 748, __PRETTY_FUNCTION__ ); } while (0); |
749 | |
750 | /* Implements the little brother of mount_private_dev(): simply bind mounts the host's /dev into the service's |
751 | * /dev. This is only used when RootDirectory= is set. */ |
752 | |
753 | (void) mkdir_p_label(mount_entry_path(m), 0755); |
754 | |
755 | r = path_is_mount_point(mount_entry_path(m), NULL((void*)0), 0); |
756 | if (r < 0) |
757 | return log_debug_errno(r, "Unable to determine whether /dev is already mounted: %m")({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 757, __func__, "Unable to determine whether /dev is already mounted: %m" ) : -abs(_e); }); |
758 | if (r > 0) /* make this a NOP if /dev is already a mount point */ |
759 | return 0; |
760 | |
761 | if (mount("/dev", mount_entry_path(m), NULL((void*)0), MS_BIND4096|MS_REC16384, NULL((void*)0)) < 0) |
762 | return log_debug_errno(errno, "Failed to bind mount %s: %m", mount_entry_path(m))({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 762, __func__ , "Failed to bind mount %s: %m", mount_entry_path(m)) : -abs( _e); }); |
763 | |
764 | return 1; |
765 | } |
766 | |
767 | static int mount_sysfs(const MountEntry *m) { |
768 | int r; |
769 | |
770 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 770, __PRETTY_FUNCTION__ ); } while (0); |
771 | |
772 | (void) mkdir_p_label(mount_entry_path(m), 0755); |
773 | |
774 | r = path_is_mount_point(mount_entry_path(m), NULL((void*)0), 0); |
775 | if (r < 0) |
776 | return log_debug_errno(r, "Unable to determine whether /sys is already mounted: %m")({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 776, __func__, "Unable to determine whether /sys is already mounted: %m" ) : -abs(_e); }); |
777 | if (r > 0) /* make this a NOP if /sys is already a mount point */ |
778 | return 0; |
779 | |
780 | /* Bind mount the host's version so that we get all child mounts of it, too. */ |
781 | if (mount("/sys", mount_entry_path(m), NULL((void*)0), MS_BIND4096|MS_REC16384, NULL((void*)0)) < 0) |
782 | return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m))({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 782, __func__ , "Failed to mount %s: %m", mount_entry_path(m)) : -abs(_e); } ); |
783 | |
784 | return 1; |
785 | } |
786 | |
787 | static int mount_procfs(const MountEntry *m) { |
788 | int r; |
789 | |
790 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 790, __PRETTY_FUNCTION__ ); } while (0); |
791 | |
792 | (void) mkdir_p_label(mount_entry_path(m), 0755); |
793 | |
794 | r = path_is_mount_point(mount_entry_path(m), NULL((void*)0), 0); |
795 | if (r < 0) |
796 | return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m")({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 796, __func__, "Unable to determine whether /proc is already mounted: %m" ) : -abs(_e); }); |
797 | if (r > 0) /* make this a NOP if /proc is already a mount point */ |
798 | return 0; |
799 | |
800 | /* Mount a new instance, so that we get the one that matches our user namespace, if we are running in one */ |
801 | if (mount("proc", mount_entry_path(m), "proc", MS_NOSUID2|MS_NOEXEC8|MS_NODEV4, NULL((void*)0)) < 0) |
802 | return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m))({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 802, __func__ , "Failed to mount %s: %m", mount_entry_path(m)) : -abs(_e); } ); |
803 | |
804 | return 1; |
805 | } |
806 | |
807 | static int mount_tmpfs(const MountEntry *m) { |
808 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 808, __PRETTY_FUNCTION__ ); } while (0); |
809 | |
810 | /* First, get rid of everything that is below if there is anything. Then, overmount with our new tmpfs */ |
811 | |
812 | (void) mkdir_p_label(mount_entry_path(m), 0755); |
813 | (void) umount_recursive(mount_entry_path(m), 0); |
814 | |
815 | if (mount("tmpfs", mount_entry_path(m), "tmpfs", m->flags, mount_entry_options(m)) < 0) |
816 | return log_debug_errno(errno, "Failed to mount %s: %m", mount_entry_path(m))({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 816, __func__ , "Failed to mount %s: %m", mount_entry_path(m)) : -abs(_e); } ); |
817 | |
818 | return 1; |
819 | } |
820 | |
821 | static int follow_symlink( |
822 | const char *root_directory, |
823 | MountEntry *m) { |
824 | |
825 | _cleanup_free___attribute__((cleanup(freep))) char *target = NULL((void*)0); |
826 | int r; |
827 | |
828 | /* Let's chase symlinks, but only one step at a time. That's because depending where the symlink points we |
829 | * might need to change the order in which we mount stuff. Hence: let's normalize piecemeal, and do one step at |
830 | * a time by specifying CHASE_STEP. This function returns 0 if we resolved one step, and > 0 if we reached the |
831 | * end and already have a fully normalized name. */ |
832 | |
833 | r = chase_symlinks(mount_entry_path(m), root_directory, CHASE_STEP|CHASE_NONEXISTENT, &target); |
834 | if (r < 0) |
835 | return log_debug_errno(r, "Failed to chase symlinks '%s': %m", mount_entry_path(m))({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 835, __func__, "Failed to chase symlinks '%s': %m" , mount_entry_path(m)) : -abs(_e); }); |
836 | if (r > 0) /* Reached the end, nothing more to resolve */ |
837 | return 1; |
838 | |
839 | if (m->n_followed >= CHASE_SYMLINKS_MAX32) { /* put a boundary on things */ |
840 | log_debug("Symlink loop on '%s'.", mount_entry_path(m))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 840, __func__, "Symlink loop on '%s'." , mount_entry_path(m)) : -abs(_e); }); |
841 | return -ELOOP40; |
842 | } |
843 | |
844 | log_debug("Followed mount entry path symlink %s → %s.", mount_entry_path(m), target)({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 844, __func__, "Followed mount entry path symlink %s → %s." , mount_entry_path(m), target) : -abs(_e); }); |
845 | |
846 | free_and_replace(m->path_malloc, target)({ free(m->path_malloc); (m->path_malloc) = (target); ( target) = ((void*)0); 0; }); |
847 | m->has_prefix = true1; |
848 | |
849 | m->n_followed ++; |
850 | |
851 | return 0; |
852 | } |
853 | |
854 | static int apply_mount( |
855 | const char *root_directory, |
856 | MountEntry *m) { |
857 | |
858 | bool_Bool rbind = true1, make = false0; |
859 | const char *what; |
860 | int r; |
861 | |
862 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 862, __PRETTY_FUNCTION__ ); } while (0); |
863 | |
864 | log_debug("Applying namespace mount on %s", mount_entry_path(m))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 864, __func__, "Applying namespace mount on %s" , mount_entry_path(m)) : -abs(_e); }); |
865 | |
866 | switch (m->mode) { |
867 | |
868 | case INACCESSIBLE: { |
869 | struct stat target; |
870 | |
871 | /* First, get rid of everything that is below if there |
872 | * is anything... Then, overmount it with an |
873 | * inaccessible path. */ |
874 | (void) umount_recursive(mount_entry_path(m), 0); |
875 | |
876 | if (lstat(mount_entry_path(m), &target) < 0) { |
877 | if (errno(*__errno_location ()) == ENOENT2 && m->ignore) |
878 | return 0; |
879 | |
880 | return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m))({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 880, __func__ , "Failed to lstat() %s to determine what to mount over it: %m" , mount_entry_path(m)) : -abs(_e); }); |
881 | } |
882 | |
883 | what = mode_to_inaccessible_node(target.st_mode); |
884 | if (!what) { |
885 | log_debug("File type not supported for inaccessible mounts. Note that symlinks are not allowed")({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 885, __func__, "File type not supported for inaccessible mounts. Note that symlinks are not allowed" ) : -abs(_e); }); |
886 | return -ELOOP40; |
887 | } |
888 | break; |
889 | } |
890 | |
891 | case READONLY: |
892 | case READWRITE: |
893 | r = path_is_mount_point(mount_entry_path(m), root_directory, 0); |
894 | if (r == -ENOENT2 && m->ignore) |
895 | return 0; |
896 | if (r < 0) |
897 | return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m))({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 897, __func__, "Failed to determine whether %s is already a mount point: %m" , mount_entry_path(m)) : -abs(_e); }); |
898 | if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ |
899 | return 0; |
900 | /* This isn't a mount point yet, let's make it one. */ |
901 | what = mount_entry_path(m); |
902 | break; |
903 | |
904 | case BIND_MOUNT: |
905 | rbind = false0; |
906 | |
907 | _fallthrough_; |
908 | case BIND_MOUNT_RECURSIVE: { |
909 | _cleanup_free___attribute__((cleanup(freep))) char *chased = NULL((void*)0); |
910 | |
911 | /* Since mount() will always follow symlinks we chase the symlinks on our own first. Note that bind |
912 | * mount source paths are always relative to the host root, hence we pass NULL as root directory to |
913 | * chase_symlinks() here. */ |
914 | |
915 | r = chase_symlinks(mount_entry_source(m), NULL((void*)0), CHASE_TRAIL_SLASH, &chased); |
916 | if (r == -ENOENT2 && m->ignore) { |
917 | log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m))({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 917, __func__, "Path %s does not exist, ignoring." , mount_entry_source(m)) : -abs(_e); }); |
918 | return 0; |
919 | } |
920 | if (r < 0) |
921 | return log_debug_errno(r, "Failed to follow symlinks on %s: %m", mount_entry_source(m))({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 921, __func__, "Failed to follow symlinks on %s: %m" , mount_entry_source(m)) : -abs(_e); }); |
922 | |
923 | log_debug("Followed source symlinks %s → %s.", mount_entry_source(m), chased)({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 923, __func__, "Followed source symlinks %s → %s." , mount_entry_source(m), chased) : -abs(_e); }); |
924 | |
925 | free_and_replace(m->source_malloc, chased)({ free(m->source_malloc); (m->source_malloc) = (chased ); (chased) = ((void*)0); 0; }); |
926 | |
927 | what = mount_entry_source(m); |
928 | make = true1; |
929 | break; |
930 | } |
931 | |
932 | case EMPTY_DIR: |
933 | case TMPFS: |
934 | return mount_tmpfs(m); |
935 | |
936 | case PRIVATE_TMP: |
937 | what = mount_entry_source(m); |
938 | make = true1; |
939 | break; |
940 | |
941 | case PRIVATE_DEV: |
942 | return mount_private_dev(m); |
943 | |
944 | case BIND_DEV: |
945 | return mount_bind_dev(m); |
946 | |
947 | case SYSFS: |
948 | return mount_sysfs(m); |
949 | |
950 | case PROCFS: |
951 | return mount_procfs(m); |
952 | |
953 | default: |
954 | assert_not_reached("Unknown mode")do { log_assert_failed_unreachable_realm(LOG_REALM_SYSTEMD, ( "Unknown mode"), "../src/core/namespace.c", 954, __PRETTY_FUNCTION__ ); } while (0); |
955 | } |
956 | |
957 | assert(what)do { if ((__builtin_expect(!!(!(what)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("what"), "../src/core/namespace.c", 957, __PRETTY_FUNCTION__); } while (0); |
958 | |
959 | if (mount(what, mount_entry_path(m), NULL((void*)0), MS_BIND4096|(rbind ? MS_REC16384 : 0), NULL((void*)0)) < 0) { |
960 | bool_Bool try_again = false0; |
961 | r = -errno(*__errno_location ()); |
962 | |
963 | if (r == -ENOENT2 && make) { |
964 | struct stat st; |
965 | |
966 | /* Hmm, either the source or the destination are missing. Let's see if we can create the destination, then try again */ |
967 | |
968 | if (stat(what, &st) < 0) |
969 | log_debug_errno(errno, "Mount point source '%s' is not accessible: %m", what)({ int _level = ((7)), _e = (((*__errno_location ()))), _realm = (LOG_REALM_SYSTEMD); (log_get_max_level_realm(_realm) >= ((_level) & 0x07)) ? log_internal_realm(((_realm) << 10 | (_level)), _e, "../src/core/namespace.c", 969, __func__ , "Mount point source '%s' is not accessible: %m", what) : -abs (_e); }); |
970 | else { |
971 | int q; |
972 | |
973 | (void) mkdir_parents(mount_entry_path(m), 0755); |
974 | |
975 | if (S_ISDIR(st.st_mode)((((st.st_mode)) & 0170000) == (0040000))) |
976 | q = mkdir(mount_entry_path(m), 0755) < 0 ? -errno(*__errno_location ()) : 0; |
977 | else |
978 | q = touch(mount_entry_path(m)); |
979 | |
980 | if (q < 0) |
981 | log_debug_errno(q, "Failed to create destination mount point node '%s': %m", mount_entry_path(m))({ int _level = ((7)), _e = ((q)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 981, __func__, "Failed to create destination mount point node '%s': %m" , mount_entry_path(m)) : -abs(_e); }); |
982 | else |
983 | try_again = true1; |
984 | } |
985 | } |
986 | |
987 | if (try_again) { |
988 | if (mount(what, mount_entry_path(m), NULL((void*)0), MS_BIND4096|(rbind ? MS_REC16384 : 0), NULL((void*)0)) < 0) |
989 | r = -errno(*__errno_location ()); |
990 | else |
991 | r = 0; |
992 | } |
993 | |
994 | if (r < 0) |
995 | return log_debug_errno(r, "Failed to mount %s to %s: %m", what, mount_entry_path(m))({ int _level = ((7)), _e = ((r)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 995, __func__, "Failed to mount %s to %s: %m" , what, mount_entry_path(m)) : -abs(_e); }); |
996 | } |
997 | |
998 | log_debug("Successfully mounted %s to %s", what, mount_entry_path(m))({ int _level = (((7))), _e = ((0)), _realm = (LOG_REALM_SYSTEMD ); (log_get_max_level_realm(_realm) >= ((_level) & 0x07 )) ? log_internal_realm(((_realm) << 10 | (_level)), _e , "../src/core/namespace.c", 998, __func__, "Successfully mounted %s to %s" , what, mount_entry_path(m)) : -abs(_e); }); |
999 | return 0; |
1000 | } |
1001 | |
1002 | static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) { |
1003 | int r = 0; |
1004 | |
1005 | assert(m)do { if ((__builtin_expect(!!(!(m)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("m"), "../src/core/namespace.c", 1005, __PRETTY_FUNCTION__ ); } while (0); |
1006 | assert(proc_self_mountinfo)do { if ((__builtin_expect(!!(!(proc_self_mountinfo)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("proc_self_mountinfo"), "../src/core/namespace.c" , 1006, __PRETTY_FUNCTION__); } while (0); |
1007 | |
1008 | if (mount_entry_read_only(m)) { |
1009 | if (IN_SET(m->mode, EMPTY_DIR, TMPFS)({ _Bool _found = 0; static __attribute__ ((unused)) char _static_assert__macros_need_to_be_extended [20 - sizeof((int[]){EMPTY_DIR, TMPFS})/sizeof(int)]; switch( m->mode) { case EMPTY_DIR: case TMPFS: _found = 1; break; default : break; } _found; })) { |
1010 | /* Make superblock readonly */ |
1011 | if (mount(NULL((void*)0), mount_entry_path(m), NULL((void*)0), MS_REMOUNT32 | MS_RDONLY1 | m->flags, mount_entry_options(m)) < 0) |
1012 | r = -errno(*__errno_location ()); |
1013 | } else |
1014 | r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true1, blacklist, proc_self_mountinfo); |
1015 | } else if (m->mode == PRIVATE_DEV) { |
1016 | /* Superblock can be readonly but the submounts can't */ |
1017 | if (mount(NULL((void*)0), mount_entry_path(m), NULL((void*)0), MS_REMOUNT32|DEV_MOUNT_OPTIONS(2|(1<<24)|8)|MS_RDONLY1, NULL((void*)0)) < 0) |
1018 | r = -errno(*__errno_location ()); |
1019 | } else |
1020 | return 0; |
1021 | |
1022 | /* Not that we only turn on the MS_RDONLY flag here, we never turn it off. Something that was marked read-only |
1023 | * already stays this way. This improves compatibility with container managers, where we won't attempt to undo |
1024 | * read-only mounts already applied. */ |
1025 | |
1026 | if (r == -ENOENT2 && m->ignore) |
1027 | r = 0; |
1028 | |
1029 | return r; |
1030 | } |
1031 | |
1032 | static bool_Bool namespace_info_mount_apivfs(const char *root_directory, const NamespaceInfo *ns_info) { |
1033 | assert(ns_info)do { if ((__builtin_expect(!!(!(ns_info)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("ns_info"), "../src/core/namespace.c", 1033 , __PRETTY_FUNCTION__); } while (0); |
1034 | |
1035 | /* |
1036 | * ProtectControlGroups= and ProtectKernelTunables= imply MountAPIVFS=, |
1037 | * since to protect the API VFS mounts, they need to be around in the |
1038 | * first place... and RootDirectory= or RootImage= need to be set. |
1039 | */ |
1040 | |
1041 | /* root_directory should point to a mount point */ |
1042 | return root_directory && |
1043 | (ns_info->mount_apivfs || |
1044 | ns_info->protect_control_groups || |
1045 | ns_info->protect_kernel_tunables); |
1046 | } |
1047 | |
1048 | static size_t namespace_calculate_mounts( |
1049 | const char* root_directory, |
1050 | const NamespaceInfo *ns_info, |
1051 | char** read_write_paths, |
1052 | char** read_only_paths, |
1053 | char** inaccessible_paths, |
1054 | char** empty_directories, |
1055 | size_t n_bind_mounts, |
1056 | size_t n_temporary_filesystems, |
1057 | const char* tmp_dir, |
1058 | const char* var_tmp_dir, |
1059 | ProtectHome protect_home, |
1060 | ProtectSystem protect_system) { |
1061 | |
1062 | size_t protect_home_cnt; |
1063 | size_t protect_system_cnt = |
1064 | (protect_system == PROTECT_SYSTEM_STRICT ? |
1065 | ELEMENTSOF(protect_system_strict_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_strict_table), typeof(&*(protect_system_strict_table ))), sizeof(protect_system_strict_table)/sizeof((protect_system_strict_table )[0]), ((void)0))) : |
1066 | ((protect_system == PROTECT_SYSTEM_FULL) ? |
1067 | ELEMENTSOF(protect_system_full_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_full_table), typeof(&*(protect_system_full_table ))), sizeof(protect_system_full_table)/sizeof((protect_system_full_table )[0]), ((void)0))) : |
1068 | ((protect_system == PROTECT_SYSTEM_YES) ? |
1069 | ELEMENTSOF(protect_system_yes_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_system_yes_table), typeof(&*(protect_system_yes_table ))), sizeof(protect_system_yes_table)/sizeof((protect_system_yes_table )[0]), ((void)0))) : 0))); |
1070 | |
1071 | protect_home_cnt = |
1072 | (protect_home == PROTECT_HOME_YES ? |
1073 | ELEMENTSOF(protect_home_yes_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_yes_table), typeof(&*(protect_home_yes_table ))), sizeof(protect_home_yes_table)/sizeof((protect_home_yes_table )[0]), ((void)0))) : |
1074 | ((protect_home == PROTECT_HOME_READ_ONLY) ? |
1075 | ELEMENTSOF(protect_home_read_only_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_read_only_table), typeof(&*(protect_home_read_only_table ))), sizeof(protect_home_read_only_table)/sizeof((protect_home_read_only_table )[0]), ((void)0))) : |
1076 | ((protect_home == PROTECT_HOME_TMPFS) ? |
1077 | ELEMENTSOF(protect_home_tmpfs_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_tmpfs_table), typeof(&*(protect_home_tmpfs_table ))), sizeof(protect_home_tmpfs_table)/sizeof((protect_home_tmpfs_table )[0]), ((void)0))) : 0))); |
1078 | |
1079 | return !!tmp_dir + !!var_tmp_dir + |
1080 | strv_length(read_write_paths) + |
1081 | strv_length(read_only_paths) + |
1082 | strv_length(inaccessible_paths) + |
1083 | strv_length(empty_directories) + |
1084 | n_bind_mounts + |
1085 | n_temporary_filesystems + |
1086 | ns_info->private_dev + |
1087 | (ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_kernel_tunables_table), typeof(&*(protect_kernel_tunables_table ))), sizeof(protect_kernel_tunables_table)/sizeof((protect_kernel_tunables_table )[0]), ((void)0))) : 0) + |
1088 | (ns_info->protect_control_groups ? 1 : 0) + |
1089 | (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_kernel_modules_table), typeof(&*(protect_kernel_modules_table ))), sizeof(protect_kernel_modules_table)/sizeof((protect_kernel_modules_table )[0]), ((void)0))) : 0) + |
1090 | protect_home_cnt + protect_system_cnt + |
1091 | (namespace_info_mount_apivfs(root_directory, ns_info) ? ELEMENTSOF(apivfs_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(apivfs_table), typeof(&*(apivfs_table))), sizeof( apivfs_table)/sizeof((apivfs_table)[0]), ((void)0))) : 0); |
1092 | } |
1093 | |
1094 | static void normalize_mounts(const char *root_directory, MountEntry *mounts, size_t *n_mounts) { |
1095 | assert(n_mounts)do { if ((__builtin_expect(!!(!(n_mounts)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n_mounts"), "../src/core/namespace.c", 1095 , __PRETTY_FUNCTION__); } while (0); |
1096 | assert(mounts || *n_mounts == 0)do { if ((__builtin_expect(!!(!(mounts || *n_mounts == 0)),0) )) log_assert_failed_realm(LOG_REALM_SYSTEMD, ("mounts || *n_mounts == 0" ), "../src/core/namespace.c", 1096, __PRETTY_FUNCTION__); } while (0); |
1097 | |
1098 | qsort_safe(mounts, *n_mounts, sizeof(MountEntry), mount_path_compare); |
1099 | |
1100 | drop_duplicates(mounts, n_mounts); |
1101 | drop_outside_root(root_directory, mounts, n_mounts); |
1102 | drop_inaccessible(mounts, n_mounts); |
1103 | drop_nop(mounts, n_mounts); |
1104 | } |
1105 | |
1106 | int setup_namespace( |
1107 | const char* root_directory, |
1108 | const char* root_image, |
1109 | const NamespaceInfo *ns_info, |
1110 | char** read_write_paths, |
1111 | char** read_only_paths, |
1112 | char** inaccessible_paths, |
1113 | char** empty_directories, |
1114 | const BindMount *bind_mounts, |
1115 | size_t n_bind_mounts, |
1116 | const TemporaryFileSystem *temporary_filesystems, |
1117 | size_t n_temporary_filesystems, |
1118 | const char* tmp_dir, |
1119 | const char* var_tmp_dir, |
1120 | ProtectHome protect_home, |
1121 | ProtectSystem protect_system, |
1122 | unsigned long mount_flags, |
1123 | DissectImageFlags dissect_image_flags) { |
1124 | |
1125 | _cleanup_(loop_device_unrefp)__attribute__((cleanup(loop_device_unrefp))) LoopDevice *loop_device = NULL((void*)0); |
1126 | _cleanup_(decrypted_image_unrefp)__attribute__((cleanup(decrypted_image_unrefp))) DecryptedImage *decrypted_image = NULL((void*)0); |
1127 | _cleanup_(dissected_image_unrefp)__attribute__((cleanup(dissected_image_unrefp))) DissectedImage *dissected_image = NULL((void*)0); |
1128 | _cleanup_free___attribute__((cleanup(freep))) void *root_hash = NULL((void*)0); |
1129 | MountEntry *m, *mounts = NULL((void*)0); |
1130 | size_t root_hash_size = 0; |
1131 | const char *root; |
1132 | size_t n_mounts; |
1133 | bool_Bool make_slave; |
1134 | bool_Bool require_prefix = false0; |
1135 | int r = 0; |
1136 | |
1137 | assert(ns_info)do { if ((__builtin_expect(!!(!(ns_info)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("ns_info"), "../src/core/namespace.c", 1137 , __PRETTY_FUNCTION__); } while (0); |
1138 | |
1139 | if (mount_flags == 0) |
1140 | mount_flags = MS_SHARED(1<<20); |
1141 | |
1142 | if (root_image) { |
1143 | dissect_image_flags |= DISSECT_IMAGE_REQUIRE_ROOT; |
1144 | |
1145 | if (protect_system == PROTECT_SYSTEM_STRICT && |
1146 | protect_home != PROTECT_HOME_NO && |
1147 | strv_isempty(read_write_paths)) |
1148 | dissect_image_flags |= DISSECT_IMAGE_READ_ONLY; |
1149 | |
1150 | r = loop_device_make_by_path(root_image, |
1151 | dissect_image_flags & DISSECT_IMAGE_READ_ONLY ? O_RDONLY00 : O_RDWR02, |
1152 | &loop_device); |
1153 | if (r < 0) |
1154 | return r; |
1155 | |
1156 | r = root_hash_load(root_image, &root_hash, &root_hash_size); |
1157 | if (r < 0) |
1158 | return r; |
1159 | |
1160 | r = dissect_image(loop_device->fd, root_hash, root_hash_size, dissect_image_flags, &dissected_image); |
1161 | if (r < 0) |
1162 | return r; |
1163 | |
1164 | r = dissected_image_decrypt(dissected_image, NULL((void*)0), root_hash, root_hash_size, dissect_image_flags, &decrypted_image); |
1165 | if (r < 0) |
1166 | return r; |
1167 | } |
1168 | |
1169 | if (root_directory) |
1170 | root = root_directory; |
1171 | else { |
1172 | /* Always create the mount namespace in a temporary directory, instead of operating |
1173 | * directly in the root. The temporary directory prevents any mounts from being |
1174 | * potentially obscured my other mounts we already applied. |
1175 | * We use the same mount point for all images, which is safe, since they all live |
1176 | * in their own namespaces after all, and hence won't see each other. */ |
1177 | |
1178 | root = "/run/systemd/unit-root"; |
1179 | (void) mkdir_label(root, 0700); |
1180 | require_prefix = true1; |
1181 | } |
1182 | |
1183 | n_mounts = namespace_calculate_mounts( |
1184 | root, |
1185 | ns_info, |
1186 | read_write_paths, |
1187 | read_only_paths, |
1188 | inaccessible_paths, |
1189 | empty_directories, |
1190 | n_bind_mounts, |
1191 | n_temporary_filesystems, |
1192 | tmp_dir, var_tmp_dir, |
1193 | protect_home, protect_system); |
1194 | |
1195 | /* Set mount slave mode */ |
1196 | make_slave = root || n_mounts > 0 || ns_info->private_mounts; |
1197 | |
1198 | if (n_mounts > 0) { |
1199 | m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry))({ char *_new_; size_t _len_ = n_mounts * sizeof(MountEntry); _new_ = __builtin_alloca (_len_); (void *) memset(_new_, 0, _len_ ); }); |
1200 | r = append_access_mounts(&m, read_write_paths, READWRITE, require_prefix); |
1201 | if (r < 0) |
1202 | goto finish; |
1203 | |
1204 | r = append_access_mounts(&m, read_only_paths, READONLY, require_prefix); |
1205 | if (r < 0) |
1206 | goto finish; |
1207 | |
1208 | r = append_access_mounts(&m, inaccessible_paths, INACCESSIBLE, require_prefix); |
1209 | if (r < 0) |
1210 | goto finish; |
1211 | |
1212 | r = append_empty_dir_mounts(&m, empty_directories); |
1213 | if (r < 0) |
1214 | goto finish; |
1215 | |
1216 | r = append_bind_mounts(&m, bind_mounts, n_bind_mounts); |
1217 | if (r < 0) |
1218 | goto finish; |
1219 | |
1220 | r = append_tmpfs_mounts(&m, temporary_filesystems, n_temporary_filesystems); |
1221 | if (r < 0) |
1222 | goto finish; |
1223 | |
1224 | if (tmp_dir) { |
1225 | *(m++) = (MountEntry) { |
1226 | .path_const = "/tmp", |
1227 | .mode = PRIVATE_TMP, |
1228 | .source_const = tmp_dir, |
1229 | }; |
1230 | } |
1231 | |
1232 | if (var_tmp_dir) { |
1233 | *(m++) = (MountEntry) { |
1234 | .path_const = "/var/tmp", |
1235 | .mode = PRIVATE_TMP, |
1236 | .source_const = var_tmp_dir, |
1237 | }; |
1238 | } |
1239 | |
1240 | if (ns_info->private_dev) { |
1241 | *(m++) = (MountEntry) { |
1242 | .path_const = "/dev", |
1243 | .mode = PRIVATE_DEV, |
1244 | }; |
1245 | } |
1246 | |
1247 | if (ns_info->protect_kernel_tunables) { |
1248 | r = append_static_mounts(&m, protect_kernel_tunables_table, ELEMENTSOF(protect_kernel_tunables_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_kernel_tunables_table), typeof(&*(protect_kernel_tunables_table ))), sizeof(protect_kernel_tunables_table)/sizeof((protect_kernel_tunables_table )[0]), ((void)0))), ns_info->ignore_protect_paths); |
1249 | if (r < 0) |
1250 | goto finish; |
1251 | } |
1252 | |
1253 | if (ns_info->protect_kernel_modules) { |
1254 | r = append_static_mounts(&m, protect_kernel_modules_table, ELEMENTSOF(protect_kernel_modules_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_kernel_modules_table), typeof(&*(protect_kernel_modules_table ))), sizeof(protect_kernel_modules_table)/sizeof((protect_kernel_modules_table )[0]), ((void)0))), ns_info->ignore_protect_paths); |
1255 | if (r < 0) |
1256 | goto finish; |
1257 | } |
1258 | |
1259 | if (ns_info->protect_control_groups) { |
1260 | *(m++) = (MountEntry) { |
1261 | .path_const = "/sys/fs/cgroup", |
1262 | .mode = READONLY, |
1263 | }; |
1264 | } |
1265 | |
1266 | r = append_protect_home(&m, protect_home, ns_info->ignore_protect_paths); |
1267 | if (r < 0) |
1268 | goto finish; |
1269 | |
1270 | r = append_protect_system(&m, protect_system, false0); |
1271 | if (r < 0) |
1272 | goto finish; |
1273 | |
1274 | if (namespace_info_mount_apivfs(root, ns_info)) { |
1275 | r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table)__extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(apivfs_table), typeof(&*(apivfs_table))), sizeof( apivfs_table)/sizeof((apivfs_table)[0]), ((void)0))), ns_info->ignore_protect_paths); |
1276 | if (r < 0) |
1277 | goto finish; |
1278 | } |
1279 | |
1280 | assert(mounts + n_mounts == m)do { if ((__builtin_expect(!!(!(mounts + n_mounts == m)),0))) log_assert_failed_realm(LOG_REALM_SYSTEMD, ("mounts + n_mounts == m" ), "../src/core/namespace.c", 1280, __PRETTY_FUNCTION__); } while (0); |
1281 | |
1282 | /* Prepend the root directory where that's necessary */ |
1283 | r = prefix_where_needed(mounts, n_mounts, root); |
1284 | if (r < 0) |
1285 | goto finish; |
1286 | |
1287 | normalize_mounts(root_directory, mounts, &n_mounts); |
1288 | } |
1289 | |
1290 | if (unshare(CLONE_NEWNS0x00020000) < 0) { |
1291 | r = -errno(*__errno_location ()); |
1292 | goto finish; |
1293 | } |
1294 | |
1295 | if (make_slave) { |
1296 | /* Remount / as SLAVE so that nothing now mounted in the namespace |
1297 | shows up in the parent */ |
1298 | if (mount(NULL((void*)0), "/", NULL((void*)0), MS_SLAVE(1<<19)|MS_REC16384, NULL((void*)0)) < 0) { |
1299 | r = -errno(*__errno_location ()); |
1300 | goto finish; |
1301 | } |
1302 | } |
1303 | |
1304 | if (root_image) { |
1305 | /* A root image is specified, mount it to the right place */ |
1306 | r = dissected_image_mount(dissected_image, root, UID_INVALID((uid_t) -1), dissect_image_flags); |
1307 | if (r < 0) |
1308 | goto finish; |
1309 | |
1310 | if (decrypted_image) { |
1311 | r = decrypted_image_relinquish(decrypted_image); |
1312 | if (r < 0) |
1313 | goto finish; |
1314 | } |
1315 | |
1316 | loop_device_relinquish(loop_device); |
1317 | |
1318 | } else if (root_directory) { |
1319 | |
1320 | /* A root directory is specified. Turn its directory into bind mount, if it isn't one yet. */ |
1321 | r = path_is_mount_point(root, NULL((void*)0), AT_SYMLINK_FOLLOW0x400); |
1322 | if (r < 0) |
1323 | goto finish; |
1324 | if (r == 0) { |
1325 | if (mount(root, root, NULL((void*)0), MS_BIND4096|MS_REC16384, NULL((void*)0)) < 0) { |
1326 | r = -errno(*__errno_location ()); |
1327 | goto finish; |
1328 | } |
1329 | } |
1330 | |
1331 | } else if (root) { |
1332 | |
1333 | /* Let's mount the main root directory to the root directory to use */ |
1334 | if (mount("/", root, NULL((void*)0), MS_BIND4096|MS_REC16384, NULL((void*)0)) < 0) { |
1335 | r = -errno(*__errno_location ()); |
1336 | goto finish; |
1337 | } |
1338 | } |
1339 | |
1340 | /* Try to set up the new root directory before mounting anything else there. */ |
1341 | if (root_image || root_directory) |
1342 | (void) base_filesystem_create(root, UID_INVALID((uid_t) -1), GID_INVALID((gid_t) -1)); |
1343 | |
1344 | if (n_mounts > 0) { |
1345 | _cleanup_fclose___attribute__((cleanup(fclosep))) FILE *proc_self_mountinfo = NULL((void*)0); |
1346 | char **blacklist; |
1347 | size_t j; |
1348 | |
1349 | /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of /proc. |
1350 | * For example, this is the case with the option: 'InaccessiblePaths=/proc' */ |
1351 | proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); |
1352 | if (!proc_self_mountinfo) { |
1353 | r = -errno(*__errno_location ()); |
1354 | goto finish; |
1355 | } |
1356 | |
1357 | /* First round, establish all mounts we need */ |
1358 | for (;;) { |
1359 | bool_Bool again = false0; |
1360 | |
1361 | for (m = mounts; m < mounts + n_mounts; ++m) { |
1362 | |
1363 | if (m->applied) |
1364 | continue; |
1365 | |
1366 | r = follow_symlink(root, m); |
1367 | if (r < 0) |
1368 | goto finish; |
1369 | if (r == 0) { |
1370 | /* We hit a symlinked mount point. The entry got rewritten and might point to a |
1371 | * very different place now. Let's normalize the changed list, and start from |
1372 | * the beginning. After all to mount the entry at the new location we might |
1373 | * need some other mounts first */ |
1374 | again = true1; |
1375 | break; |
1376 | } |
1377 | |
1378 | r = apply_mount(root, m); |
1379 | if (r < 0) |
1380 | goto finish; |
1381 | |
1382 | m->applied = true1; |
1383 | } |
1384 | |
1385 | if (!again) |
1386 | break; |
1387 | |
1388 | normalize_mounts(root_directory, mounts, &n_mounts); |
1389 | } |
1390 | |
1391 | /* Create a blacklist we can pass to bind_mount_recursive() */ |
1392 | blacklist = newa(char*, n_mounts+1)({ do { if ((__builtin_expect(!!(!(!size_multiply_overflow(sizeof (char*), n_mounts+1))),0))) log_assert_failed_realm(LOG_REALM_SYSTEMD , ("!size_multiply_overflow(sizeof(char*), n_mounts+1)"), "../src/core/namespace.c" , 1392, __PRETTY_FUNCTION__); } while (0); (char**) __builtin_alloca (sizeof(char*)*(n_mounts+1)); }); |
1393 | for (j = 0; j < n_mounts; j++) |
1394 | blacklist[j] = (char*) mount_entry_path(mounts+j); |
1395 | blacklist[j] = NULL((void*)0); |
1396 | |
1397 | /* Second round, flip the ro bits if necessary. */ |
1398 | for (m = mounts; m < mounts + n_mounts; ++m) { |
1399 | r = make_read_only(m, blacklist, proc_self_mountinfo); |
1400 | if (r < 0) |
1401 | goto finish; |
1402 | } |
1403 | } |
1404 | |
1405 | if (root) { |
1406 | /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ |
1407 | r = mount_move_root(root); |
1408 | if (r < 0) |
1409 | goto finish; |
1410 | } |
1411 | |
1412 | /* Remount / as the desired mode. Note that this will not |
1413 | * reestablish propagation from our side to the host, since |
1414 | * what's disconnected is disconnected. */ |
1415 | if (mount(NULL((void*)0), "/", NULL((void*)0), mount_flags | MS_REC16384, NULL((void*)0)) < 0) { |
1416 | r = -errno(*__errno_location ()); |
1417 | goto finish; |
1418 | } |
1419 | |
1420 | r = 0; |
1421 | |
1422 | finish: |
1423 | for (m = mounts; m < mounts + n_mounts; m++) |
1424 | mount_entry_done(m); |
1425 | |
1426 | return r; |
1427 | } |
1428 | |
1429 | void bind_mount_free_many(BindMount *b, size_t n) { |
1430 | size_t i; |
1431 | |
1432 | assert(b || n == 0)do { if ((__builtin_expect(!!(!(b || n == 0)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("b || n == 0"), "../src/core/namespace.c" , 1432, __PRETTY_FUNCTION__); } while (0); |
1433 | |
1434 | for (i = 0; i < n; i++) { |
1435 | free(b[i].source); |
1436 | free(b[i].destination); |
1437 | } |
1438 | |
1439 | free(b); |
1440 | } |
1441 | |
1442 | int bind_mount_add(BindMount **b, size_t *n, const BindMount *item) { |
1443 | _cleanup_free___attribute__((cleanup(freep))) char *s = NULL((void*)0), *d = NULL((void*)0); |
1444 | BindMount *c; |
1445 | |
1446 | assert(b)do { if ((__builtin_expect(!!(!(b)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("b"), "../src/core/namespace.c", 1446, __PRETTY_FUNCTION__ ); } while (0); |
1447 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 1447, __PRETTY_FUNCTION__ ); } while (0); |
1448 | assert(item)do { if ((__builtin_expect(!!(!(item)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("item"), "../src/core/namespace.c", 1448 , __PRETTY_FUNCTION__); } while (0); |
1449 | |
1450 | s = strdup(item->source); |
1451 | if (!s) |
1452 | return -ENOMEM12; |
1453 | |
1454 | d = strdup(item->destination); |
1455 | if (!d) |
1456 | return -ENOMEM12; |
1457 | |
1458 | c = reallocarray(*b, *n + 1, sizeof(BindMount)); |
1459 | if (!c) |
1460 | return -ENOMEM12; |
1461 | |
1462 | *b = c; |
1463 | |
1464 | c[(*n) ++] = (BindMount) { |
1465 | .source = TAKE_PTR(s)({ typeof(s) _ptr_ = (s); (s) = ((void*)0); _ptr_; }), |
1466 | .destination = TAKE_PTR(d)({ typeof(d) _ptr_ = (d); (d) = ((void*)0); _ptr_; }), |
1467 | .read_only = item->read_only, |
1468 | .recursive = item->recursive, |
1469 | .ignore_enoent = item->ignore_enoent, |
1470 | }; |
1471 | |
1472 | return 0; |
1473 | } |
1474 | |
1475 | void temporary_filesystem_free_many(TemporaryFileSystem *t, size_t n) { |
1476 | size_t i; |
1477 | |
1478 | assert(t || n == 0)do { if ((__builtin_expect(!!(!(t || n == 0)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("t || n == 0"), "../src/core/namespace.c" , 1478, __PRETTY_FUNCTION__); } while (0); |
1479 | |
1480 | for (i = 0; i < n; i++) { |
1481 | free(t[i].path); |
1482 | free(t[i].options); |
1483 | } |
1484 | |
1485 | free(t); |
1486 | } |
1487 | |
1488 | int temporary_filesystem_add( |
1489 | TemporaryFileSystem **t, |
1490 | size_t *n, |
1491 | const char *path, |
1492 | const char *options) { |
1493 | |
1494 | _cleanup_free___attribute__((cleanup(freep))) char *p = NULL((void*)0), *o = NULL((void*)0); |
1495 | TemporaryFileSystem *c; |
1496 | |
1497 | assert(t)do { if ((__builtin_expect(!!(!(t)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("t"), "../src/core/namespace.c", 1497, __PRETTY_FUNCTION__ ); } while (0); |
1498 | assert(n)do { if ((__builtin_expect(!!(!(n)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("n"), "../src/core/namespace.c", 1498, __PRETTY_FUNCTION__ ); } while (0); |
1499 | assert(path)do { if ((__builtin_expect(!!(!(path)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("path"), "../src/core/namespace.c", 1499 , __PRETTY_FUNCTION__); } while (0); |
1500 | |
1501 | p = strdup(path); |
1502 | if (!p) |
1503 | return -ENOMEM12; |
1504 | |
1505 | if (!isempty(options)) { |
1506 | o = strdup(options); |
1507 | if (!o) |
1508 | return -ENOMEM12; |
1509 | } |
1510 | |
1511 | c = reallocarray(*t, *n + 1, sizeof(TemporaryFileSystem)); |
1512 | if (!c) |
1513 | return -ENOMEM12; |
1514 | |
1515 | *t = c; |
1516 | |
1517 | c[(*n) ++] = (TemporaryFileSystem) { |
1518 | .path = TAKE_PTR(p)({ typeof(p) _ptr_ = (p); (p) = ((void*)0); _ptr_; }), |
1519 | .options = TAKE_PTR(o)({ typeof(o) _ptr_ = (o); (o) = ((void*)0); _ptr_; }), |
1520 | }; |
1521 | |
1522 | return 0; |
1523 | } |
1524 | |
1525 | static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { |
1526 | _cleanup_free___attribute__((cleanup(freep))) char *x = NULL((void*)0); |
1527 | char bid[SD_ID128_STRING_MAX33]; |
1528 | sd_id128_t boot_id; |
1529 | int r; |
1530 | |
1531 | assert(id)do { if ((__builtin_expect(!!(!(id)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("id"), "../src/core/namespace.c", 1531, __PRETTY_FUNCTION__ ); } while (0); |
1532 | assert(prefix)do { if ((__builtin_expect(!!(!(prefix)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("prefix"), "../src/core/namespace.c", 1532 , __PRETTY_FUNCTION__); } while (0); |
1533 | assert(path)do { if ((__builtin_expect(!!(!(path)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("path"), "../src/core/namespace.c", 1533 , __PRETTY_FUNCTION__); } while (0); |
1534 | |
1535 | /* We include the boot id in the directory so that after a |
1536 | * reboot we can easily identify obsolete directories. */ |
1537 | |
1538 | r = sd_id128_get_boot(&boot_id); |
1539 | if (r < 0) |
1540 | return r; |
1541 | |
1542 | x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX")strjoin_real((prefix), "/systemd-private-", sd_id128_to_string (boot_id, bid), "-", id, "-XXXXXX", ((void*)0)); |
1543 | if (!x) |
1544 | return -ENOMEM12; |
1545 | |
1546 | RUN_WITH_UMASK(0077)for (__attribute__((cleanup(_reset_umask_))) struct _umask_struct_ _saved_umask_ = { umask(0077), 0 }; !_saved_umask_.quit ; _saved_umask_ .quit = 1) |
1547 | if (!mkdtemp(x)) |
1548 | return -errno(*__errno_location ()); |
1549 | |
1550 | RUN_WITH_UMASK(0000)for (__attribute__((cleanup(_reset_umask_))) struct _umask_struct_ _saved_umask_ = { umask(0000), 0 }; !_saved_umask_.quit ; _saved_umask_ .quit = 1) { |
1551 | char *y; |
1552 | |
1553 | y = strjoina(x, "/tmp")({ const char *_appendees_[] = { x, "/tmp" }; char *_d_, *_p_ ; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p(typeof (_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
1554 | |
1555 | if (mkdir(y, 0777 | S_ISVTX01000) < 0) |
1556 | return -errno(*__errno_location ()); |
1557 | } |
1558 | |
1559 | *path = TAKE_PTR(x)({ typeof(x) _ptr_ = (x); (x) = ((void*)0); _ptr_; }); |
1560 | |
1561 | return 0; |
1562 | } |
1563 | |
/* Sets up a pair of private temporary directories for 'id', one below /tmp and one below /var/tmp,
 * via setup_one_tmp_dir().
 *
 * On success the two paths are returned in *tmp_dir and *var_tmp_dir and 0 is returned. If the first
 * directory cannot be created the error is returned as is. If the second one cannot be created, the
 * first one (including its inner "tmp" directory) is removed again before the error is returned. In
 * both failure cases the output parameters are left untouched. */
int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
        char *private_tmp, *private_var_tmp, *inner;
        int r;

        assert(id);
        assert(tmp_dir);
        assert(var_tmp_dir);

        r = setup_one_tmp_dir(id, "/tmp", &private_tmp);
        if (r < 0)
                return r;

        r = setup_one_tmp_dir(id, "/var/tmp", &private_var_tmp);
        if (r >= 0) {
                *tmp_dir = private_tmp;
                *var_tmp_dir = private_var_tmp;

                return 0;
        }

        /* Roll back the first directory: inner directory first, then the outer one. This is
         * best-effort, the original error is what gets reported. */
        inner = strjoina(private_tmp, "/tmp");
        (void) rmdir(inner);
        (void) rmdir(private_tmp);

        free(private_tmp);
        return r;
}
1593 | |
1594 | int setup_netns(int netns_storage_socket[2]) { |
1595 | _cleanup_close___attribute__((cleanup(closep))) int netns = -1; |
1596 | int r, q; |
1597 | |
1598 | assert(netns_storage_socket)do { if ((__builtin_expect(!!(!(netns_storage_socket)),0))) log_assert_failed_realm (LOG_REALM_SYSTEMD, ("netns_storage_socket"), "../src/core/namespace.c" , 1598, __PRETTY_FUNCTION__); } while (0); |
1599 | assert(netns_storage_socket[0] >= 0)do { if ((__builtin_expect(!!(!(netns_storage_socket[0] >= 0)),0))) log_assert_failed_realm(LOG_REALM_SYSTEMD, ("netns_storage_socket[0] >= 0" ), "../src/core/namespace.c", 1599, __PRETTY_FUNCTION__); } while (0); |
1600 | assert(netns_storage_socket[1] >= 0)do { if ((__builtin_expect(!!(!(netns_storage_socket[1] >= 0)),0))) log_assert_failed_realm(LOG_REALM_SYSTEMD, ("netns_storage_socket[1] >= 0" ), "../src/core/namespace.c", 1600, __PRETTY_FUNCTION__); } while (0); |
1601 | |
1602 | /* We use the passed socketpair as a storage buffer for our |
1603 | * namespace reference fd. Whatever process runs this first |
1604 | * shall create a new namespace, all others should just join |
1605 | * it. To serialize that we use a file lock on the socket |
1606 | * pair. |
1607 | * |
1608 | * It's a bit crazy, but hey, works great! */ |
1609 | |
1610 | if (lockf(netns_storage_socket[0], F_LOCK1, 0) < 0) |
1611 | return -errno(*__errno_location ()); |
1612 | |
1613 | netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAITMSG_DONTWAIT); |
1614 | if (netns == -EAGAIN11) { |
1615 | /* Nothing stored yet, so let's create a new namespace */ |
1616 | |
1617 | if (unshare(CLONE_NEWNET0x40000000) < 0) { |
1618 | r = -errno(*__errno_location ()); |
1619 | goto fail; |
1620 | } |
1621 | |
1622 | loopback_setup(); |
1623 | |
1624 | netns = open("/proc/self/ns/net", O_RDONLY00|O_CLOEXEC02000000|O_NOCTTY0400); |
1625 | if (netns < 0) { |
1626 | r = -errno(*__errno_location ()); |
1627 | goto fail; |
1628 | } |
1629 | |
1630 | r = 1; |
1631 | |
1632 | } else if (netns < 0) { |
1633 | r = netns; |
1634 | goto fail; |
1635 | |
1636 | } else { |
1637 | /* Yay, found something, so let's join the namespace */ |
1638 | if (setns(netns, CLONE_NEWNET0x40000000) < 0) { |
1639 | r = -errno(*__errno_location ()); |
1640 | goto fail; |
1641 | } |
1642 | |
1643 | r = 0; |
1644 | } |
1645 | |
1646 | q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT)send_one_fd_iov_sa(netns_storage_socket[1], netns, ((void*)0) , 0, ((void*)0), 0, MSG_DONTWAIT); |
1647 | if (q < 0) { |
1648 | r = q; |
1649 | goto fail; |
1650 | } |
1651 | |
1652 | fail: |
1653 | (void) lockf(netns_storage_socket[0], F_ULOCK0, 0); |
1654 | return r; |
1655 | } |
1656 | |
1657 | bool_Bool ns_type_supported(NamespaceType type) { |
1658 | const char *t, *ns_proc; |
1659 | |
1660 | t = namespace_type_to_string(type); |
1661 | if (!t) /* Don't know how to translate this? Then it's not supported */ |
1662 | return false0; |
1663 | |
1664 | ns_proc = strjoina("/proc/self/ns/", t)({ const char *_appendees_[] = { "/proc/self/ns/", t }; char * _d_, *_p_; size_t _len_ = 0; size_t _i_; for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(_appendees_), typeof(&*(_appendees_))), sizeof(_appendees_ )/sizeof((_appendees_)[0]), ((void)0))) && _appendees_ [_i_]; _i_++) _len_ += strlen(_appendees_[_i_]); _p_ = _d_ = __builtin_alloca (_len_ + 1); for (_i_ = 0; _i_ < __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(_appendees_), typeof(& *(_appendees_))), sizeof(_appendees_)/sizeof((_appendees_)[0] ), ((void)0))) && _appendees_[_i_]; _i_++) _p_ = stpcpy (_p_, _appendees_[_i_]); *_p_ = 0; _d_; }); |
1665 | return access(ns_proc, F_OK0) == 0; |
1666 | } |
1667 | |
1668 | static const char *const protect_home_table[_PROTECT_HOME_MAX] = { |
1669 | [PROTECT_HOME_NO] = "no", |
1670 | [PROTECT_HOME_YES] = "yes", |
1671 | [PROTECT_HOME_READ_ONLY] = "read-only", |
1672 | [PROTECT_HOME_TMPFS] = "tmpfs", |
1673 | }; |
1674 | |
1675 | DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_YES)const char *protect_home_to_string(ProtectHome i) { if (i < 0 || i >= (ProtectHome) __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(protect_home_table), typeof (&*(protect_home_table))), sizeof(protect_home_table)/sizeof ((protect_home_table)[0]), ((void)0)))) return ((void*)0); return protect_home_table[i]; } ProtectHome protect_home_from_string (const char *s) { int b; if (!s) return -1; b = parse_boolean (s); if (b == 0) return (ProtectHome) 0; else if (b > 0) return PROTECT_HOME_YES; return (ProtectHome) string_table_lookup(protect_home_table , __extension__ (__builtin_choose_expr( !__builtin_types_compatible_p (typeof(protect_home_table), typeof(&*(protect_home_table ))), sizeof(protect_home_table)/sizeof((protect_home_table)[0 ]), ((void)0))), s); }; |
1676 | |
1677 | static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { |
1678 | [PROTECT_SYSTEM_NO] = "no", |
1679 | [PROTECT_SYSTEM_YES] = "yes", |
1680 | [PROTECT_SYSTEM_FULL] = "full", |
1681 | [PROTECT_SYSTEM_STRICT] = "strict", |
1682 | }; |
1683 | |
1684 | DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_system, ProtectSystem, PROTECT_SYSTEM_YES)const char *protect_system_to_string(ProtectSystem i) { if (i < 0 || i >= (ProtectSystem) __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(protect_system_table), typeof(&*(protect_system_table))), sizeof(protect_system_table )/sizeof((protect_system_table)[0]), ((void)0)))) return ((void *)0); return protect_system_table[i]; } ProtectSystem protect_system_from_string (const char *s) { int b; if (!s) return -1; b = parse_boolean (s); if (b == 0) return (ProtectSystem) 0; else if (b > 0) return PROTECT_SYSTEM_YES; return (ProtectSystem) string_table_lookup (protect_system_table, __extension__ (__builtin_choose_expr( ! __builtin_types_compatible_p(typeof(protect_system_table), typeof (&*(protect_system_table))), sizeof(protect_system_table) /sizeof((protect_system_table)[0]), ((void)0))), s); }; |
1685 | |
1686 | static const char* const namespace_type_table[] = { |
1687 | [NAMESPACE_MOUNT] = "mnt", |
1688 | [NAMESPACE_CGROUP] = "cgroup", |
1689 | [NAMESPACE_UTS] = "uts", |
1690 | [NAMESPACE_IPC] = "ipc", |
1691 | [NAMESPACE_USER] = "user", |
1692 | [NAMESPACE_PID] = "pid", |
1693 | [NAMESPACE_NET] = "net", |
1694 | }; |
1695 | |
1696 | DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType)const char *namespace_type_to_string(NamespaceType i) { if (i < 0 || i >= (NamespaceType) __extension__ (__builtin_choose_expr ( !__builtin_types_compatible_p(typeof(namespace_type_table), typeof(&*(namespace_type_table))), sizeof(namespace_type_table )/sizeof((namespace_type_table)[0]), ((void)0)))) return ((void *)0); return namespace_type_table[i]; } NamespaceType namespace_type_from_string (const char *s) { return (NamespaceType) string_table_lookup( namespace_type_table, __extension__ (__builtin_choose_expr( ! __builtin_types_compatible_p(typeof(namespace_type_table), typeof (&*(namespace_type_table))), sizeof(namespace_type_table) /sizeof((namespace_type_table)[0]), ((void)0))), s); }; |