LCOV - code coverage report
Current view: top level - basic - cgroup-util.c (source / functions) Hit Total Coverage
Test: main_coverage.info Lines: 642 1529 42.0 %
Date: 2019-08-22 15:41:25 Functions: 65 95 68.4 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: LGPL-2.1+ */
       2             : 
       3             : #include <dirent.h>
       4             : #include <errno.h>
       5             : #include <ftw.h>
       6             : #include <limits.h>
       7             : #include <signal.h>
       8             : #include <stddef.h>
       9             : #include <stdlib.h>
      10             : #include <string.h>
      11             : #include <sys/stat.h>
      12             : #include <sys/statfs.h>
      13             : #include <sys/types.h>
      14             : #include <sys/utsname.h>
      15             : #include <sys/xattr.h>
      16             : #include <unistd.h>
      17             : 
      18             : #include "alloc-util.h"
      19             : #include "cgroup-util.h"
      20             : #include "def.h"
      21             : #include "dirent-util.h"
      22             : #include "extract-word.h"
      23             : #include "fd-util.h"
      24             : #include "fileio.h"
      25             : #include "format-util.h"
      26             : #include "fs-util.h"
      27             : #include "log.h"
      28             : #include "login-util.h"
      29             : #include "macro.h"
      30             : #include "missing.h"
      31             : #include "mkdir.h"
      32             : #include "parse-util.h"
      33             : #include "path-util.h"
      34             : #include "proc-cmdline.h"
      35             : #include "process-util.h"
      36             : #include "set.h"
      37             : #include "special.h"
      38             : #include "stat-util.h"
      39             : #include "stdio-util.h"
      40             : #include "string-table.h"
      41             : #include "string-util.h"
      42             : #include "strv.h"
      43             : #include "unit-name.h"
      44             : #include "user-util.h"
      45             : 
      46           6 : static int cg_enumerate_items(const char *controller, const char *path, FILE **_f, const char *item) { /* Opens the file named by item inside the given cgroup for reading. Returns 0 and stores the stream in *_f on success; returns a negative value and leaves *_f untouched on failure. */
      47           6 :         _cleanup_free_ char *fs = NULL;
      48             :         FILE *f;
      49             :         int r;
      50             : 
      51           6 :         assert(_f);
      52             : 
      53           6 :         r = cg_get_path(controller, path, item, &fs); /* fs receives the path of the item file */
      54           6 :         if (r < 0)
      55           0 :                 return r;
      56             : 
      57           6 :         f = fopen(fs, "re"); /* "r" = read-only; "e" is the glibc close-on-exec mode flag */
      58           6 :         if (!f)
      59           6 :                 return -errno;
      60             : 
      61           0 :         *_f = f; /* the caller now owns the stream */
      62           0 :         return 0;
      63             : }
      64             : 
      65           0 : int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) { /* Convenience wrapper: opens the cgroup's "cgroup.procs" file; same return convention as cg_enumerate_items(). */
      66           0 :         return cg_enumerate_items(controller, path, _f, "cgroup.procs");
      67             : }
      68             : 
      69           0 : int cg_read_pid(FILE *f, pid_t *_pid) { /* Reads the next unsigned decimal number from f into *_pid. Returns 1 if a PID was stored, 0 at end of file, negative on error. */
      70             :         unsigned long ul;
      71             : 
      72             :         /* Note that the cgroup.procs might contain duplicates! See
      73             :          * cgroups.txt for details. */
      74             : 
      75           0 :         assert(f);
      76           0 :         assert(_pid);
      77             : 
      78           0 :         errno = 0; /* cleared so that a stale errno is not picked up by errno_or_else() below */
      79           0 :         if (fscanf(f, "%lu", &ul) != 1) {
      80             : 
      81           0 :                 if (feof(f)) /* nothing left to read: regular end of the list */
      82           0 :                         return 0;
      83             : 
      84           0 :                 return errno_or_else(EIO); /* presumably -errno if fscanf() set it, else -EIO; confirm against errno_or_else() */
      85             :         }
      86             : 
      87           0 :         if (ul <= 0) /* ul is unsigned, so this only rejects the value 0 */
      88           0 :                 return -EIO;
      89             : 
      90           0 :         *_pid = (pid_t) ul; /* NOTE(review): no upper-bound check before narrowing to pid_t; a value that does not fit would be stored incorrectly; consider rejecting it */
      91           0 :         return 1;
      92             : }
      93             : 
      94           0 : int cg_read_event( /* Searches the cgroup's "cgroup.events" file for the line whose first space-separated word equals event and returns a newly allocated copy of what follows that word in *ret. Returns 0 on success, -ENOENT if no line matches, -EINVAL if a line yields no key, or another negative error. NOTE(review): event and ret are not asserted non-NULL here. */
      95             :                 const char *controller,
      96             :                 const char *path,
      97             :                 const char *event,
      98             :                 char **ret) {
      99             : 
     100           0 :         _cleanup_free_ char *events = NULL, *content = NULL;
     101             :         int r;
     102             : 
     103           0 :         r = cg_get_path(controller, path, "cgroup.events", &events);
     104           0 :         if (r < 0)
     105           0 :                 return r;
     106             : 
     107           0 :         r = read_full_file(events, &content, NULL);
     108           0 :         if (r < 0)
     109           0 :                 return r;
     110             : 
     111           0 :         for (const char *p = content;;) { /* no increment expression: p is handed to extract_first_word() by address, which presumably advances it */
     112           0 :                 _cleanup_free_ char *line = NULL, *key = NULL, *val = NULL;
     113             :                 const char *q;
     114             : 
     115           0 :                 r = extract_first_word(&p, &line, "\n", 0); /* split off the next line */
     116           0 :                 if (r < 0)
     117           0 :                         return r;
     118           0 :                 if (r == 0) /* input exhausted without a match */
     119           0 :                         return -ENOENT;
     120             : 
     121           0 :                 q = line;
     122           0 :                 r = extract_first_word(&q, &key, " ", 0); /* split the key off the line; q is left at what follows */
     123           0 :                 if (r < 0)
     124           0 :                         return r;
     125           0 :                 if (r == 0)
     126           0 :                         return -EINVAL;
     127             : 
     128           0 :                 if (!streq(key, event))
     129           0 :                         continue; /* line, key and val are released by their cleanup attributes on every iteration */
     130             : 
     131           0 :                 val = strdup(q);
     132           0 :                 if (!val)
     133           0 :                         return -ENOMEM;
     134             : 
     135           0 :                 *ret = TAKE_PTR(val); /* hand ownership to the caller so the cleanup attribute does not free it */
     136           0 :                 return 0;
     137             :         }
     138             : }
     139             : 
     140           0 : bool cg_ns_supported(void) { /* Reports whether /proc/self/ns/cgroup exists, as probed with access(F_OK). The answer is cached per thread after the first call. */
     141             :         static thread_local int enabled = -1; /* -1 = not probed yet, otherwise the cached true/false result */
     142             : 
     143           0 :         if (enabled >= 0)
     144           0 :                 return enabled;
     145             : 
     146           0 :         if (access("/proc/self/ns/cgroup", F_OK) < 0) {
     147           0 :                 if (errno != ENOENT) /* ENOENT is the expected "not available" answer; anything else gets a debug message */
     148           0 :                         log_debug_errno(errno, "Failed to check whether /proc/self/ns/cgroup is available, assuming not: %m");
     149           0 :                 enabled = false;
     150             :         } else
     151           0 :                 enabled = true;
     152             : 
     153           0 :         return enabled;
     154             : }
     155             : 
     156           6 : int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) { /* Opens the cgroup's directory with opendir() so the caller can iterate over it. Returns 0 and stores the handle in *_d on success; returns a negative value and leaves *_d untouched on failure. */
     157           6 :         _cleanup_free_ char *fs = NULL;
     158             :         int r;
     159             :         DIR *d;
     160             : 
     161           6 :         assert(_d);
     162             : 
     163             :         /* This is not recursive! */
     164             : 
     165           6 :         r = cg_get_path(controller, path, NULL, &fs); /* NULL suffix: the path of the cgroup directory itself */
     166           6 :         if (r < 0)
     167           0 :                 return r;
     168             : 
     169           6 :         d = opendir(fs);
     170           6 :         if (!d)
     171           6 :                 return -errno;
     172             : 
     173           0 :         *_d = d; /* the caller now owns the directory handle */
     174           0 :         return 0;
     175             : }
     176             : 
     177           0 : int cg_read_subgroup(DIR *d, char **fn) { /* Stores the name of the next DT_DIR entry of d in *fn as a newly allocated string. Returns 1 if a name was stored, 0 when no further entry qualifies, negative on error. */
     178             :         struct dirent *de;
     179             : 
     180           0 :         assert(d);
     181           0 :         assert(fn);
     182             : 
     183           0 :         FOREACH_DIRENT_ALL(de, d, return -errno) {
     184             :                 char *b;
     185             : 
     186           0 :                 if (de->d_type != DT_DIR) /* NOTE(review): this also skips entries reported as DT_UNKNOWN */
     187           0 :                         continue;
     188             : 
     189           0 :                 if (dot_or_dot_dot(de->d_name))
     190           0 :                         continue;
     191             : 
     192           0 :                 b = strdup(de->d_name);
     193           0 :                 if (!b)
     194           0 :                         return -ENOMEM;
     195             : 
     196           0 :                 *fn = b; /* the caller must free() the name */
     197           0 :                 return 1;
     198             :         }
     199             : 
     200           0 :         return 0;
     201             : }
     202             : 
     203           0 : int cg_rmdir(const char *controller, const char *path) { /* Removes the cgroup directory with rmdir(); a directory that is already gone (ENOENT) is not an error. If cg_hybrid_unified() returns > 0 and controller is SYSTEMD_CGROUP_CONTROLLER, the same path is also removed under SYSTEMD_CGROUP_CONTROLLER_LEGACY. */
     204           0 :         _cleanup_free_ char *p = NULL;
     205             :         int r;
     206             : 
     207           0 :         r = cg_get_path(controller, path, NULL, &p);
     208           0 :         if (r < 0)
     209           0 :                 return r;
     210             : 
     211           0 :         r = rmdir(p);
     212           0 :         if (r < 0 && errno != ENOENT)
     213           0 :                 return -errno;
     214             : 
     215           0 :         r = cg_hybrid_unified();
     216           0 :         if (r <= 0) /* error (negative) or zero: either way no compat directory is handled here */
     217           0 :                 return r;
     218             : 
     219           0 :         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { /* NOTE(review): cg_get_path() has a branch for a NULL controller, but controller goes to streq() unguarded here; confirm callers never pass NULL */
     220           0 :                 r = cg_rmdir(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
     221           0 :                 if (r < 0) /* failure of the compat removal is only logged, not returned */
     222           0 :                         log_warning_errno(r, "Failed to remove compat systemd cgroup %s: %m", path);
     223             :         }
     224             : 
     225           0 :         return 0;
     226             : }
     227             : 
     228           6 : static int cg_kill_items( /* Sends sig to every PID listed in the cgroup's item file, re-reading the file until a pass finds no PID that is not yet in s. Returns a negative error; 0 if no kill() succeeded; otherwise 1 or, when log_kill is set, the value log_kill returned for the first successfully signalled PID. */
     229             :                 const char *controller,
     230             :                 const char *path,
     231             :                 int sig,
     232             :                 CGroupFlags flags,
     233             :                 Set *s,
     234             :                 cg_kill_log_func_t log_kill,
     235             :                 void *userdata,
     236             :                 const char *item) {
     237             : 
     238           6 :         _cleanup_set_free_ Set *allocated_set = NULL;
     239           6 :         bool done = false;
     240           6 :         int r, ret = 0, ret_log_kill = 0;
     241             :         pid_t my_pid;
     242             : 
     243           6 :         assert(sig >= 0);
     244             : 
     245             :          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
     246             :           * SIGCONT on SIGKILL. */
     247           6 :         if (IN_SET(sig, SIGCONT, SIGKILL))
     248           0 :                 flags &= ~CGROUP_SIGCONT;
     249             : 
     250             :         /* This goes through the tasks list and kills them all. This
     251             :          * is repeated until no further processes are added to the
     252             :          * tasks list, to properly handle forking processes */
     253             : 
     254           6 :         if (!s) { /* no set supplied by the caller: use a private one that is freed on return */
     255           0 :                 s = allocated_set = set_new(NULL);
     256           0 :                 if (!s)
     257           0 :                         return -ENOMEM;
     258             :         }
     259             : 
     260           6 :         my_pid = getpid_cached();
     261             : 
     262             :         do {
     263           6 :                 _cleanup_fclose_ FILE *f = NULL;
     264           6 :                 pid_t pid = 0;
     265           6 :                 done = true; /* stays true unless this pass meets a PID that is not in s yet */
     266             : 
     267           6 :                 r = cg_enumerate_items(controller, path, &f, item);
     268           6 :                 if (r < 0) {
     269           6 :                         if (ret >= 0 && r != -ENOENT) /* a missing file is not an error; an earlier error in ret takes precedence */
     270           0 :                                 return r;
     271             : 
     272           6 :                         return ret;
     273             :                 }
     274             : 
     275           0 :                 while ((r = cg_read_pid(f, &pid)) > 0) {
     276             : 
     277           0 :                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
     278           0 :                                 continue;
     279             : 
     280           0 :                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid)) /* already handled in an earlier pass or by an earlier call sharing s */
     281           0 :                                 continue;
     282             : 
     283           0 :                         if (log_kill)
     284           0 :                                 ret_log_kill = log_kill(pid, sig, userdata);
     285             : 
     286             :                         /* If we haven't killed this process yet, kill
     287             :                          * it */
     288           0 :                         if (kill(pid, sig) < 0) {
     289           0 :                                 if (ret >= 0 && errno != ESRCH) /* ESRCH (process already gone) is ignored; only the first real error is kept */
     290           0 :                                         ret = -errno;
     291             :                         } else {
     292           0 :                                 if (flags & CGROUP_SIGCONT)
     293           0 :                                         (void) kill(pid, SIGCONT);
     294             : 
     295           0 :                                 if (ret == 0) {
     296           0 :                                         if (log_kill)
     297           0 :                                                 ret = ret_log_kill;
     298             :                                         else
     299           0 :                                                 ret = 1;
     300             :                                 }
     301             :                         }
     302             : 
     303           0 :                         done = false; /* a new PID was seen, so the file is scanned once more */
     304             : 
     305           0 :                         r = set_put(s, PID_TO_PTR(pid)); /* recorded whether or not kill() succeeded, so the PID is not retried */
     306           0 :                         if (r < 0) {
     307           0 :                                 if (ret >= 0)
     308           0 :                                         return r;
     309             : 
     310           0 :                                 return ret;
     311             :                         }
     312             :                 }
     313             : 
     314           0 :                 if (r < 0) { /* cg_read_pid() failed; an earlier error in ret takes precedence */
     315           0 :                         if (ret >= 0)
     316           0 :                                 return r;
     317             : 
     318           0 :                         return ret;
     319             :                 }
     320             : 
     321             :                 /* To avoid racing against processes which fork
     322             :                  * quicker than we can kill them we repeat this until
     323             :                  * no new pids need to be killed. */
     324             : 
     325           0 :         } while (!done);
     326             : 
     327           0 :         return ret;
     328             : }
     329             : 
     330           6 : int cg_kill( /* Signals the processes of a single cgroup (no recursion) by walking "cgroup.procs". For SIGKILL, when cg_unified_controller() returns > 0, "cgroup.threads" is walked as well. Return convention follows cg_kill_items(), subject to the review notes below. */
     331             :                 const char *controller,
     332             :                 const char *path,
     333             :                 int sig,
     334             :                 CGroupFlags flags,
     335             :                 Set *s,
     336             :                 cg_kill_log_func_t log_kill,
     337             :                 void *userdata) {
     338             :         int r;
     339             : 
     340           6 :         r = cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.procs");
     341           6 :         if (r < 0 || sig != SIGKILL)
     342           6 :                 return r;
     343             : 
     344             :         /* Only in case of killing with SIGKILL and when using cgroupsv2, kill remaining threads manually as
     345             :            a workaround for kernel bug. It was fixed in 5.2-rc5 (c03cd7738a83), backported to 4.19.66
     346             :            (4340d175b898) and 4.14.138 (feb6b123b7dd). */
     347           0 :         r = cg_unified_controller(controller); /* overwrites the result of the cgroup.procs pass above */
     348           0 :         if (r < 0)
     349           0 :                 return r;
     350           0 :         if (r == 0) /* doesn't apply to legacy hierarchy */
     351           0 :                 return 0; /* NOTE(review): returns 0 even if the cgroup.procs pass returned > 0; confirm whether that earlier result should be returned instead */
     352             : 
     353           0 :         return cg_kill_items(controller, path, sig, flags, s, log_kill, userdata, "cgroup.threads"); /* NOTE(review): the cgroup.procs result is not merged in, so a positive first result can be reported as 0 */
     354             : }
     355             : 
     356           6 : int cg_kill_recursive( /* Applies cg_kill() to path and then recurses into each subgroup returned by cg_read_subgroup(). With CGROUP_REMOVE set it finally tries to remove path itself. Returns a negative error or the accumulated non-negative result. */
     357             :                 const char *controller,
     358             :                 const char *path,
     359             :                 int sig,
     360             :                 CGroupFlags flags,
     361             :                 Set *s,
     362             :                 cg_kill_log_func_t log_kill,
     363             :                 void *userdata) {
     364             : 
     365           6 :         _cleanup_set_free_ Set *allocated_set = NULL;
     366           6 :         _cleanup_closedir_ DIR *d = NULL;
     367             :         int r, ret;
     368             :         char *fn;
     369             : 
     370           6 :         assert(path);
     371           6 :         assert(sig >= 0);
     372             : 
     373           6 :         if (!s) { /* allocated once at the top level; the recursive calls below receive it through s */
     374           6 :                 s = allocated_set = set_new(NULL);
     375           6 :                 if (!s)
     376           0 :                         return -ENOMEM;
     377             :         }
     378             : 
     379           6 :         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
     380             : 
     381           6 :         r = cg_enumerate_subgroups(controller, path, &d);
     382           6 :         if (r < 0) {
     383           6 :                 if (ret >= 0 && r != -ENOENT) /* a missing directory is not an error; an earlier error in ret takes precedence */
     384           0 :                         return r;
     385             : 
     386           6 :                 return ret;
     387             :         }
     388             : 
     389           0 :         while ((r = cg_read_subgroup(d, &fn)) > 0) {
     390           0 :                 _cleanup_free_ char *p = NULL;
     391             : 
     392           0 :                 p = path_join(empty_to_root(path), fn);
     393           0 :                 free(fn); /* the name has been copied into p (or path_join() failed), so it is released before the error check */
     394           0 :                 if (!p)
     395           0 :                         return -ENOMEM;
     396             : 
     397           0 :                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
     398           0 :                 if (r != 0 && ret >= 0) /* once ret is negative it sticks; otherwise any non-zero child result replaces it */
     399           0 :                         ret = r;
     400             :         }
     401           0 :         if (ret >= 0 && r < 0) /* r is negative here only if cg_read_subgroup() ended the loop with an error */
     402           0 :                 ret = r;
     403             : 
     404           0 :         if (flags & CGROUP_REMOVE) {
     405           0 :                 r = cg_rmdir(controller, path);
     406           0 :                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) /* ENOENT and EBUSY from the removal are ignored */
     407           0 :                         return r;
     408             :         }
     409             : 
     410           0 :         return ret;
     411             : }
     412             : 
     413           0 : int cg_migrate( /* Attaches every PID listed in "cgroup.procs" of (cfrom, pfrom) to (cto, pto) with cg_attach(), rescanning until a pass finds no PID that is not yet in the local set. Returns a negative error, 0 if no attach succeeded, or 1 if at least one attach succeeded. */
     414             :                 const char *cfrom,
     415             :                 const char *pfrom,
     416             :                 const char *cto,
     417             :                 const char *pto,
     418             :                 CGroupFlags flags) {
     419             : 
     420           0 :         bool done = false;
     421           0 :         _cleanup_set_free_ Set *s = NULL;
     422           0 :         int r, ret = 0;
     423             :         pid_t my_pid;
     424             : 
     425           0 :         assert(cfrom);
     426           0 :         assert(pfrom);
     427           0 :         assert(cto);
     428           0 :         assert(pto);
     429             : 
     430           0 :         s = set_new(NULL); /* remembers the PIDs already processed across passes */
     431           0 :         if (!s)
     432           0 :                 return -ENOMEM;
     433             : 
     434           0 :         my_pid = getpid_cached();
     435             : 
     436             :         do {
     437           0 :                 _cleanup_fclose_ FILE *f = NULL;
     438           0 :                 pid_t pid = 0;
     439           0 :                 done = true; /* stays true unless this pass meets a PID that is not in s yet */
     440             : 
     441           0 :                 r = cg_enumerate_processes(cfrom, pfrom, &f);
     442           0 :                 if (r < 0) {
     443           0 :                         if (ret >= 0 && r != -ENOENT) /* a missing file is not an error; an earlier error in ret takes precedence */
     444           0 :                                 return r;
     445             : 
     446           0 :                         return ret;
     447             :                 }
     448             : 
     449           0 :                 while ((r = cg_read_pid(f, &pid)) > 0) {
     450             : 
     451             :                         /* This might do weird stuff if we aren't a
     452             :                          * single-threaded program. However, we
     453             :                          * luckily know we are not */
     454           0 :                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
     455           0 :                                 continue;
     456             : 
     457           0 :                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
     458           0 :                                 continue;
     459             : 
     460             :                         /* Ignore kernel threads. Since they can only
     461             :                          * exist in the root cgroup, we only check for
     462             :                          * them there. */
     463           0 :                         if (cfrom && /* NOTE(review): cfrom was asserted non-NULL above, so this term is always true */
     464           0 :                             empty_or_root(pfrom) &&
     465           0 :                             is_kernel_thread(pid) > 0)
     466           0 :                                 continue;
     467             : 
     468           0 :                         r = cg_attach(cto, pto, pid);
     469           0 :                         if (r < 0) {
     470           0 :                                 if (ret >= 0 && r != -ESRCH) /* ESRCH is ignored; only the first real error is kept */
     471           0 :                                         ret = r;
     472           0 :                         } else if (ret == 0)
     473           0 :                                 ret = 1;
     474             : 
     475           0 :                         done = false; /* a new PID was seen, so the file is scanned once more */
     476             : 
     477           0 :                         r = set_put(s, PID_TO_PTR(pid));
     478           0 :                         if (r < 0) {
     479           0 :                                 if (ret >= 0)
     480           0 :                                         return r;
     481             : 
     482           0 :                                 return ret;
     483             :                         }
     484             :                 }
     485             : 
     486           0 :                 if (r < 0) { /* cg_read_pid() failed; an earlier error in ret takes precedence */
     487           0 :                         if (ret >= 0)
     488           0 :                                 return r;
     489             : 
     490           0 :                         return ret;
     491             :                 }
     492           0 :         } while (!done);
     493             : 
     494           0 :         return ret;
     495             : }
     496             : 
     497           0 : int cg_migrate_recursive( /* Runs cg_migrate() for pfrom and then for each subgroup below it, always with the same destination (cto, pto). With CGROUP_REMOVE set it finally tries to remove pfrom. Returns a negative error or the accumulated non-negative result. */
     498             :                 const char *cfrom,
     499             :                 const char *pfrom,
     500             :                 const char *cto,
     501             :                 const char *pto,
     502             :                 CGroupFlags flags) {
     503             : 
     504           0 :         _cleanup_closedir_ DIR *d = NULL;
     505           0 :         int r, ret = 0;
     506             :         char *fn;
     507             : 
     508           0 :         assert(cfrom);
     509           0 :         assert(pfrom);
     510           0 :         assert(cto);
     511           0 :         assert(pto);
     512             : 
     513           0 :         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
     514             : 
     515           0 :         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
     516           0 :         if (r < 0) {
     517           0 :                 if (ret >= 0 && r != -ENOENT) /* a missing directory is not an error; an earlier error in ret takes precedence */
     518           0 :                         return r;
     519             : 
     520           0 :                 return ret;
     521             :         }
     522             : 
     523           0 :         while ((r = cg_read_subgroup(d, &fn)) > 0) {
     524           0 :                 _cleanup_free_ char *p = NULL;
     525             : 
     526           0 :                 p = path_join(empty_to_root(pfrom), fn);
     527           0 :                 free(fn); /* released before the error check so it is not leaked when path_join() fails */
     528           0 :                 if (!p)
     529           0 :                         return -ENOMEM;
     530             : 
     531           0 :                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
     532           0 :                 if (r != 0 && ret >= 0) /* once ret is negative it sticks; otherwise any non-zero child result replaces it */
     533           0 :                         ret = r;
     534             :         }
     535             : 
     536           0 :         if (r < 0 && ret >= 0) /* r is negative here only if cg_read_subgroup() ended the loop with an error */
     537           0 :                 ret = r;
     538             : 
     539           0 :         if (flags & CGROUP_REMOVE) {
     540           0 :                 r = cg_rmdir(cfrom, pfrom);
     541           0 :                 if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY)) /* ENOENT and EBUSY from the removal are ignored */
     542           0 :                         return r;
     543             :         }
     544             : 
     545           0 :         return ret;
     546             : }
     547             : 
     548           0 : int cg_migrate_recursive_fallback( /* Tries cg_migrate_recursive() with destination pto; if that fails, retries with each value PATH_FOREACH_PREFIX() produces for pto and returns the first non-negative result. If every attempt fails, the error of the initial attempt is returned. */
     549             :                 const char *cfrom,
     550             :                 const char *pfrom,
     551             :                 const char *cto,
     552             :                 const char *pto,
     553             :                 CGroupFlags flags) {
     554             : 
     555             :         int r;
     556             : 
     557           0 :         assert(cfrom);
     558           0 :         assert(pfrom);
     559           0 :         assert(cto);
     560           0 :         assert(pto);
     561             : 
     562           0 :         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
     563           0 :         if (r < 0) {
     564           0 :                 char prefix[strlen(pto) + 1]; /* variable-length stack buffer large enough for pto including its terminating NUL */
     565             : 
     566             :                 /* This didn't work? Then let's try all prefixes of the destination */
     567             : 
     568           0 :                 PATH_FOREACH_PREFIX(prefix, pto) {
     569             :                         int q;
     570             : 
     571           0 :                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
     572           0 :                         if (q >= 0)
     573           0 :                                 return q;
     574             :                 }
     575             :         }
     576             : 
     577           0 :         return r;
     578             : }
     579             : 
     580         492 : static const char *controller_to_dirname(const char *controller) { /* Returns either a controller string itself or the result of startswith() on it; nothing is allocated here. */
     581             :         const char *e;
     582             : 
     583         492 :         assert(controller);
     584             : 
     585             :         /* Converts a controller name to the directory name below
     586             :          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
     587             :          * just cuts off the name= prefix used for named
     588             :          * hierarchies, if it is specified. */
     589             : 
     590         492 :         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) { /* the systemd controller is first replaced by its hybrid or legacy name */
     591         107 :                 if (cg_hybrid_unified() > 0)
     592         107 :                         controller = SYSTEMD_CGROUP_CONTROLLER_HYBRID;
     593             :                 else
     594           0 :                         controller = SYSTEMD_CGROUP_CONTROLLER_LEGACY; /* also taken when cg_hybrid_unified() returns an error */
     595             :         }
     596             : 
     597         492 :         e = startswith(controller, "name=");
     598         492 :         if (e)
     599         107 :                 return e;
     600             : 
     601         385 :         return controller;
     602             : }
     603             : 
     604         150 : static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) { /* Joins "/sys/fs/cgroup", the directory name from controller_to_dirname(), path and suffix with path_join(), leaving out path and/or suffix when isempty() is true for them. Returns 0 and stores the new string in *fs, or -ENOMEM. */
     605             :         const char *dn;
     606         150 :         char *t = NULL;
     607             : 
     608         150 :         assert(fs);
     609         150 :         assert(controller);
     610             : 
     611         150 :         dn = controller_to_dirname(controller);
     612             : 
     613         150 :         if (isempty(path) && isempty(suffix))
     614           0 :                 t = path_join("/sys/fs/cgroup", dn);
     615         150 :         else if (isempty(path))
     616          54 :                 t = path_join("/sys/fs/cgroup", dn, suffix);
     617          96 :         else if (isempty(suffix))
     618          83 :                 t = path_join("/sys/fs/cgroup", dn, path);
     619             :         else
     620          13 :                 t = path_join("/sys/fs/cgroup", dn, path, suffix);
     621         150 :         if (!t) /* a NULL result from path_join() is reported as out of memory */
     622           0 :                 return -ENOMEM;
     623             : 
     624         150 :         *fs = t; /* the caller must free() the result */
     625         150 :         return 0;
     626             : }
     627             : /* Builds "/sys/fs/cgroup[/<path>][/<suffix>]" (no per-controller directory) and stores it in *fs. Returns 0 on success, -ENOMEM if the string cannot be allocated. */
     628           0 : static int join_path_unified(const char *path, const char *suffix, char **fs) {
     629             :         char *t;
     630             : 
     631           0 :         assert(fs);
     632             :         /* Leave out whichever of path and suffix is NULL or empty */
     633           0 :         if (isempty(path) && isempty(suffix))
     634           0 :                 t = strdup("/sys/fs/cgroup");
     635           0 :         else if (isempty(path))
     636           0 :                 t = path_join("/sys/fs/cgroup", suffix);
     637           0 :         else if (isempty(suffix))
     638           0 :                 t = path_join("/sys/fs/cgroup", path);
     639             :         else
     640           0 :                 t = path_join("/sys/fs/cgroup", path, suffix);
     641           0 :         if (!t)
     642           0 :                 return -ENOMEM;
     643             : 
     644           0 :         *fs = t;
     645           0 :         return 0;
     646             : }
     647             : /* Resolves controller/path/suffix to a file system path stored in *fs. Returns 0 on success, -EINVAL for an invalid controller name (or if controller, path and suffix are all NULL), -ENOMEM on allocation failure, or the error of cg_all_unified(). */
     648         150 : int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
     649             :         int r;
     650             : 
     651         150 :         assert(fs);
     652             : 
     653         150 :         if (!controller) {
     654             :                 char *t;
     655             : 
     656             :                 /* If no controller is specified, we return the path
     657             :                  * *below* the controllers, without any prefix. */
     658             : 
     659           0 :                 if (!path && !suffix)
     660           0 :                         return -EINVAL;
     661             : 
     662           0 :                 if (!suffix)
     663           0 :                         t = strdup(path);
     664           0 :                 else if (!path)
     665           0 :                         t = strdup(suffix);
     666             :                 else
     667           0 :                         t = path_join(path, suffix);
     668           0 :                 if (!t)
     669           0 :                         return -ENOMEM;
     670             : 
     671           0 :                 *fs = path_simplify(t, false);
     672           0 :                 return 0;
     673             :         }
     674             : 
     675         150 :         if (!cg_controller_is_valid(controller))
     676           0 :                 return -EINVAL;
     677             :         /* Fully unified: a single tree without controller directory; otherwise one directory per controller */
     678         150 :         r = cg_all_unified();
     679         150 :         if (r < 0)
     680           0 :                 return r;
     681         150 :         if (r > 0)
     682           0 :                 r = join_path_unified(path, suffix, fs);
     683             :         else
     684         150 :                 r = join_path_legacy(controller, path, suffix, fs);
     685         150 :         if (r < 0)
     686           0 :                 return r;
     687             :         /* The return value of path_simplify() is ignored here, so it presumably cleans up *fs in place - TODO confirm */
     688         150 :         path_simplify(*fs, false);
     689         150 :         return 0;
     690             : }
     691             : /* Returns 0 if the controller can be used, -EINVAL if its name is invalid, -EOPNOTSUPP for a "name=" hierarchy on a fully unified setup, -errno if its directory below /sys/fs/cgroup/ is not accessible, or the error of cg_all_unified(). */
     692         342 : static int controller_is_accessible(const char *controller) {
     693             :         int r;
     694             : 
     695         342 :         assert(controller);
     696             : 
     697             :         /* Checks whether a specific controller is accessible,
     698             :          * i.e. whether its hierarchy is mounted. In the unified hierarchy
     699             :          * all controllers are considered accessible, except for the named
     700             :          * hierarchies, which are rejected with -EOPNOTSUPP. */
     701             : 
     702         342 :         if (!cg_controller_is_valid(controller))
     703           0 :                 return -EINVAL;
     704             : 
     705         342 :         r = cg_all_unified();
     706         342 :         if (r < 0)
     707           0 :                 return r;
     708         342 :         if (r > 0) {
     709             :                 /* We don't support named hierarchies if we are using
     710             :                  * the unified hierarchy. */
     711             : 
     712           0 :                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
     713           0 :                         return 0;
     714             : 
     715           0 :                 if (startswith(controller, "name="))
     716           0 :                         return -EOPNOTSUPP;
     717             : 
     718             :         } else {
     719             :                 const char *cc, *dn;
     720             : 
     721         342 :                 dn = controller_to_dirname(controller);
     722        1710 :                 cc = strjoina("/sys/fs/cgroup/", dn);
     723             :                 /* F_OK: only test that the controller's directory exists */
     724         342 :                 if (laccess(cc, F_OK) < 0)
     725           0 :                         return -errno;
     726             :         }
     727             : 
     728         342 :         return 0;
     729             : }
     730             : /* Same as cg_get_path(), except that controller must not be NULL and is first checked with controller_is_accessible(), whose negative result is returned as-is. */
     731          54 : int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
     732             :         int r;
     733             : 
     734          54 :         assert(controller);
     735          54 :         assert(fs);
     736             : 
     737             :         /* Check if the specified controller is actually accessible */
     738          54 :         r = controller_is_accessible(controller);
     739          54 :         if (r < 0)
     740           0 :                 return r;
     741             : 
     742          54 :         return cg_get_path(controller, path, suffix, fs);
     743             : }
     744             : /* nftw() callback used by cg_trim(): tries to rmdir() every directory below the starting directory of the walk. Always returns 0, so the walk is never aborted from here. */
     745           0 : static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
     746           0 :         assert(path);
     747           0 :         assert(sb);
     748           0 :         assert(ftwbuf);
     749             :         /* FTW_DP: a directory reported after its contents have been visited; ignore every other entry type */
     750           0 :         if (typeflag != FTW_DP)
     751           0 :                 return 0;
     752             :         /* Level 0 is the starting directory of the walk itself; it is not removed here */
     753           0 :         if (ftwbuf->level < 1)
     754           0 :                 return 0;
     755             :         /* Best effort: a failing rmdir() is deliberately ignored */
     756           0 :         (void) rmdir(path);
     757           0 :         return 0;
     758             : }
     759             : /* Best-effort removal of the directories below the cgroup at path, and of that directory itself if delete_root is true. On hybrid setups the same is repeated for the legacy systemd hierarchy, where a failure is only logged. Returns 0 or a negative error. */
     760          18 : int cg_trim(const char *controller, const char *path, bool delete_root) {
     761          18 :         _cleanup_free_ char *fs = NULL;
     762          18 :         int r = 0, q;
     763             : 
     764          18 :         assert(path);
     765             : 
     766          18 :         r = cg_get_path(controller, path, NULL, &fs);
     767          18 :         if (r < 0)
     768           0 :                 return r;
     769             :         /* Clear errno first; errno_or_else(EIO) below presumably falls back to EIO if nftw() fails without setting it - TODO confirm */
     770          18 :         errno = 0;
     771          18 :         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
     772          18 :                 if (errno == ENOENT)
     773          18 :                         r = 0;
     774             :                 else
     775           0 :                         r = errno_or_else(EIO);
     776             :         }
     777             : 
     778          18 :         if (delete_root) {
     779          18 :                 if (rmdir(fs) < 0 && errno != ENOENT)
     780           0 :                         return -errno;
     781             :         }
     782             :         /* NOTE(review): cg_get_path() accepts controller == NULL, but controller is passed to streq() unchecked below - confirm callers never pass NULL */
     783          18 :         q = cg_hybrid_unified();
     784          18 :         if (q < 0)
     785           0 :                 return q;
     786          18 :         if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
     787           0 :                 q = cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);
     788           0 :                 if (q < 0)
     789           0 :                         log_warning_errno(q, "Failed to trim compat systemd cgroup %s: %m", path);
     790             :         }
     791             : 
     792          18 :         return r;
     793             : }
     794             : 
     795             : /* Create a cgroup in the hierarchy of controller (parent directories are handled by mkdir_parents()).
     796             :  * Returns 0 if the group already existed, 1 if it was newly created, a negative error otherwise.
     797             :  * On hybrid setups a new group is also created in the legacy systemd hierarchy; a failure there is only logged. */
     798          48 : int cg_create(const char *controller, const char *path) {
     799          48 :         _cleanup_free_ char *fs = NULL;
     800             :         int r;
     801             : 
     802          48 :         r = cg_get_path_and_check(controller, path, NULL, &fs);
     803          48 :         if (r < 0)
     804           0 :                 return r;
     805             : 
     806          48 :         r = mkdir_parents(fs, 0755);
     807          48 :         if (r < 0)
     808           0 :                 return r;
     809             :         /* -EEXIST is not treated as an error: it is reported as "already existed" (0) */
     810          48 :         r = mkdir_errno_wrapper(fs, 0755);
     811          48 :         if (r == -EEXIST)
     812          18 :                 return 0;
     813          30 :         if (r < 0)
     814          30 :                 return r;
     815             :         /* Newly created: mirror the group into the legacy systemd hierarchy when running in hybrid mode */
     816           0 :         r = cg_hybrid_unified();
     817           0 :         if (r < 0)
     818           0 :                 return r;
     819             : 
     820           0 :         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
     821           0 :                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
     822           0 :                 if (r < 0)
     823           0 :                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
     824             :         }
     825             : 
     826           0 :         return 1;
     827             : }
     828             : /* Creates the cgroup and then attaches pid to it. Returns the result of cg_create() (0 or 1) if both steps succeed, otherwise the negative error of the step that failed. */
     829          11 : int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
     830             :         int r, q;
     831             : 
     832          11 :         assert(pid >= 0);
     833             : 
     834          11 :         r = cg_create(controller, path);
     835          11 :         if (r < 0)
     836          11 :                 return r;
     837             : 
     838           0 :         q = cg_attach(controller, path, pid);
     839           0 :         if (q < 0)
     840           0 :                 return q;
     841             : 
     842             :         /* This does not remove the cgroup on failure */
     843           0 :         return r;
     844             : }
     845             : /* Attaches pid to the cgroup by writing it to the group's "cgroup.procs" file. Returns 0 on success, a negative error otherwise. */
     846           6 : int cg_attach(const char *controller, const char *path, pid_t pid) {
     847           6 :         _cleanup_free_ char *fs = NULL;
     848             :         char c[DECIMAL_STR_MAX(pid_t) + 2];
     849             :         int r;
     850             : 
     851           6 :         assert(path);
     852           6 :         assert(pid >= 0);
     853             : 
     854           6 :         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
     855           6 :         if (r < 0)
     856           0 :                 return r;
     857             :         /* A pid of 0 is replaced by the PID from getpid_cached() */
     858           6 :         if (pid == 0)
     859           0 :                 pid = getpid_cached();
     860             : 
     861           6 :         xsprintf(c, PID_FMT "\n", pid);
     862             :         /* Write the newline-terminated PID string to cgroup.procs */
     863           6 :         r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
     864           6 :         if (r < 0)
     865           6 :                 return r;
     866             :         /* In hybrid mode also attach in the legacy systemd hierarchy; a failure there is only logged */
     867           0 :         r = cg_hybrid_unified();
     868           0 :         if (r < 0)
     869           0 :                 return r;
     870             : 
     871           0 :         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
     872           0 :                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
     873           0 :                 if (r < 0)
     874           0 :                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
     875             :         }
     876             : 
     877           0 :         return 0;
     878             : }
     879             : /* Like cg_attach(), but if attaching to path fails, retries with each prefix of path produced by PATH_FOREACH_PREFIX() and returns the first successful result. If no attempt succeeds, the error of the first attempt is returned. */
     880           0 : int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
     881             :         int r;
     882             : 
     883           0 :         assert(controller);
     884           0 :         assert(path);
     885           0 :         assert(pid >= 0);
     886             : 
     887           0 :         r = cg_attach(controller, path, pid);
     888           0 :         if (r < 0) {
     889           0 :                 char prefix[strlen(path) + 1]; /* scratch buffer sized to hold path itself */
     890             : 
     891             :                 /* This didn't work? Then let's try all prefixes of
     892             :                  * the destination */
     893             : 
     894           0 :                 PATH_FOREACH_PREFIX(prefix, path) {
     895             :                         int q;
     896             : 
     897           0 :                         q = cg_attach(controller, prefix, pid);
     898           0 :                         if (q >= 0)
     899           0 :                                 return q;
     900             :                 }
     901             :         }
     902             : 
     903           0 :         return r;
     904             : }
     905             : /* Applies uid/gid to the cgroup directory (mode 0755) and to a fixed list of its attribute files (mode 0644). Returns 0 right away if both uid and gid are invalid. Failures on attributes not marked fatal are only logged. */
     906           0 : int cg_set_access(
     907             :                 const char *controller,
     908             :                 const char *path,
     909             :                 uid_t uid,
     910             :                 gid_t gid) {
     911             : 
     912             :         struct Attribute {
     913             :                 const char *name;
     914             :                 bool fatal; /* if true, a failure on this file makes cg_set_access() return the error */
     915             :         };
     916             : 
     917             :         /* cgroup v1, aka legacy/non-unified */
     918             :         static const struct Attribute legacy_attributes[] = {
     919             :                 { "cgroup.procs",           true  },
     920             :                 { "tasks",                  false },
     921             :                 { "cgroup.clone_children",  false },
     922             :                 {}, /* sentinel: the NULL name ends the list */
     923             :         };
     924             : 
     925             :         /* cgroup v2, aka unified */
     926             :         static const struct Attribute unified_attributes[] = {
     927             :                 { "cgroup.procs",           true  },
     928             :                 { "cgroup.subtree_control", true  },
     929             :                 { "cgroup.threads",         false },
     930             :                 {}, /* sentinel: the NULL name ends the list */
     931             :         };
     932             :         /* Indexed below with the result of cg_unified_controller() */
     933             :         static const struct Attribute* const attributes[] = {
     934             :                 [false] = legacy_attributes,
     935             :                 [true]  = unified_attributes,
     936             :         };
     937             : 
     938           0 :         _cleanup_free_ char *fs = NULL;
     939             :         const struct Attribute *i;
     940             :         int r, unified;
     941             : 
     942           0 :         assert(path);
     943             : 
     944           0 :         if (uid == UID_INVALID && gid == GID_INVALID)
     945           0 :                 return 0;
     946             : 
     947           0 :         unified = cg_unified_controller(controller);
     948           0 :         if (unified < 0)
     949           0 :                 return unified;
     950             :         /* NOTE(review): attributes[unified] below assumes a non-negative result is exactly 0 or 1 - confirm against cg_unified_controller() */
     951             :         /* Configure access to the cgroup itself */
     952           0 :         r = cg_get_path(controller, path, NULL, &fs);
     953           0 :         if (r < 0)
     954           0 :                 return r;
     955             : 
     956           0 :         r = chmod_and_chown(fs, 0755, uid, gid);
     957           0 :         if (r < 0)
     958           0 :                 return r;
     959             : 
     960             :         /* Configure access to the cgroup's attributes */
     961           0 :         for (i = attributes[unified]; i->name; i++) {
     962           0 :                 fs = mfree(fs);
     963             : 
     964           0 :                 r = cg_get_path(controller, path, i->name, &fs);
     965           0 :                 if (r < 0)
     966           0 :                         return r;
     967             : 
     968           0 :                 r = chmod_and_chown(fs, 0644, uid, gid);
     969           0 :                 if (r < 0) {
     970           0 :                         if (i->fatal)
     971           0 :                                 return r;
     972             : 
     973           0 :                         log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
     974             :                 }
     975             :         }
     976             : 
     977           0 :         if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
     978           0 :                 r = cg_hybrid_unified();
     979           0 :                 if (r < 0)
     980           0 :                         return r;
     981           0 :                 if (r > 0) {
     982             :                         /* Always propagate access mode from unified to legacy controller */
     983           0 :                         r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
     984           0 :                         if (r < 0)
     985           0 :                                 log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
     986             :                 }
     987             :         }
     988             : 
     989           0 :         return 0;
     990             : }
     991             : /* Sets the extended attribute name on the cgroup's directory via setxattr(). Returns 0 on success, -errno if setxattr() fails, or the error of cg_get_path(). */
     992           0 : int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags) {
     993           0 :         _cleanup_free_ char *fs = NULL;
     994             :         int r;
     995             : 
     996           0 :         assert(path);
     997           0 :         assert(name);
     998           0 :         assert(value || size <= 0); /* a NULL value is only acceptable together with size 0 */
     999             : 
    1000           0 :         r = cg_get_path(controller, path, NULL, &fs);
    1001           0 :         if (r < 0)
    1002           0 :                 return r;
    1003             : 
    1004           0 :         if (setxattr(fs, name, value, size, flags) < 0)
    1005           0 :                 return -errno;
    1006             : 
    1007           0 :         return 0;
    1008             : }
    1009             : /* Reads the extended attribute name of the cgroup's directory into value (buffer of size bytes) via getxattr(). Returns the non-negative result of getxattr() cast to int, -errno if it fails, or the error of cg_get_path(). */
    1010           0 : int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size) {
    1011           0 :         _cleanup_free_ char *fs = NULL;
    1012             :         ssize_t n;
    1013             :         int r;
    1014             : 
    1015           0 :         assert(path);
    1016           0 :         assert(name);
    1017             : 
    1018           0 :         r = cg_get_path(controller, path, NULL, &fs);
    1019           0 :         if (r < 0)
    1020           0 :                 return r;
    1021             : 
    1022           0 :         n = getxattr(fs, name, value, size);
    1023           0 :         if (n < 0)
    1024           0 :                 return -errno;
    1025             : 
    1026           0 :         return (int) n;
    1027             : }
    1028             : /* Looks up the cgroup path of process pid for controller (SYSTEMD_CGROUP_CONTROLLER if NULL) in the process' "cgroup" procfs file and stores a strdup()ed copy in *path. Returns 0 on success, -EINVAL for an invalid controller, -ESRCH if the file does not exist, -ENODATA if no entry matches, -ENOMEM on allocation failure. */
    1029        2680 : int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
    1030        2680 :         _cleanup_fclose_ FILE *f = NULL;
    1031             :         const char *fs, *controller_str;
    1032             :         int unified, r;
    1033        2680 :         size_t cs = 0;
    1034             : 
    1035        2680 :         assert(path);
    1036        2680 :         assert(pid >= 0);
    1037             : 
    1038        2680 :         if (controller) {
    1039        2665 :                 if (!cg_controller_is_valid(controller))
    1040           0 :                         return -EINVAL;
    1041             :         } else
    1042          15 :                 controller = SYSTEMD_CGROUP_CONTROLLER;
    1043             :         /* controller_str and cs are only set, and only read, on the legacy (unified == 0) code path */
    1044        2680 :         unified = cg_unified_controller(controller);
    1045        2680 :         if (unified < 0)
    1046           0 :                 return unified;
    1047        2680 :         if (unified == 0) {
    1048           0 :                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
    1049           0 :                         controller_str = SYSTEMD_CGROUP_CONTROLLER_LEGACY;
    1050             :                 else
    1051           0 :                         controller_str = controller;
    1052             : 
    1053           0 :                 cs = strlen(controller_str);
    1054             :         }
    1055             : 
    1056        2680 :         fs = procfs_file_alloca(pid, "cgroup");
    1057        2680 :         r = fopen_unlocked(fs, "re", &f);
    1058        2680 :         if (r == -ENOENT)
    1059           0 :                 return -ESRCH;
    1060        2680 :         if (r < 0)
    1061           0 :                 return r;
    1062             : 
    1063       29480 :         for (;;) {
    1064       32160 :                 _cleanup_free_ char *line = NULL;
    1065             :                 char *e, *p;
    1066             : 
    1067       32160 :                 r = read_line(f, LONG_LINE_MAX, &line);
    1068       32160 :                 if (r < 0)
    1069           0 :                         return r;
    1070       32160 :                 if (r == 0)
    1071           0 :                         break;
    1072             :                 /* Unified: only the line starting with "0:" matters; the path follows the next ':' */
    1073       32160 :                 if (unified) {
    1074       32160 :                         e = startswith(line, "0:");
    1075       32160 :                         if (!e)
    1076       29480 :                                 continue;
    1077             : 
    1078        2680 :                         e = strchr(e, ':');
    1079        2680 :                         if (!e)
    1080           0 :                                 continue;
    1081             :                 } else {
    1082             :                         char *l;
    1083             :                         size_t k;
    1084             :                         const char *word, *state;
    1085           0 :                         bool found = false;
    1086             :                         /* Legacy: skip the first ':'-separated field, then search the comma-separated second field for controller_str */
    1087           0 :                         l = strchr(line, ':');
    1088           0 :                         if (!l)
    1089           0 :                                 continue;
    1090             : 
    1091           0 :                         l++;
    1092           0 :                         e = strchr(l, ':');
    1093           0 :                         if (!e)
    1094           0 :                                 continue;
    1095             : 
    1096           0 :                         *e = 0;
    1097           0 :                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
    1098           0 :                                 if (k == cs && memcmp(word, controller_str, cs) == 0) {
    1099           0 :                                         found = true;
    1100           0 :                                         break;
    1101             :                                 }
    1102           0 :                         if (!found)
    1103           0 :                                 continue;
    1104             :                 }
    1105             :                 /* In both cases e points at the separator (or the NUL written over it) right before the path */
    1106        2680 :                 p = strdup(e + 1);
    1107        2680 :                 if (!p)
    1108           0 :                         return -ENOMEM;
    1109             : 
    1110             :                 /* Truncate suffix indicating the process is a zombie */
    1111        2680 :                 e = endswith(p, " (deleted)");
    1112        2680 :                 if (e)
    1113           0 :                         *e = 0;
    1114             : 
    1115        2680 :                 *path = p;
    1116        2680 :                 return 0;
    1117             :         }
    1118             :         /* read_line() returned 0 (presumably end of file) before any entry matched */
    1119           0 :         return -ENODATA;
    1120             : }
    1121             : /* Installs agent in the controller's "release_agent" file and enables "notify_on_release". Returns 1 if notify_on_release was switched from "0" to "1", 0 if it already was "1", -EEXIST if a different agent is already set, -EOPNOTSUPP on the unified hierarchy, -EIO if notify_on_release holds anything else. */
    1122           0 : int cg_install_release_agent(const char *controller, const char *agent) {
    1123           0 :         _cleanup_free_ char *fs = NULL, *contents = NULL;
    1124             :         const char *sc;
    1125             :         int r;
    1126             : 
    1127           0 :         assert(agent);
    1128             : 
    1129           0 :         r = cg_unified_controller(controller);
    1130           0 :         if (r < 0)
    1131           0 :                 return r;
    1132           0 :         if (r > 0) /* doesn't apply to unified hierarchy */
    1133           0 :                 return -EOPNOTSUPP;
    1134             : 
    1135           0 :         r = cg_get_path(controller, NULL, "release_agent", &fs);
    1136           0 :         if (r < 0)
    1137           0 :                 return r;
    1138             :         /* Write the agent only if none is configured yet; an existing different agent is never replaced */
    1139           0 :         r = read_one_line_file(fs, &contents);
    1140           0 :         if (r < 0)
    1141           0 :                 return r;
    1142             : 
    1143           0 :         sc = strstrip(contents);
    1144           0 :         if (isempty(sc)) {
    1145           0 :                 r = write_string_file(fs, agent, WRITE_STRING_FILE_DISABLE_BUFFER);
    1146           0 :                 if (r < 0)
    1147           0 :                         return r;
    1148           0 :         } else if (!path_equal(sc, agent))
    1149           0 :                 return -EEXIST;
    1150             :         /* Second step: make sure notify_on_release is switched on */
    1151           0 :         fs = mfree(fs);
    1152           0 :         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
    1153           0 :         if (r < 0)
    1154           0 :                 return r;
    1155             : 
    1156           0 :         contents = mfree(contents);
    1157           0 :         r = read_one_line_file(fs, &contents);
    1158           0 :         if (r < 0)
    1159           0 :                 return r;
    1160             : 
    1161           0 :         sc = strstrip(contents);
    1162           0 :         if (streq(sc, "0")) {
    1163           0 :                 r = write_string_file(fs, "1", WRITE_STRING_FILE_DISABLE_BUFFER);
    1164           0 :                 if (r < 0)
    1165           0 :                         return r;
    1166             : 
    1167           0 :                 return 1;
    1168             :         }
    1169             : 
    1170           0 :         if (!streq(sc, "1"))
    1171           0 :                 return -EIO;
    1172             : 
    1173           0 :         return 0;
    1174             : }
    1175             : /* Writes "0" to the controller's "notify_on_release" file and then an empty string to its "release_agent" file. Returns 0 on success, -EOPNOTSUPP on the unified hierarchy, or a negative error. */
    1176           0 : int cg_uninstall_release_agent(const char *controller) {
    1177           0 :         _cleanup_free_ char *fs = NULL;
    1178             :         int r;
    1179             : 
    1180           0 :         r = cg_unified_controller(controller);
    1181           0 :         if (r < 0)
    1182           0 :                 return r;
    1183           0 :         if (r > 0) /* Doesn't apply to unified hierarchy */
    1184           0 :                 return -EOPNOTSUPP;
    1185             : 
    1186           0 :         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
    1187           0 :         if (r < 0)
    1188           0 :                 return r;
    1189             :         /* Notifications are switched off before the agent is cleared */
    1190           0 :         r = write_string_file(fs, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
    1191           0 :         if (r < 0)
    1192           0 :                 return r;
    1193             : 
    1194           0 :         fs = mfree(fs);
    1195             : 
    1196           0 :         r = cg_get_path(controller, NULL, "release_agent", &fs);
    1197           0 :         if (r < 0)
    1198           0 :                 return r;
    1199             : 
    1200           0 :         r = write_string_file(fs, "", WRITE_STRING_FILE_DISABLE_BUFFER);
    1201           0 :         if (r < 0)
    1202           0 :                 return r;
    1203             : 
    1204           0 :         return 0;
    1205             : }
    1206             : /* Returns true (1) if the cgroup does not exist (-ENOENT) or cg_read_pid() returns 0 on its process list (presumably: no PID to read), false (0) if a PID was read, negative on any other error. Subgroups are not examined. */
    1207           0 : int cg_is_empty(const char *controller, const char *path) {
    1208           0 :         _cleanup_fclose_ FILE *f = NULL;
    1209             :         pid_t pid;
    1210             :         int r;
    1211             : 
    1212           0 :         assert(path);
    1213             : 
    1214           0 :         r = cg_enumerate_processes(controller, path, &f);
    1215           0 :         if (r == -ENOENT)
    1216           0 :                 return true;
    1217           0 :         if (r < 0)
    1218           0 :                 return r;
    1219             : 
    1220           0 :         r = cg_read_pid(f, &pid);
    1221           0 :         if (r < 0)
    1222           0 :                 return r;
    1223             : 
    1224           0 :         return r == 0;
    1225             : }
    1226             : /* Like cg_is_empty(), but subgroups are taken into account as well. Returns true (1) if empty or missing, false (0) if populated, negative on error. Unified: reads the "populated" event of the group; legacy: checks the group and recurses into each subgroup. */
    1227           0 : int cg_is_empty_recursive(const char *controller, const char *path) {
    1228             :         int r;
    1229             : 
    1230           0 :         assert(path);
    1231             : 
    1232             :         /* The root cgroup is always populated */
    1233           0 :         if (controller && empty_or_root(path))
    1234           0 :                 return false;
    1235             : 
    1236           0 :         r = cg_unified_controller(controller);
    1237           0 :         if (r < 0)
    1238           0 :                 return r;
    1239           0 :         if (r > 0) {
    1240           0 :                 _cleanup_free_ char *t = NULL;
    1241             : 
    1242             :                 /* On the unified hierarchy we can check empty state
    1243             :                  * via the "populated" attribute of "cgroup.events". */
    1244             : 
    1245           0 :                 r = cg_read_event(controller, path, "populated", &t);
    1246           0 :                 if (r == -ENOENT)
    1247           0 :                         return true;
    1248           0 :                 if (r < 0)
    1249           0 :                         return r;
    1250             : 
    1251           0 :                 return streq(t, "0");
    1252             :         } else {
    1253           0 :                 _cleanup_closedir_ DIR *d = NULL;
    1254             :                 char *fn;
    1255             : 
    1256           0 :                 r = cg_is_empty(controller, path);
    1257           0 :                 if (r <= 0)
    1258           0 :                         return r;
    1259             :                 /* The group itself has no processes; it only counts as empty if every subgroup is empty too */
    1260           0 :                 r = cg_enumerate_subgroups(controller, path, &d);
    1261           0 :                 if (r == -ENOENT)
    1262           0 :                         return true;
    1263           0 :                 if (r < 0)
    1264           0 :                         return r;
    1265             : 
    1266           0 :                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
    1267           0 :                         _cleanup_free_ char *p = NULL;
    1268             : 
    1269           0 :                         p = path_join(path, fn);
    1270           0 :                         free(fn); /* fn came from cg_read_subgroup() and is no longer needed once joined into p */
    1271           0 :                         if (!p)
    1272           0 :                                 return -ENOMEM;
    1273             : 
    1274           0 :                         r = cg_is_empty_recursive(controller, p);
    1275           0 :                         if (r <= 0)
    1276           0 :                                 return r;
    1277             :                 }
    1278           0 :                 if (r < 0)
    1279           0 :                         return r;
    1280             : 
    1281           0 :                 return true;
    1282             :         }
    1283             : }
    1284             : 
    1285           0 : int cg_split_spec(const char *spec, char **controller, char **path) {
    1286           0 :         char *t = NULL, *u = NULL;
    1287             :         const char *e;
    1288             : 
    1289           0 :         assert(spec);
    1290             : 
    1291           0 :         if (*spec == '/') {
    1292           0 :                 if (!path_is_normalized(spec))
    1293           0 :                         return -EINVAL;
    1294             : 
    1295           0 :                 if (path) {
    1296           0 :                         t = strdup(spec);
    1297           0 :                         if (!t)
    1298           0 :                                 return -ENOMEM;
    1299             : 
    1300           0 :                         *path = path_simplify(t, false);
    1301             :                 }
    1302             : 
    1303           0 :                 if (controller)
    1304           0 :                         *controller = NULL;
    1305             : 
    1306           0 :                 return 0;
    1307             :         }
    1308             : 
    1309           0 :         e = strchr(spec, ':');
    1310           0 :         if (!e) {
    1311           0 :                 if (!cg_controller_is_valid(spec))
    1312           0 :                         return -EINVAL;
    1313             : 
    1314           0 :                 if (controller) {
    1315           0 :                         t = strdup(spec);
    1316           0 :                         if (!t)
    1317           0 :                                 return -ENOMEM;
    1318             : 
    1319           0 :                         *controller = t;
    1320             :                 }
    1321             : 
    1322           0 :                 if (path)
    1323           0 :                         *path = NULL;
    1324             : 
    1325           0 :                 return 0;
    1326             :         }
    1327             : 
    1328           0 :         t = strndup(spec, e-spec);
    1329           0 :         if (!t)
    1330           0 :                 return -ENOMEM;
    1331           0 :         if (!cg_controller_is_valid(t)) {
    1332           0 :                 free(t);
    1333           0 :                 return -EINVAL;
    1334             :         }
    1335             : 
    1336           0 :         if (isempty(e+1))
    1337           0 :                 u = NULL;
    1338             :         else {
    1339           0 :                 u = strdup(e+1);
    1340           0 :                 if (!u) {
    1341           0 :                         free(t);
    1342           0 :                         return -ENOMEM;
    1343             :                 }
    1344             : 
    1345           0 :                 if (!path_is_normalized(u) ||
    1346           0 :                     !path_is_absolute(u)) {
    1347           0 :                         free(t);
    1348           0 :                         free(u);
    1349           0 :                         return -EINVAL;
    1350             :                 }
    1351             : 
    1352           0 :                 path_simplify(u, false);
    1353             :         }
    1354             : 
    1355           0 :         if (controller)
    1356           0 :                 *controller = t;
    1357             :         else
    1358           0 :                 free(t);
    1359             : 
    1360           0 :         if (path)
    1361           0 :                 *path = u;
    1362             :         else
    1363           0 :                 free(u);
    1364             : 
    1365           0 :         return 0;
    1366             : }
    1367             : 
    1368           0 : int cg_mangle_path(const char *path, char **result) {
    1369           0 :         _cleanup_free_ char *c = NULL, *p = NULL;
    1370             :         char *t;
    1371             :         int r;
    1372             : 
    1373           0 :         assert(path);
    1374           0 :         assert(result);
    1375             : 
    1376             :         /* First, check if it already is a filesystem path */
    1377           0 :         if (path_startswith(path, "/sys/fs/cgroup")) {
    1378             : 
    1379           0 :                 t = strdup(path);
    1380           0 :                 if (!t)
    1381           0 :                         return -ENOMEM;
    1382             : 
    1383           0 :                 *result = path_simplify(t, false);
    1384           0 :                 return 0;
    1385             :         }
    1386             : 
    1387             :         /* Otherwise, treat it as cg spec */
    1388           0 :         r = cg_split_spec(path, &c, &p);
    1389           0 :         if (r < 0)
    1390           0 :                 return r;
    1391             : 
    1392           0 :         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
    1393             : }
    1394             : 
    1395        1269 : int cg_get_root_path(char **path) {
    1396             :         char *p, *e;
    1397             :         int r;
    1398             : 
    1399        1269 :         assert(path);
    1400             : 
    1401        1269 :         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
    1402        1269 :         if (r < 0)
    1403           0 :                 return r;
    1404             : 
    1405        1269 :         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
    1406        1269 :         if (!e)
    1407           0 :                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
    1408        1269 :         if (!e)
    1409           0 :                 e = endswith(p, "/system"); /* even more legacy */
    1410        1269 :         if (e)
    1411        1269 :                 *e = 0;
    1412             : 
    1413        1269 :         *path = p;
    1414        1269 :         return 0;
    1415             : }
    1416             : 
    1417        1228 : int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
    1418        1228 :         _cleanup_free_ char *rt = NULL;
    1419             :         char *p;
    1420             :         int r;
    1421             : 
    1422        1228 :         assert(cgroup);
    1423        1228 :         assert(shifted);
    1424             : 
    1425        1228 :         if (!root) {
    1426             :                 /* If the root was specified let's use that, otherwise
    1427             :                  * let's determine it from PID 1 */
    1428             : 
    1429        1212 :                 r = cg_get_root_path(&rt);
    1430        1212 :                 if (r < 0)
    1431           0 :                         return r;
    1432             : 
    1433        1212 :                 root = rt;
    1434             :         }
    1435             : 
    1436        1228 :         p = path_startswith(cgroup, root);
    1437        1228 :         if (p && p > cgroup)
    1438           2 :                 *shifted = p - 1;
    1439             :         else
    1440        1226 :                 *shifted = cgroup;
    1441             : 
    1442        1228 :         return 0;
    1443             : }
    1444             : 
    1445        1212 : int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
    1446        1212 :         _cleanup_free_ char *raw = NULL;
    1447             :         const char *c;
    1448             :         int r;
    1449             : 
    1450        1212 :         assert(pid >= 0);
    1451        1212 :         assert(cgroup);
    1452             : 
    1453        1212 :         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
    1454        1212 :         if (r < 0)
    1455           0 :                 return r;
    1456             : 
    1457        1212 :         r = cg_shift_path(raw, root, &c);
    1458        1212 :         if (r < 0)
    1459           0 :                 return r;
    1460             : 
    1461        1212 :         if (c == raw)
    1462        1212 :                 *cgroup = TAKE_PTR(raw);
    1463             :         else {
    1464             :                 char *n;
    1465             : 
    1466           0 :                 n = strdup(c);
    1467           0 :                 if (!n)
    1468           0 :                         return -ENOMEM;
    1469             : 
    1470           0 :                 *cgroup = n;
    1471             :         }
    1472             : 
    1473        1212 :         return 0;
    1474             : }
    1475             : 
    1476        1031 : int cg_path_decode_unit(const char *cgroup, char **unit) {
    1477             :         char *c, *s;
    1478             :         size_t n;
    1479             : 
    1480        1031 :         assert(cgroup);
    1481        1031 :         assert(unit);
    1482             : 
    1483        1031 :         n = strcspn(cgroup, "/");
    1484        1031 :         if (n < 3)
    1485          80 :                 return -ENXIO;
    1486             : 
    1487         951 :         c = strndupa(cgroup, n);
    1488         951 :         c = cg_unescape(c);
    1489             : 
    1490         951 :         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
    1491           8 :                 return -ENXIO;
    1492             : 
    1493         943 :         s = strdup(c);
    1494         943 :         if (!s)
    1495           0 :                 return -ENOMEM;
    1496             : 
    1497         943 :         *unit = s;
    1498         943 :         return 0;
    1499             : }
    1500             : 
    1501        3063 : static bool valid_slice_name(const char *p, size_t n) {
    1502             : 
    1503        3063 :         if (!p)
    1504           0 :                 return false;
    1505             : 
    1506        3063 :         if (n < STRLEN("x.slice"))
    1507          99 :                 return false;
    1508             : 
    1509        2964 :         if (memcmp(p + n - 6, ".slice", 6) == 0) {
    1510        1825 :                 char buf[n+1], *c;
    1511             : 
    1512        1825 :                 memcpy(buf, p, n);
    1513        1825 :                 buf[n] = 0;
    1514             : 
    1515        1825 :                 c = cg_unescape(buf);
    1516             : 
    1517        1825 :                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
    1518             :         }
    1519             : 
    1520        1139 :         return false;
    1521             : }
    1522             : 
    1523         871 : static const char *skip_slices(const char *p) {
    1524         871 :         assert(p);
    1525             : 
    1526             :         /* Skips over all slice assignments */
    1527             : 
    1528        1228 :         for (;;) {
    1529             :                 size_t n;
    1530             : 
    1531        2099 :                 p += strspn(p, "/");
    1532             : 
    1533        2099 :                 n = strcspn(p, "/");
    1534        2099 :                 if (!valid_slice_name(p, n))
    1535         871 :                         return p;
    1536             : 
    1537        1228 :                 p += n;
    1538             :         }
    1539             : }
    1540             : 
    1541         668 : int cg_path_get_unit(const char *path, char **ret) {
    1542             :         const char *e;
    1543             :         char *unit;
    1544             :         int r;
    1545             : 
    1546         668 :         assert(path);
    1547         668 :         assert(ret);
    1548             : 
    1549         668 :         e = skip_slices(path);
    1550             : 
    1551         668 :         r = cg_path_decode_unit(e, &unit);
    1552         668 :         if (r < 0)
    1553          84 :                 return r;
    1554             : 
    1555             :         /* We skipped over the slices, don't accept any now */
    1556         584 :         if (endswith(unit, ".slice")) {
    1557           0 :                 free(unit);
    1558           0 :                 return -ENXIO;
    1559             :         }
    1560             : 
    1561         584 :         *ret = unit;
    1562         584 :         return 0;
    1563             : }
    1564             : 
    1565         173 : int cg_pid_get_unit(pid_t pid, char **unit) {
    1566         173 :         _cleanup_free_ char *cgroup = NULL;
    1567             :         int r;
    1568             : 
    1569         173 :         assert(unit);
    1570             : 
    1571         173 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1572         173 :         if (r < 0)
    1573           0 :                 return r;
    1574             : 
    1575         173 :         return cg_path_get_unit(cgroup, unit);
    1576             : }
    1577             : 
    1578             : /**
    1579             :  * Skip session-*.scope, but require it to be there.
    1580             :  */
    1581         157 : static const char *skip_session(const char *p) {
    1582             :         size_t n;
    1583             : 
    1584         157 :         if (isempty(p))
    1585           4 :                 return NULL;
    1586             : 
    1587         153 :         p += strspn(p, "/");
    1588             : 
    1589         153 :         n = strcspn(p, "/");
    1590         153 :         if (n < STRLEN("session-x.scope"))
    1591          35 :                 return NULL;
    1592             : 
    1593         118 :         if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
    1594          89 :                 char buf[n - 8 - 6 + 1];
    1595             : 
    1596          89 :                 memcpy(buf, p + 8, n - 8 - 6);
    1597          89 :                 buf[n - 8 - 6] = 0;
    1598             : 
    1599             :                 /* Note that session scopes never need unescaping,
    1600             :                  * since they cannot conflict with the kernel's own
    1601             :                  * names, hence we don't need to call cg_unescape()
    1602             :                  * here. */
    1603             : 
    1604          89 :                 if (!session_id_valid(buf))
    1605           0 :                         return false;
    1606             : 
    1607          89 :                 p += n;
    1608          89 :                 p += strspn(p, "/");
    1609          89 :                 return p;
    1610             :         }
    1611             : 
    1612          29 :         return NULL;
    1613             : }
    1614             : 
    1615             : /**
    1616             :  * Skip user@*.service, but require it to be there.
    1617             :  */
    1618         203 : static const char *skip_user_manager(const char *p) {
    1619             :         size_t n;
    1620             : 
    1621         203 :         if (isempty(p))
    1622           4 :                 return NULL;
    1623             : 
    1624         199 :         p += strspn(p, "/");
    1625             : 
    1626         199 :         n = strcspn(p, "/");
    1627         199 :         if (n < STRLEN("user@x.service"))
    1628          29 :                 return NULL;
    1629             : 
    1630         170 :         if (memcmp(p, "user@", 5) == 0 && memcmp(p + n - 8, ".service", 8) == 0) {
    1631          46 :                 char buf[n - 5 - 8 + 1];
    1632             : 
    1633          46 :                 memcpy(buf, p + 5, n - 5 - 8);
    1634          46 :                 buf[n - 5 - 8] = 0;
    1635             : 
    1636             :                 /* Note that user manager services never need unescaping,
    1637             :                  * since they cannot conflict with the kernel's own
    1638             :                  * names, hence we don't need to call cg_unescape()
    1639             :                  * here. */
    1640             : 
    1641          46 :                 if (parse_uid(buf, NULL) < 0)
    1642           0 :                         return NULL;
    1643             : 
    1644          46 :                 p += n;
    1645          46 :                 p += strspn(p, "/");
    1646             : 
    1647          46 :                 return p;
    1648             :         }
    1649             : 
    1650         124 :         return NULL;
    1651             : }
    1652             : 
    1653         203 : static const char *skip_user_prefix(const char *path) {
    1654             :         const char *e, *t;
    1655             : 
    1656         203 :         assert(path);
    1657             : 
    1658             :         /* Skip slices, if there are any */
    1659         203 :         e = skip_slices(path);
    1660             : 
    1661             :         /* Skip the user manager, if it's in the path now... */
    1662         203 :         t = skip_user_manager(e);
    1663         203 :         if (t)
    1664          46 :                 return t;
    1665             : 
    1666             :         /* Alternatively skip the user session if it is in the path... */
    1667         157 :         return skip_session(e);
    1668             : }
    1669             : 
    1670         188 : int cg_path_get_user_unit(const char *path, char **ret) {
    1671             :         const char *t;
    1672             : 
    1673         188 :         assert(path);
    1674         188 :         assert(ret);
    1675             : 
    1676         188 :         t = skip_user_prefix(path);
    1677         188 :         if (!t)
    1678          60 :                 return -ENXIO;
    1679             : 
    1680             :         /* And from here on it looks pretty much the same as for a system unit, hence let's use the same
    1681             :          * parser. */
    1682         128 :         return cg_path_get_unit(t, ret);
    1683             : }
    1684             : 
    1685         173 : int cg_pid_get_user_unit(pid_t pid, char **unit) {
    1686         173 :         _cleanup_free_ char *cgroup = NULL;
    1687             :         int r;
    1688             : 
    1689         173 :         assert(unit);
    1690             : 
    1691         173 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1692         173 :         if (r < 0)
    1693           0 :                 return r;
    1694             : 
    1695         173 :         return cg_path_get_user_unit(cgroup, unit);
    1696             : }
    1697             : 
    1698         172 : int cg_path_get_machine_name(const char *path, char **machine) {
    1699         172 :         _cleanup_free_ char *u = NULL;
    1700             :         const char *sl;
    1701             :         int r;
    1702             : 
    1703         172 :         r = cg_path_get_unit(path, &u);
    1704         172 :         if (r < 0)
    1705           0 :                 return r;
    1706             : 
    1707         860 :         sl = strjoina("/run/systemd/machines/unit:", u);
    1708         172 :         return readlink_malloc(sl, machine);
    1709             : }
    1710             : 
    1711         172 : int cg_pid_get_machine_name(pid_t pid, char **machine) {
    1712         172 :         _cleanup_free_ char *cgroup = NULL;
    1713             :         int r;
    1714             : 
    1715         172 :         assert(machine);
    1716             : 
    1717         172 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1718         172 :         if (r < 0)
    1719           0 :                 return r;
    1720             : 
    1721         172 :         return cg_path_get_machine_name(cgroup, machine);
    1722             : }
    1723             : 
    1724         182 : int cg_path_get_session(const char *path, char **session) {
    1725         182 :         _cleanup_free_ char *unit = NULL;
    1726             :         char *start, *end;
    1727             :         int r;
    1728             : 
    1729         182 :         assert(path);
    1730             : 
    1731         182 :         r = cg_path_get_unit(path, &unit);
    1732         182 :         if (r < 0)
    1733           1 :                 return r;
    1734             : 
    1735         181 :         start = startswith(unit, "session-");
    1736         181 :         if (!start)
    1737          96 :                 return -ENXIO;
    1738          85 :         end = endswith(start, ".scope");
    1739          85 :         if (!end)
    1740           0 :                 return -ENXIO;
    1741             : 
    1742          85 :         *end = 0;
    1743          85 :         if (!session_id_valid(start))
    1744           1 :                 return -ENXIO;
    1745             : 
    1746          84 :         if (session) {
    1747             :                 char *rr;
    1748             : 
    1749          84 :                 rr = strdup(start);
    1750          84 :                 if (!rr)
    1751           0 :                         return -ENOMEM;
    1752             : 
    1753          84 :                 *session = rr;
    1754             :         }
    1755             : 
    1756          84 :         return 0;
    1757             : }
    1758             : 
    1759         176 : int cg_pid_get_session(pid_t pid, char **session) {
    1760         176 :         _cleanup_free_ char *cgroup = NULL;
    1761             :         int r;
    1762             : 
    1763         176 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1764         176 :         if (r < 0)
    1765           0 :                 return r;
    1766             : 
    1767         176 :         return cg_path_get_session(cgroup, session);
    1768             : }
    1769             : 
    1770         178 : int cg_path_get_owner_uid(const char *path, uid_t *uid) {
    1771         178 :         _cleanup_free_ char *slice = NULL;
    1772             :         char *start, *end;
    1773             :         int r;
    1774             : 
    1775         178 :         assert(path);
    1776             : 
    1777         178 :         r = cg_path_get_slice(path, &slice);
    1778         178 :         if (r < 0)
    1779           0 :                 return r;
    1780             : 
    1781         178 :         start = startswith(slice, "user-");
    1782         178 :         if (!start)
    1783          59 :                 return -ENXIO;
    1784         119 :         end = endswith(start, ".slice");
    1785         119 :         if (!end)
    1786           0 :                 return -ENXIO;
    1787             : 
    1788         119 :         *end = 0;
    1789         119 :         if (parse_uid(start, uid) < 0)
    1790           0 :                 return -ENXIO;
    1791             : 
    1792         119 :         return 0;
    1793             : }
    1794             : 
    1795         173 : int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
    1796         173 :         _cleanup_free_ char *cgroup = NULL;
    1797             :         int r;
    1798             : 
    1799         173 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1800         173 :         if (r < 0)
    1801           0 :                 return r;
    1802             : 
    1803         173 :         return cg_path_get_owner_uid(cgroup, uid);
    1804             : }
    1805             : 
    1806         367 : int cg_path_get_slice(const char *p, char **slice) {
    1807         367 :         const char *e = NULL;
    1808             : 
    1809         367 :         assert(p);
    1810         367 :         assert(slice);
    1811             : 
    1812             :         /* Finds the right-most slice unit from the beginning, but
    1813             :          * stops before we come to the first non-slice unit. */
    1814             : 
    1815         597 :         for (;;) {
    1816             :                 size_t n;
    1817             : 
    1818         964 :                 p += strspn(p, "/");
    1819             : 
    1820         964 :                 n = strcspn(p, "/");
    1821         964 :                 if (!valid_slice_name(p, n)) {
    1822             : 
    1823         367 :                         if (!e) {
    1824             :                                 char *s;
    1825             : 
    1826          13 :                                 s = strdup(SPECIAL_ROOT_SLICE);
    1827          13 :                                 if (!s)
    1828           0 :                                         return -ENOMEM;
    1829             : 
    1830          13 :                                 *slice = s;
    1831          13 :                                 return 0;
    1832             :                         }
    1833             : 
    1834         354 :                         return cg_path_decode_unit(e, slice);
    1835             :                 }
    1836             : 
    1837         597 :                 e = p;
    1838         597 :                 p += n;
    1839             :         }
    1840             : }
    1841             : 
    1842         173 : int cg_pid_get_slice(pid_t pid, char **slice) {
    1843         173 :         _cleanup_free_ char *cgroup = NULL;
    1844             :         int r;
    1845             : 
    1846         173 :         assert(slice);
    1847             : 
    1848         173 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1849         173 :         if (r < 0)
    1850           0 :                 return r;
    1851             : 
    1852         173 :         return cg_path_get_slice(cgroup, slice);
    1853             : }
    1854             : 
    1855          15 : int cg_path_get_user_slice(const char *p, char **slice) {
    1856             :         const char *t;
    1857          15 :         assert(p);
    1858          15 :         assert(slice);
    1859             : 
    1860          15 :         t = skip_user_prefix(p);
    1861          15 :         if (!t)
    1862           8 :                 return -ENXIO;
    1863             : 
    1864             :         /* And now it looks pretty much the same as for a system
    1865             :          * slice, so let's just use the same parser from here on. */
    1866           7 :         return cg_path_get_slice(t, slice);
    1867             : }
    1868             : 
    1869           0 : int cg_pid_get_user_slice(pid_t pid, char **slice) {
    1870           0 :         _cleanup_free_ char *cgroup = NULL;
    1871             :         int r;
    1872             : 
    1873           0 :         assert(slice);
    1874             : 
    1875           0 :         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
    1876           0 :         if (r < 0)
    1877           0 :                 return r;
    1878             : 
    1879           0 :         return cg_path_get_user_slice(cgroup, slice);
    1880             : }
    1881             : 
    1882          26 : char *cg_escape(const char *p) {
    1883          26 :         bool need_prefix = false;
    1884             : 
    1885             :         /* This implements very minimal escaping for names to be used
    1886             :          * as file names in the cgroup tree: any name which might
    1887             :          * conflict with a kernel name or is prefixed with '_' is
    1888             :          * prefixed with a '_'. That way, when reading cgroup names it
    1889             :          * is sufficient to remove a single prefixing underscore if
    1890             :          * there is one. */
    1891             : 
    1892             :         /* The return value of this function (unlike cg_unescape())
    1893             :          * needs free()! */
    1894             : 
    1895          26 :         if (IN_SET(p[0], 0, '_', '.') ||
    1896          41 :             STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
    1897          20 :             startswith(p, "cgroup."))
    1898           7 :                 need_prefix = true;
    1899             :         else {
    1900             :                 const char *dot;
    1901             : 
    1902          19 :                 dot = strrchr(p, '.');
    1903          19 :                 if (dot) {
    1904             :                         CGroupController c;
    1905          18 :                         size_t l = dot - p;
    1906             : 
    1907         171 :                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    1908             :                                 const char *n;
    1909             : 
    1910         154 :                                 n = cgroup_controller_to_string(c);
    1911             : 
    1912         154 :                                 if (l != strlen(n))
    1913         139 :                                         continue;
    1914             : 
    1915          15 :                                 if (memcmp(p, n, l) != 0)
    1916          14 :                                         continue;
    1917             : 
    1918           1 :                                 need_prefix = true;
    1919           1 :                                 break;
    1920             :                         }
    1921             :                 }
    1922             :         }
    1923             : 
    1924          26 :         if (need_prefix)
    1925           8 :                 return strjoin("_", p);
    1926             : 
    1927          18 :         return strdup(p);
    1928             : }
    1929             : 
    1930        2786 : char *cg_unescape(const char *p) {
    1931        2786 :         assert(p);
    1932             : /* Counterpart of cg_escape(): skips a single leading "_" if there is one, otherwise returns p unchanged. */
    1933             :         /* The returned pointer points into p itself, nothing is allocated. Hence the return value of
    1934             :          * this function (unlike the one of cg_escape()) doesn't need free()! */
    1935             : 
    1936        2786 :         if (p[0] == '_')
    1937          11 :                 return (char*) p+1;
    1938             : /* No escape prefix present; the cast only drops the const qualifier. */
    1939        2775 :         return (char*) p;
    1940             : }
    1941             : 
    1942             : #define CONTROLLER_VALID                        \
    1943             :         DIGITS LETTERS                          \
    1944             :         "_" /* the set of characters accepted in a controller name by cg_controller_is_valid() */
    1945             : 
    1946        3167 : bool cg_controller_is_valid(const char *p) {
    1947             :         const char *t, *s;
    1948             : /* Returns true if p is SYSTEMD_CGROUP_CONTROLLER, or (after an optional "name=" prefix) a non-empty string made only of CONTROLLER_VALID characters that does not start with "_" and is at most FILENAME_MAX bytes long. NULL is rejected. */
    1949        3167 :         if (!p)
    1950           0 :                 return false;
    1951             : 
    1952        3167 :         if (streq(p, SYSTEMD_CGROUP_CONTROLLER))
    1953        2772 :                 return true;
    1954             : /* For "name=" style names only the part after the prefix is validated. */
    1955         395 :         s = startswith(p, "name=");
    1956         395 :         if (s)
    1957           2 :                 p = s;
    1958             : /* Reject the empty string and names with a leading underscore. */
    1959         395 :         if (IN_SET(*p, 0, '_'))
    1960           4 :                 return false;
    1961             : 
    1962        2454 :         for (t = p; *t; t++)
    1963        2066 :                 if (!strchr(CONTROLLER_VALID, *t))
    1964           3 :                         return false;
    1965             : /* t now points at the terminating NUL, so t - p is the length of the name. */
    1966         388 :         if (t - p > FILENAME_MAX)
    1967           0 :                 return false;
    1968             : 
    1969         388 :         return true;
    1970             : }
    1971             : 
    1972          21 : int cg_slice_to_path(const char *unit, char **ret) {
    1973          21 :         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
    1974             :         const char *dash;
    1975             :         int r;
    1976             : /* Turns a slice unit name into a cgroup path: every dash-delimited prefix of the name becomes one "<prefix>.slice/" parent component, followed by the full unit name, each passed through cg_escape(). E.g. "a-b.slice" yields "a.slice/a-b.slice" (assuming unit_name_to_prefix() returns the name without its suffix; TODO confirm). Returns 0 and a newly allocated string in *ret, -EINVAL for unacceptable names, -ENOMEM on allocation failure, or the error of unit_name_to_prefix(). */
    1977          21 :         assert(unit);
    1978          21 :         assert(ret);
    1979             : /* The root slice maps to the empty path. */
    1980          21 :         if (streq(unit, SPECIAL_ROOT_SLICE)) {
    1981             :                 char *x;
    1982             : 
    1983           1 :                 x = strdup("");
    1984           1 :                 if (!x)
    1985           0 :                         return -ENOMEM;
    1986           1 :                 *ret = x;
    1987           1 :                 return 0;
    1988             :         }
    1989             : 
    1990          20 :         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
    1991          10 :                 return -EINVAL;
    1992             : 
    1993          10 :         if (!endswith(unit, ".slice"))
    1994           1 :                 return -EINVAL;
    1995             : 
    1996           9 :         r = unit_name_to_prefix(unit, &p);
    1997           9 :         if (r < 0)
    1998           0 :                 return r;
    1999             : 
    2000           9 :         dash = strchr(p, '-');
    2001             : 
    2002             :         /* Don't allow initial dashes */
    2003           9 :         if (dash == p)
    2004           3 :                 return -EINVAL;
    2005             : /* One iteration per dash: append the parent slice named by the text before that dash, shortest prefix first. */
    2006          12 :         while (dash) {
    2007           8 :                 _cleanup_free_ char *escaped = NULL;
    2008           8 :                 char n[dash - p + sizeof(".slice")]; /* prefix length + ".slice" + NUL (sizeof counts the terminator) */
    2009             : 
    2010             : #if HAS_FEATURE_MEMORY_SANITIZER
    2011             :                 /* msan doesn't instrument stpncpy, so it thinks
    2012             :                  * n is later used uninitialized:
    2013             :                  * https://github.com/google/sanitizers/issues/926
    2014             :                  */
    2015             :                 zero(n);
    2016             : #endif
    2017             : 
    2018             :                 /* Don't allow trailing or double dashes */
    2019           8 :                 if (IN_SET(dash[1], 0, '-'))
    2020           2 :                         return -EINVAL;
    2021             : /* Copy the prefix up to (not including) the dash, then terminate it with ".slice". */
    2022           6 :                 strcpy(stpncpy(n, p, dash - p), ".slice");
    2023           6 :                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
    2024           0 :                         return -EINVAL;
    2025             : 
    2026           6 :                 escaped = cg_escape(n);
    2027           6 :                 if (!escaped)
    2028           0 :                         return -ENOMEM;
    2029             : 
    2030           6 :                 if (!strextend(&s, escaped, "/", NULL))
    2031           0 :                         return -ENOMEM;
    2032             : 
    2033           6 :                 dash = strchr(dash+1, '-');
    2034             :         }
    2035             : /* Finally append the (escaped) full unit name as the last path component. */
    2036           4 :         e = cg_escape(unit);
    2037           4 :         if (!e)
    2038           0 :                 return -ENOMEM;
    2039             : 
    2040           4 :         if (!strextend(&s, e, NULL))
    2041           0 :                 return -ENOMEM;
    2042             : 
    2043           4 :         *ret = TAKE_PTR(s);
    2044             : 
    2045           4 :         return 0;
    2046             : }
    2047             : 
    2048           0 : int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
    2049           0 :         _cleanup_free_ char *p = NULL;
    2050             :         int r;
    2051             : /* Writes value to the attribute file that cg_get_path() resolves for (controller, path, attribute). Returns the negative error of the path lookup, otherwise the result of write_string_file(). */
    2052           0 :         r = cg_get_path(controller, path, attribute, &p);
    2053           0 :         if (r < 0)
    2054           0 :                 return r;
    2055             : 
    2056           0 :         return write_string_file(p, value, WRITE_STRING_FILE_DISABLE_BUFFER);
    2057             : }
    2058             : 
    2059          54 : int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
    2060          54 :         _cleanup_free_ char *p = NULL;
    2061             :         int r;
    2062             : /* Reads the attribute file that cg_get_path() resolves for (controller, path, attribute) into *ret using read_one_line_file(). Returns the negative error of the path lookup, otherwise the result of read_one_line_file(). */
    2063          54 :         r = cg_get_path(controller, path, attribute, &p);
    2064          54 :         if (r < 0)
    2065           0 :                 return r;
    2066             : 
    2067          54 :         return read_one_line_file(p, ret);
    2068             : }
    2069             : 
    2070           1 : int cg_get_keyed_attribute(
    2071             :                 const char *controller,
    2072             :                 const char *path,
    2073             :                 const char *attribute,
    2074             :                 char **keys,
    2075             :                 char **ret_values) {
    2076             : 
    2077           1 :         _cleanup_free_ char *filename = NULL, *contents = NULL;
    2078             :         const char *p;
    2079           1 :         size_t n, i, n_done = 0;
    2080             :         char **v;
    2081             :         int r;
    2082             : 
    2083             :         /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
    2084             :          * all keys to retrieve. The 'ret_values' parameter should be passed as a string array with the same number of
    2085             :          * entries as 'keys'. On success each entry will be set to a newly allocated copy of the matching key's value.
    2086             :          *
    2087             :          * If the attribute file doesn't exist at all returns ENOENT, if any key is not found returns ENXIO. */
    2088             : 
    2089           1 :         r = cg_get_path(controller, path, attribute, &filename);
    2090           1 :         if (r < 0)
    2091           0 :                 return r;
    2092             : 
    2093           1 :         r = read_full_file(filename, &contents, NULL);
    2094           1 :         if (r < 0)
    2095           1 :                 return r;
    2096             : 
    2097           0 :         n = strv_length(keys);
    2098           0 :         if (n == 0) /* No keys to retrieve? That's easy, we are done then */
    2099           0 :                 return 0;
    2100             : 
    2101             :         /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
    2102           0 :         v = newa0(char*, n);
    2103             : /* Walk the file line by line; on each line try every key that has not been matched yet. */
    2104           0 :         for (p = contents; *p;) {
    2105           0 :                 const char *w = NULL;
    2106             : 
    2107           0 :                 for (i = 0; i < n; i++)
    2108           0 :                         if (!v[i]) {
    2109           0 :                                 w = first_word(p, keys[i]);
    2110           0 :                                 if (w)
    2111           0 :                                         break;
    2112             :                         }
    2113             : /* If w is set the loop above was left via break, so i still indexes the key that matched. */
    2114           0 :                 if (w) {
    2115             :                         size_t l;
    2116             : /* The value is everything from w up to the end of the line. */
    2117           0 :                         l = strcspn(w, NEWLINE);
    2118           0 :                         v[i] = strndup(w, l);
    2119           0 :                         if (!v[i]) {
    2120           0 :                                 r = -ENOMEM;
    2121           0 :                                 goto fail;
    2122             :                         }
    2123             : 
    2124           0 :                         n_done++;
    2125           0 :                         if (n_done >= n)
    2126           0 :                                 goto done;
    2127             : 
    2128           0 :                         p = w + l;
    2129             :                 } else
    2130           0 :                         p += strcspn(p, NEWLINE);
    2131             : /* Step over the line terminator(s) to reach the start of the next line. */
    2132           0 :                 p += strspn(p, NEWLINE);
    2133             :         }
    2134             : /* End of file reached while at least one key is still unmatched. */
    2135           0 :         r = -ENXIO;
    2136             : 
    2137           0 : fail:
    2138           0 :         for (i = 0; i < n; i++)
    2139           0 :                 free(v[i]);
    2140             : 
    2141           0 :         return r;
    2142             : 
    2143           0 : done:
    2144           0 :         memcpy(ret_values, v, sizeof(char*) * n); /* all keys found: the strings now belong to the caller's array */
    2145           0 :         return 0;
    2146             : 
    2147             : }
    2148             : 
    2149          25 : int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
    2150             :         CGroupController c;
    2151             :         CGroupMask done;
    2152             :         bool created;
    2153             :         int r;
    2154             : 
    2155             :         /* This one will create a cgroup in our private tree, but also
    2156             :          * duplicate it in the trees specified in mask, and remove it
    2157             :          * in all others.
    2158             :          *
    2159             :          * Returns 0 if the group already existed in the systemd hierarchy,
    2160             :          * 1 on success, negative otherwise.
    2161             :          */
    2162             : 
    2163             :         /* First create the cgroup in our own hierarchy. */
    2164          25 :         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
    2165          25 :         if (r < 0)
    2166          19 :                 return r;
    2167           6 :         created = r;
    2168             : 
    2169             :         /* If we are in the unified hierarchy, we are done now */
    2170           6 :         r = cg_all_unified();
    2171           6 :         if (r < 0)
    2172           0 :                 return r;
    2173           6 :         if (r > 0)
    2174           0 :                 return created;
    2175             : /* Only controllers in CGROUP_MASK_V1 are processed below. NOTE(review): CGROUP_MASK_EXTEND_JOINED() presumably widens a mask to controllers sharing a hierarchy; confirm. */
    2176           6 :         supported &= CGROUP_MASK_V1;
    2177           6 :         mask = CGROUP_MASK_EXTEND_JOINED(mask);
    2178           6 :         done = 0;
    2179             : 
    2180             :         /* Otherwise, do the same in the other hierarchies */
    2181          60 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2182          54 :                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2183             :                 const char *n;
    2184             : 
    2185          54 :                 if (!FLAGS_SET(supported, bit))
    2186          18 :                         continue;
    2187             : /* Skip controllers already covered by an earlier iteration (recorded in 'done' below). */
    2188          36 :                 if (FLAGS_SET(done, bit))
    2189           6 :                         continue;
    2190             : /* Create the group where requested by mask, trim it elsewhere; failures here are deliberately ignored. */
    2191          30 :                 n = cgroup_controller_to_string(c);
    2192          30 :                 if (FLAGS_SET(mask, bit))
    2193          12 :                         (void) cg_create(n, path);
    2194             :                 else
    2195          18 :                         (void) cg_trim(n, path, true);
    2196             : 
    2197          30 :                 done |= CGROUP_MASK_EXTEND_JOINED(bit);
    2198             :         }
    2199             : 
    2200           6 :         return created;
    2201             : }
    2202             : 
    2203           0 : int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
    2204             :         CGroupController c;
    2205             :         CGroupMask done;
    2206             :         int r;
    2207             : /* Attaches pid to 'path' in the systemd hierarchy and then, unless cg_all_unified() reports a fully unified setup, in every supported v1 controller hierarchy. Only a failure of the first attach or of cg_all_unified() is returned; the per-controller attaches are best-effort. Returns 0 otherwise. */
    2208           0 :         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
    2209           0 :         if (r < 0)
    2210           0 :                 return r;
    2211             : 
    2212           0 :         r = cg_all_unified();
    2213           0 :         if (r < 0)
    2214           0 :                 return r;
    2215           0 :         if (r > 0)
    2216           0 :                 return 0;
    2217             : 
    2218           0 :         supported &= CGROUP_MASK_V1;
    2219           0 :         done = 0;
    2220             : 
    2221           0 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2222           0 :                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2223           0 :                 const char *p = NULL;
    2224             : 
    2225           0 :                 if (!FLAGS_SET(supported, bit))
    2226           0 :                         continue;
    2227             : /* Skip controllers already covered by an earlier iteration (recorded in 'done' below). */
    2228           0 :                 if (FLAGS_SET(done, bit))
    2229           0 :                         continue;
    2230             : /* The callback may supply a per-controller path; fall back to 'path' if it is unset or returns NULL. */
    2231           0 :                 if (path_callback)
    2232           0 :                         p = path_callback(bit, userdata);
    2233           0 :                 if (!p)
    2234           0 :                         p = path;
    2235             : 
    2236           0 :                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
    2237           0 :                 done |= CGROUP_MASK_EXTEND_JOINED(bit);
    2238             :         }
    2239             : 
    2240           0 :         return 0;
    2241             : }
    2242             : 
    2243           0 : int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
    2244             :         Iterator i;
    2245             :         void *pidp;
    2246           0 :         int r = 0;
    2247             : /* Calls cg_attach_everywhere() for every PID stored in 'pids'. Continues after failures and returns the first negative result encountered, or 0 if there was none. */
    2248           0 :         SET_FOREACH(pidp, pids, i) {
    2249           0 :                 pid_t pid = PTR_TO_PID(pidp);
    2250             :                 int q;
    2251             : 
    2252           0 :                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
    2253           0 :                 if (q < 0 && r >= 0) /* remember only the first error */
    2254           0 :                         r = q;
    2255             :         }
    2256             : 
    2257           0 :         return r;
    2258             : }
    2259             : 
    2260           0 : int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
    2261             :         CGroupController c;
    2262             :         CGroupMask done;
    2263           0 :         int r = 0, q;
    2264             : /* Migrates 'from' to 'to' within the systemd hierarchy (skipped if both paths are equal), then, unless everything is unified, migrates from the systemd hierarchy's 'to' group into each supported v1 controller hierarchy. Per-controller failures are ignored. Returns the result of the first migration (0 if skipped), or a negative error from it or from cg_all_unified(). */
    2265           0 :         if (!path_equal(from, to))  {
    2266           0 :                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
    2267           0 :                 if (r < 0)
    2268           0 :                         return r;
    2269             :         }
    2270             : 
    2271           0 :         q = cg_all_unified();
    2272           0 :         if (q < 0)
    2273           0 :                 return q;
    2274           0 :         if (q > 0)
    2275           0 :                 return r;
    2276             : 
    2277           0 :         supported &= CGROUP_MASK_V1;
    2278           0 :         done = 0;
    2279             : 
    2280           0 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2281           0 :                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2282           0 :                 const char *p = NULL;
    2283             : 
    2284           0 :                 if (!FLAGS_SET(supported, bit))
    2285           0 :                         continue;
    2286             : /* Skip controllers already covered by an earlier iteration (recorded in 'done' below). */
    2287           0 :                 if (FLAGS_SET(done, bit))
    2288           0 :                         continue;
    2289             : /* The callback may supply a per-controller target path; fall back to 'to' if it is unset or returns NULL. */
    2290           0 :                 if (to_callback)
    2291           0 :                         p = to_callback(bit, userdata);
    2292           0 :                 if (!p)
    2293           0 :                         p = to;
    2294             : /* The source is the systemd hierarchy's 'to' group, i.e. where the first step above put the processes. */
    2295           0 :                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
    2296           0 :                 done |= CGROUP_MASK_EXTEND_JOINED(bit);
    2297             :         }
    2298             : 
    2299           0 :         return r;
    2300             : }
    2301             : 
    2302           0 : int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
    2303             :         CGroupController c;
    2304             :         CGroupMask done;
    2305             :         int r, q;
    2306             : /* Calls cg_trim() on 'path' in the systemd hierarchy and then, unless everything is unified, in every supported v1 controller hierarchy. Per-controller failures are ignored. Returns the result of the first cg_trim() call, or a negative error from it or from cg_all_unified(). */
    2307           0 :         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
    2308           0 :         if (r < 0)
    2309           0 :                 return r;
    2310             : 
    2311           0 :         q = cg_all_unified();
    2312           0 :         if (q < 0)
    2313           0 :                 return q;
    2314           0 :         if (q > 0)
    2315           0 :                 return r;
    2316             : 
    2317           0 :         supported &= CGROUP_MASK_V1;
    2318           0 :         done = 0;
    2319             : 
    2320           0 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2321           0 :                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2322             : 
    2323           0 :                 if (!FLAGS_SET(supported, bit))
    2324           0 :                         continue;
    2325             : /* Skip controllers already covered by an earlier iteration (recorded in 'done' below). */
    2326           0 :                 if (FLAGS_SET(done, bit))
    2327           0 :                         continue;
    2328             : 
    2329           0 :                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
    2330           0 :                 done |= CGROUP_MASK_EXTEND_JOINED(bit);
    2331             :         }
    2332             : 
    2333           0 :         return r;
    2334             : }
    2335             : 
    2336         449 : int cg_mask_to_string(CGroupMask mask, char **ret) {
    2337         449 :         _cleanup_free_ char *s = NULL;
    2338         449 :         size_t n = 0, allocated = 0;
    2339         449 :         bool space = false;
    2340             :         CGroupController c;
    2341             : /* Formats mask as a space-separated list of controller names, in controller enumeration order. *ret is set to NULL for an empty mask and to a newly allocated string otherwise. Returns 0 on success, -ENOMEM on allocation failure. */
    2342         449 :         assert(ret);
    2343             : 
    2344         449 :         if (mask == 0) {
    2345         203 :                 *ret = NULL;
    2346         203 :                 return 0;
    2347             :         }
    2348             : 
    2349        2460 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2350             :                 const char *k;
    2351             :                 size_t l;
    2352             : 
    2353        2214 :                 if (!FLAGS_SET(mask, CGROUP_CONTROLLER_TO_MASK(c)))
    2354        1674 :                         continue;
    2355             : 
    2356         540 :                 k = cgroup_controller_to_string(c);
    2357         540 :                 l = strlen(k);
    2358             : /* Make room for the optional separator, the name itself and the trailing NUL. */
    2359         540 :                 if (!GREEDY_REALLOC(s, allocated, n + space + l + 1))
    2360           0 :                         return -ENOMEM;
    2361             : /* 'space' is false only for the first name, so it doubles as the separator length (0 or 1). */
    2362         540 :                 if (space)
    2363         294 :                         s[n] = ' ';
    2364         540 :                 memcpy(s + n + space, k, l);
    2365         540 :                 n += space + l;
    2366             : 
    2367         540 :                 space = true;
    2368             :         }
    2369             : /* s can only still be NULL here if mask was non-zero but contained no bit of any known controller. */
    2370         246 :         assert(s);
    2371             : 
    2372         246 :         s[n] = 0;
    2373         246 :         *ret = TAKE_PTR(s);
    2374             : 
    2375         246 :         return 0;
    2376             : }
    2377             : 
    2378          23 : int cg_mask_from_string(const char *value, CGroupMask *ret) {
    2379          23 :         CGroupMask m = 0;
    2380             : /* Parses a list of controller names, split into words by extract_first_word(), into a mask. Returns 0 and stores the mask in *ret, or the negative error of extract_first_word(). */
    2381          23 :         assert(ret);
    2382          23 :         assert(value);
    2383             : 
    2384          33 :         for (;;) {
    2385          56 :                 _cleanup_free_ char *n = NULL;
    2386             :                 CGroupController v;
    2387             :                 int r;
    2388             : 
    2389          56 :                 r = extract_first_word(&value, &n, NULL, 0);
    2390          56 :                 if (r < 0)
    2391           0 :                         return r;
    2392          56 :                 if (r == 0) /* no more words */
    2393          23 :                         break;
    2394             : /* Names that are not recognized as a controller are silently skipped. */
    2395          33 :                 v = cgroup_controller_from_string(n);
    2396          33 :                 if (v < 0)
    2397           2 :                         continue;
    2398             : 
    2399          31 :                 m |= CGROUP_CONTROLLER_TO_MASK(v);
    2400             :         }
    2401             : 
    2402          23 :         *ret = m;
    2403          23 :         return 0;
    2404             : }
    2405             : 
    2406          48 : int cg_mask_supported(CGroupMask *ret) {
    2407             :         CGroupMask mask;
    2408             :         int r;
    2409             : 
    2410             :         /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
    2411             :          * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
    2412             :          * pseudo-controllers. Returns 0 and stores the mask in *ret, or a negative error. */
    2413             : 
    2414          48 :         r = cg_all_unified();
    2415          48 :         if (r < 0)
    2416           0 :                 return r;
    2417          48 :         if (r > 0) {
    2418           0 :                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
    2419             : 
    2420             :                 /* In the unified hierarchy we can read the supported
    2421             :                  * and accessible controllers from the top-level
    2422             :                  * cgroup attribute "cgroup.controllers" */
    2423             : 
    2424           0 :                 r = cg_get_root_path(&root);
    2425           0 :                 if (r < 0)
    2426           0 :                         return r;
    2427             : 
    2428           0 :                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
    2429           0 :                 if (r < 0)
    2430           0 :                         return r;
    2431             : 
    2432           0 :                 r = read_one_line_file(path, &controllers);
    2433           0 :                 if (r < 0)
    2434           0 :                         return r;
    2435             : 
    2436           0 :                 r = cg_mask_from_string(controllers, &mask);
    2437           0 :                 if (r < 0)
    2438           0 :                         return r;
    2439             : 
    2440             :                 /* Currently, we support the cpu, memory, io and pids controller in the unified hierarchy, mask
    2441             :                  * everything else off. */
    2442           0 :                 mask &= CGROUP_MASK_V2;
    2443             : 
    2444             :         } else {
    2445             :                 CGroupController c;
    2446             : 
    2447             :                 /* In the legacy hierarchy, we check which hierarchies are mounted. */
    2448             : 
    2449          48 :                 mask = 0;
    2450         480 :                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2451         432 :                         CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2452             :                         const char *n;
    2453             : /* Only controllers that are part of CGROUP_MASK_V1 are considered here. */
    2454         432 :                         if (!FLAGS_SET(CGROUP_MASK_V1, bit))
    2455         144 :                                 continue;
    2456             : /* A controller counts as supported if controller_is_accessible() does not return an error for it. */
    2457         288 :                         n = cgroup_controller_to_string(c);
    2458         288 :                         if (controller_is_accessible(n) >= 0)
    2459         288 :                                 mask |= bit;
    2460             :                 }
    2461             :         }
    2462             : 
    2463          48 :         *ret = mask;
    2464          48 :         return 0;
    2465             : }
    2466             : 
    2467           0 : int cg_kernel_controllers(Set **ret) {
    2468           0 :         _cleanup_set_free_free_ Set *controllers = NULL;
    2469           0 :         _cleanup_fclose_ FILE *f = NULL;
    2470             :         int r;
    2471             : 
    2472           0 :         assert(ret);
    2473             : 
    2474             :         /* Determines the full list of kernel-known controllers. Might include controllers we don't actually support
    2475             :          * and controllers that aren't currently accessible (because not mounted). This does not include "name="
    2476             :          * pseudo-controllers. */
    2477             : /* Returns 0 and a newly allocated set of name strings in *ret (NULL if /proc/cgroups does not exist), -ENOMEM, -EBADMSG for unparsable lines or invalid controller names, or another negative error. */
    2478           0 :         controllers = set_new(&string_hash_ops);
    2479           0 :         if (!controllers)
    2480           0 :                 return -ENOMEM;
    2481             : 
    2482           0 :         r = fopen_unlocked("/proc/cgroups", "re", &f);
    2483           0 :         if (r == -ENOENT) {
    2484           0 :                 *ret = NULL;
    2485           0 :                 return 0;
    2486             :         }
    2487           0 :         if (r < 0)
    2488           0 :                 return r;
    2489             : 
    2490             :         /* Ignore the header line */
    2491           0 :         (void) read_line(f, (size_t) -1, NULL);
    2492             : 
    2493           0 :         for (;;) {
    2494             :                 char *controller;
    2495           0 :                 int enabled = 0;
    2496             : /* Each line is parsed as "<name> <int> <int> <enabled>"; the two middle fields are read but discarded. */
    2497           0 :                 errno = 0;
    2498           0 :                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
    2499             : /* NOTE(review): if fscanf() converted the name but not the flag (returned 1), the %ms buffer in 'controller' appears to be leaked on the exits below; confirm. */
    2500           0 :                         if (feof(f))
    2501           0 :                                 break;
    2502             : 
    2503           0 :                         if (ferror(f))
    2504           0 :                                 return errno_or_else(EIO);
    2505             : 
    2506           0 :                         return -EBADMSG;
    2507             :                 }
    2508             : /* Controllers whose last field is 0 are left out of the result. */
    2509           0 :                 if (!enabled) {
    2510           0 :                         free(controller);
    2511           0 :                         continue;
    2512             :                 }
    2513             : 
    2514           0 :                 if (!cg_controller_is_valid(controller)) {
    2515           0 :                         free(controller);
    2516           0 :                         return -EBADMSG;
    2517             :                 }
    2518             : /* 'controller' is handed to set_consume() and not freed here, not even on failure (presumably set_consume() frees it then; TODO confirm). */
    2519           0 :                 r = set_consume(controllers, controller);
    2520           0 :                 if (r < 0)
    2521           0 :                         return r;
    2522             :         }
    2523             : 
    2524           0 :         *ret = TAKE_PTR(controllers);
    2525             : 
    2526           0 :         return 0;
    2527             : }
    2528             : 
    2529             : static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; /* per-thread cache of the detected hierarchy layout, filled in by cg_unified_update() */
    2530             : 
    2531             : /* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd.  This
    2532             :  * unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
    2533             :  * /sys/fs/cgroup/systemd.  From v233 and on, the hybrid mode mounts v2 on /sys/fs/cgroup/unified and maintains
    2534             :  * "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
    2535             :  *
    2536             :  * To keep live upgrade working, we detect and support v232 layout.  When v232 layout is detected, to keep cgroup v2
    2537             :  * process management but disable the compat dual layout, we return %true on
    2538             :  * cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) and %false on cg_hybrid_unified().
    2539             :  */
    2540             : static thread_local bool unified_systemd_v232;
    2541             : 
    2542        3467 : static int cg_unified_update(void) {
    2543             : 
    2544             :         struct statfs fs;
    2545             : 
    2546             :         /* Detects which cgroup layout is mounted below /sys/fs/cgroup/ and records it in unified_cache (and
    2547             :          * unified_systemd_v232). Does nothing if a result is already cached. Returns 0 on success, or the
    2548             :          * result of log_debug_errno() (presumably a negative errno-style value) when statfs() fails (e.g. the
    2549             :          * hierarchies aren't mounted yet) or when /sys/fs/cgroup/ is neither cgroup2 nor tmpfs. */
    2550             : 
    2551        3467 :         if (unified_cache >= CGROUP_UNIFIED_NONE) /* a detection result is already cached */
    2552        3440 :                 return 0;
    2553             : 
    2554          27 :         if (statfs("/sys/fs/cgroup/", &fs) < 0)
    2555           0 :                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/\") failed: %m");
    2556             : 
    2557          27 :         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
    2558           0 :                 log_debug("Found cgroup2 on /sys/fs/cgroup/, full unified hierarchy");
    2559           0 :                 unified_cache = CGROUP_UNIFIED_ALL;
    2560          27 :         } else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) {
    2561          27 :                 if (statfs("/sys/fs/cgroup/unified/", &fs) == 0 &&
    2562          27 :                     F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
    2563          27 :                         log_debug("Found cgroup2 on /sys/fs/cgroup/unified, unified hierarchy for systemd controller");
    2564          27 :                         unified_cache = CGROUP_UNIFIED_SYSTEMD;
    2565          27 :                         unified_systemd_v232 = false;
    2566             :                 } else {
    2567           0 :                         if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0)
    2568           0 :                                 return log_debug_errno(errno, "statfs(\"/sys/fs/cgroup/systemd\") failed: %m");
    2569             : 
    2570           0 :                         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) {
    2571           0 :                                 log_debug("Found cgroup2 on /sys/fs/cgroup/systemd, unified hierarchy for systemd controller (v232 variant)");
    2572           0 :                                 unified_cache = CGROUP_UNIFIED_SYSTEMD;
    2573           0 :                                 unified_systemd_v232 = true;
    2574           0 :                         } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) {
    2575           0 :                                 log_debug("Found cgroup on /sys/fs/cgroup/systemd, legacy hierarchy");
    2576           0 :                                 unified_cache = CGROUP_UNIFIED_NONE;
    2577             :                         } else {
    2578           0 :                                 log_debug("Unexpected filesystem type %llx mounted on /sys/fs/cgroup/systemd, assuming legacy hierarchy",
    2579             :                                           (unsigned long long) fs.f_type);
    2580           0 :                                 unified_cache = CGROUP_UNIFIED_NONE;
    2581             :                         }
    2582             :                 }
    2583             :         } else
    2584           0 :                 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
    2585             :                                        "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
    2586             :                                        (unsigned long long)fs.f_type);
    2587             : 
    2588          27 :         return 0;
    2589             : }
    2590             : /* Returns < 0 if cg_unified_update() fails, false in CGROUP_UNIFIED_NONE mode, true in CGROUP_UNIFIED_ALL mode (or above). */
    2591        2714 : int cg_unified_controller(const char *controller) {
    2592             :         int r;
    2593             : 
    2594        2714 :         r = cg_unified_update();
    2595        2714 :         if (r < 0)
    2596           0 :                 return r;
    2597             : 
    2598        2714 :         if (unified_cache == CGROUP_UNIFIED_NONE)
    2599           0 :                 return false;
    2600             : 
    2601        2714 :         if (unified_cache >= CGROUP_UNIFIED_ALL)
    2602           0 :                 return true;
    2603             : 
    2604        2714 :         return streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER); /* remaining modes: only this one controller counts as unified */
    2605             : }
    2606             : /* Returns 1 if unified_cache is CGROUP_UNIFIED_ALL or above, 0 if it is below, or cg_unified_update()'s negative result. */
    2607         610 : int cg_all_unified(void) {
    2608             :         int r;
    2609             : 
    2610         610 :         r = cg_unified_update();
    2611         610 :         if (r < 0)
    2612           0 :                 return r;
    2613             : 
    2614         610 :         return unified_cache >= CGROUP_UNIFIED_ALL;
    2615             : }
    2616             : /* Returns 1 only in CGROUP_UNIFIED_SYSTEMD mode without the v232 layout, 0 otherwise, or cg_unified_update()'s negative result. */
    2617         126 : int cg_hybrid_unified(void) {
    2618             :         int r;
    2619             : 
    2620         126 :         r = cg_unified_update();
    2621         126 :         if (r < 0)
    2622           0 :                 return r;
    2623             : 
    2624         126 :         return unified_cache == CGROUP_UNIFIED_SYSTEMD && !unified_systemd_v232;
    2625             : }
    2626             : /* Drops the cached detection result and immediately re-runs cg_unified_update(), returning its result. */
    2627          17 : int cg_unified_flush(void) {
    2628          17 :         unified_cache = CGROUP_UNIFIED_UNKNOWN;
    2629             : 
    2630          17 :         return cg_unified_update();
    2631             : }
    2632             : /* Writes "+name" or "-name" (per 'mask') to cgroup.subtree_control of cgroup 'p' for each controller in both 'supported' and CGROUP_MASK_V2. */
    2633           6 : int cg_enable_everywhere(
    2634             :                 CGroupMask supported,
    2635             :                 CGroupMask mask,
    2636             :                 const char *p,
    2637             :                 CGroupMask *ret_result_mask) {
    2638             :         /* Per-controller write failures are only logged and reflected in *ret_result_mask (optional); they do not fail the call. */
    2639           6 :         _cleanup_fclose_ FILE *f = NULL;
    2640           6 :         _cleanup_free_ char *fs = NULL;
    2641             :         CGroupController c;
    2642           6 :         CGroupMask ret = 0;
    2643             :         int r;
    2644             : 
    2645           6 :         assert(p);
    2646             : 
    2647           6 :         if (supported == 0) {
    2648           0 :                 if (ret_result_mask)
    2649           0 :                         *ret_result_mask = 0;
    2650           0 :                 return 0;
    2651             :         }
    2652             : 
    2653           6 :         r = cg_all_unified();
    2654           6 :         if (r < 0)
    2655           0 :                 return r;
    2656           6 :         if (r == 0) {
    2657             :                 /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
    2658             :                  * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
    2659             :                  * caller tends to use the returned mask later on to compare if all controllers were properly joined,
    2660             :                  * and if not requeues realization. This use is the primary purpose of the return value, hence let's
    2661             :                  * minimize surprises here and reduce triggers for re-realization by always saying we fully
    2662             :                  * succeeded.) */
    2663           6 :                 if (ret_result_mask)
    2664           6 :                         *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
    2665             :                                                                                * CGROUP_MASK_V2: The 'supported' mask
    2666             :                                                                                * might contain pure-V1 or BPF
    2667             :                                                                                * controllers, and we never want to
    2668             :                                                                                * claim that we could enable those with
    2669             :                                                                                * cgroup.subtree_control */
    2670           6 :                 return 0;
    2671             :         }
    2672             : 
    2673           0 :         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
    2674           0 :         if (r < 0)
    2675           0 :                 return r;
    2676             : 
    2677           0 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    2678           0 :                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    2679             :                 const char *n;
    2680             : 
    2681           0 :                 if (!FLAGS_SET(CGROUP_MASK_V2, bit))
    2682           0 :                         continue;
    2683             : 
    2684           0 :                 if (!FLAGS_SET(supported, bit))
    2685           0 :                         continue;
    2686             : 
    2687           0 :                 n = cgroup_controller_to_string(c);
    2688           0 :                 {
    2689           0 :                         char s[1 + strlen(n) + 1]; /* sign character + controller name + NUL */
    2690             : 
    2691           0 :                         s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
    2692           0 :                         strcpy(s + 1, n);
    2693             : 
    2694           0 :                         if (!f) { /* open lazily, on the first controller that actually needs a write */
    2695           0 :                                 f = fopen(fs, "we");
    2696           0 :                                 if (!f)
    2697           0 :                                         return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
    2698             :                         }
    2699             : 
    2700           0 :                         r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
    2701           0 :                         if (r < 0) {
    2702           0 :                                 log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
    2703             :                                                 FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
    2704           0 :                                 clearerr(f); /* reset the stream's error flag so the next controller can still be written */
    2705             : 
    2706             :                                 /* If we can't turn off a controller, leave it on in the reported resulting mask. This
    2707             :                                  * happens for example when we attempt to turn off a controller up in the tree that is
    2708             :                                  * used down in the tree. */
    2709           0 :                                 if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
    2710             :                                                                            * only here, and not follow the same logic
    2711             :                                                                            * for other errors such as EINVAL or
    2712             :                                                                            * EOPNOTSUPP or anything else. That's
    2713             :                                                                            * because EBUSY indicates that the
    2714             :                                                                            * controller is currently enabled and
    2715             :                                                                            * cannot be disabled because something down
    2716             :                                                                            * the hierarchy is still using it. Any other
    2717             :                                                                            * error most likely means something like "I
    2718             :                                                                            * never heard of this controller" or
    2719             :                                                                            * similar. In the former case it's hence
    2720             :                                                                            * safe to assume the controller is still on
    2721             :                                                                            * after the failed operation, while in the
    2722             :                                                                            * latter case it's safer to assume the
    2723             :                                                                            * controller is unknown and hence certainly
    2724             :                                                                            * not enabled. */
    2725           0 :                                         ret |= bit;
    2726             :                         } else {
    2727             :                                 /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
    2728           0 :                                 if (FLAGS_SET(mask, bit))
    2729           0 :                                         ret |= bit;
    2730             :                         }
    2731             :                 }
    2732             :         }
    2733             : 
    2734             :         /* Let's return the precise set of controllers now enabled for the cgroup. */
    2735           0 :         if (ret_result_mask)
    2736           0 :                 *ret_result_mask = ret;
    2737             : 
    2738           0 :         return 0;
    2739             : }
    2740             : /* Whether the full unified hierarchy is wanted: mounted state first, then kernel command line, then DEFAULT_HIERARCHY. */
    2741           8 : bool cg_is_unified_wanted(void) {
    2742             :         static thread_local int wanted = -1; /* -1: not decided yet; otherwise the cached answer */
    2743             :         int r;
    2744             :         bool b;
    2745           8 :         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
    2746           8 :         _cleanup_free_ char *c = NULL;
    2747             : 
    2748             :         /* If we have a cached value, return that. */
    2749           8 :         if (wanted >= 0)
    2750           7 :                 return wanted;
    2751             : 
    2752             :         /* If the hierarchy is already mounted, then follow whatever
    2753             :          * was chosen for it. */
    2754           1 :         if (cg_unified_flush() >= 0)
    2755           1 :                 return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
    2756             : 
    2757             :         /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
    2758             :          * respect that. */
    2759           0 :         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
    2760           0 :         if (r > 0)
    2761           0 :                 return (wanted = b);
    2762             : 
    2763             :         /* If we passed cgroup_no_v1=all with no other instructions, it seems
    2764             :          * highly unlikely that we want to use hybrid or legacy hierarchy. */
    2765           0 :         r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
    2766           0 :         if (r > 0 && streq_ptr(c, "all"))
    2767           0 :                 return (wanted = true);
    2768             : 
    2769           0 :         return (wanted = is_default);
    2770             : }
    2771             : /* Returns false only if CGROUP_UNIFIED_ALL is detected after a fresh cg_unified_flush(), true otherwise; cached per thread. */
    2772           8 : bool cg_is_legacy_wanted(void) {
    2773             :         static thread_local int wanted = -1; /* -1: not decided yet; otherwise the cached answer */
    2774             : 
    2775             :         /* If we have a cached value, return that. */
    2776           8 :         if (wanted >= 0)
    2777           7 :                 return wanted;
    2778             : 
    2779             :         /* Check if we have cgroup v2 already mounted. */
    2780           1 :         if (cg_unified_flush() >= 0 &&
    2781           1 :             unified_cache == CGROUP_UNIFIED_ALL)
    2782           0 :                 return (wanted = false);
    2783             : 
    2784             :         /* Otherwise, assume that at least partial legacy is wanted,
    2785             :          * since cgroup v2 should already be mounted at this point. */
    2786           1 :         return (wanted = true);
    2787             : }
    2788             : /* Returns false if CGROUP_UNIFIED_ALL is detected; else follows the kernel command line or the compiled-in default. */
    2789           8 : bool cg_is_hybrid_wanted(void) {
    2790             :         static thread_local int wanted = -1; /* -1: not decided yet; otherwise the cached answer */
    2791             :         int r;
    2792             :         bool b;
    2793           8 :         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
    2794             :         /* We default to true if the default is "hybrid", obviously,
    2795             :          * but also when the default is "unified", because if we get
    2796             :          * called, it means that unified hierarchy was not mounted. */
    2797             : 
    2798             :         /* If we have a cached value, return that. */
    2799           8 :         if (wanted >= 0)
    2800           7 :                 return wanted;
    2801             : 
    2802             :         /* If the hierarchy is already mounted, then follow whatever
    2803             :          * was chosen for it. */
    2804           1 :         if (cg_unified_flush() >= 0 &&
    2805           1 :             unified_cache == CGROUP_UNIFIED_ALL)
    2806           0 :                 return (wanted = false);
    2807             : 
    2808             :         /* Otherwise, let's see what the kernel command line has to say.
    2809             :          * Since checking is expensive, cache a non-error result. */
    2810           1 :         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
    2811             : 
    2812             :         /* The meaning of the kernel option is reversed wrt. the return value
    2813             :          * of this function, hence the negation. */
    2814           1 :         return (wanted = r > 0 ? !b : is_default);
    2815             : }
    2816             : /* Parses s into *ret. Empty s (per isempty()) yields CGROUP_WEIGHT_INVALID and 0; values outside the MIN..MAX range yield -ERANGE. */
    2817           2 : int cg_weight_parse(const char *s, uint64_t *ret) {
    2818             :         uint64_t u;
    2819             :         int r;
    2820             : 
    2821           2 :         if (isempty(s)) {
    2822           0 :                 *ret = CGROUP_WEIGHT_INVALID;
    2823           0 :                 return 0;
    2824             :         }
    2825             : 
    2826           2 :         r = safe_atou64(s, &u);
    2827           2 :         if (r < 0) /* parse failure: pass safe_atou64()'s error through, *ret stays untouched */
    2828           0 :                 return r;
    2829             : 
    2830           2 :         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
    2831           0 :                 return -ERANGE;
    2832             : 
    2833           2 :         *ret = u;
    2834           2 :         return 0;
    2835             : }
    2836             : /* Default value for each CGroupIOLimitType index; every listed limit defaults to CGROUP_LIMIT_MAX. */
    2837             : const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
    2838             :         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
    2839             :         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
    2840             :         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
    2841             :         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
    2842             : };
    2843             : /* String name for each CGroupIOLimitType index; presumably consumed by the lookup helpers the macro below generates. */
    2844             : static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
    2845             :         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
    2846             :         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
    2847             :         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
    2848             :         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
    2849             : };
    2850             : 
    2851          12 : DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
    2852             : /* Parses s into *ret. Empty s (per isempty()) yields CGROUP_CPU_SHARES_INVALID and 0; values outside MIN..MAX yield -ERANGE. */
    2853           1 : int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
    2854             :         uint64_t u;
    2855             :         int r;
    2856             : 
    2857           1 :         if (isempty(s)) {
    2858           0 :                 *ret = CGROUP_CPU_SHARES_INVALID;
    2859           0 :                 return 0;
    2860             :         }
    2861             : 
    2862           1 :         r = safe_atou64(s, &u);
    2863           1 :         if (r < 0) /* parse failure: pass safe_atou64()'s error through, *ret stays untouched */
    2864           0 :                 return r;
    2865             : 
    2866           1 :         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
    2867           0 :                 return -ERANGE;
    2868             : 
    2869           1 :         *ret = u;
    2870           1 :         return 0;
    2871             : }
    2872             : /* Parses s into *ret. Empty s (per isempty()) yields CGROUP_BLKIO_WEIGHT_INVALID and 0; values outside MIN..MAX yield -ERANGE. */
    2873           0 : int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
    2874             :         uint64_t u;
    2875             :         int r;
    2876             : 
    2877           0 :         if (isempty(s)) {
    2878           0 :                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
    2879           0 :                 return 0;
    2880             :         }
    2881             : 
    2882           0 :         r = safe_atou64(s, &u);
    2883           0 :         if (r < 0) /* parse failure: pass safe_atou64()'s error through, *ret stays untouched */
    2884           0 :                 return r;
    2885             : 
    2886           0 :         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
    2887           0 :                 return -ERANGE;
    2888             : 
    2889           0 :         *ret = u;
    2890           0 :         return 0;
    2891             : }
    2892             : /* True if is_fs_type() matches *s against either CGROUP_SUPER_MAGIC or CGROUP2_SUPER_MAGIC. */
    2893           2 : bool is_cgroup_fs(const struct statfs *s) {
    2894           2 :         return is_fs_type(s, CGROUP_SUPER_MAGIC) ||
    2895           0 :                is_fs_type(s, CGROUP2_SUPER_MAGIC);
    2896             : }
    2897             : /* Returns true if fd refers to a file system accepted by is_cgroup_fs(); false if it does not or if fstatfs() fails. */
    2898           1 : bool fd_is_cgroup_fs(int fd) {
    2899             :         struct statfs s;
    2900             : 
    2901           1 :         if (fstatfs(fd, &s) < 0)
    2902           0 :                 return false; /* a bool cannot carry -errno: any non-zero value would read as "is cgroupfs" */
    2903             : 
    2904           1 :         return is_cgroup_fs(&s);
    2905             : }
    2906             : /* String name for each CGroupController index; presumably consumed by the lookup helpers the macro below generates. */
    2907             : static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
    2908             :         [CGROUP_CONTROLLER_CPU] = "cpu",
    2909             :         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
    2910             :         [CGROUP_CONTROLLER_IO] = "io",
    2911             :         [CGROUP_CONTROLLER_BLKIO] = "blkio",
    2912             :         [CGROUP_CONTROLLER_MEMORY] = "memory",
    2913             :         [CGROUP_CONTROLLER_DEVICES] = "devices",
    2914             :         [CGROUP_CONTROLLER_PIDS] = "pids",
    2915             :         [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
    2916             :         [CGROUP_CONTROLLER_BPF_DEVICES] = "bpf-devices",
    2917             : };
    2918             : 
    2919        1112 : DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
    2920             : /* Returns the controller mask to enable for CPU accounting (see the table below); computed on first call, then reused. */
    2921          12 : CGroupMask get_cpu_accounting_mask(void) {
    2922             :         static CGroupMask needed_mask = (CGroupMask) -1; /* (CGroupMask) -1 marks "not computed yet" */
    2923             : 
    2924             :         /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
    2925             :          * provided externally from the CPU controller, which means we don't
    2926             :          * need to enable the CPU controller just to get metrics. This is good,
    2927             :          * because enabling the CPU controller comes at a minor performance
    2928             :          * hit, especially when it's propagated deep into large hierarchies.
    2929             :          * There's also no separate CPU accounting controller available within
    2930             :          * a unified hierarchy.
    2931             :          *
    2932             :          * This combination of factors results in the desired cgroup mask to
    2933             :          * enable for CPU accounting varying as follows:
    2934             :          *
    2935             :          *                   ╔═════════════════════╤═════════════════════╗
    2936             :          *                   ║     Linux ≥4.15     │     Linux <4.15     ║
    2937             :          *   ╔═══════════════╬═════════════════════╪═════════════════════╣
    2938             :          *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
    2939             :          *   ╟───────────────╫─────────────────────┼─────────────────────╢
    2940             :          *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
    2941             :          *   ╚═══════════════╩═════════════════════╧═════════════════════╝
    2942             :          *
    2943             :          * We check kernel version here instead of manually checking whether
    2944             :          * cpu.stat is present for every cgroup, as that check in itself would
    2945             :          * already be fairly expensive.
    2946             :          *
    2947             :          * Kernels where this patch has been backported will therefore have the
    2948             :          * CPU controller enabled unnecessarily. This is more expensive than
    2949             :          * necessary, but harmless. ☺️
    2950             :          */
    2951             : 
    2952          12 :         if (needed_mask == (CGroupMask) -1) {
    2953           2 :                 if (cg_all_unified()) { /* NOTE(review): a negative (error) result also takes this branch and gets cached - confirm intended */
    2954             :                         struct utsname u;
    2955           0 :                         assert_se(uname(&u) >= 0);
    2956             : 
    2957           0 :                         if (str_verscmp(u.release, "4.15") < 0)
    2958           0 :                                 needed_mask = CGROUP_MASK_CPU;
    2959             :                         else
    2960           0 :                                 needed_mask = 0;
    2961             :                 } else
    2962           2 :                         needed_mask = CGROUP_MASK_CPUACCT;
    2963             :         }
    2964             : 
    2965          12 :         return needed_mask;
    2966             : }
    2967             : /* True if get_cpu_accounting_mask() is 0, i.e. CPU accounting requires no controller to be enabled. */
    2968           0 : bool cpu_accounting_is_cheap(void) {
    2969           0 :         return get_cpu_accounting_mask() == 0;
    2970             : }

Generated by: LCOV version 1.14