LCOV - systemd_full.info

LCOV - code coverage report

Current view:	top level - core - cgroup.c (source / functions)		Hit	Total	Coverage
Test:	systemd_full.info	Lines:	531	1704	31.2 %
Date:	2019-08-23 13:36:53	Functions:	66	120	55.0 %
Legend:	Lines: hit not hit \| Branches: + taken - not taken # not executed	Branches:	328	1846	17.8 %

           Branch data     Line data    Source code

       1                 :            : /* SPDX-License-Identifier: LGPL-2.1+ */
       2                 :            : 
       3                 :            : #include <fcntl.h>
       4                 :            : #include <fnmatch.h>
       5                 :            : 
       6                 :            : #include "sd-messages.h"
       7                 :            : 
       8                 :            : #include "alloc-util.h"
       9                 :            : #include "blockdev-util.h"
      10                 :            : #include "bpf-devices.h"
      11                 :            : #include "bpf-firewall.h"
      12                 :            : #include "btrfs-util.h"
      13                 :            : #include "bus-error.h"
      14                 :            : #include "cgroup-util.h"
      15                 :            : #include "cgroup.h"
      16                 :            : #include "fd-util.h"
      17                 :            : #include "fileio.h"
      18                 :            : #include "fs-util.h"
      19                 :            : #include "nulstr-util.h"
      20                 :            : #include "parse-util.h"
      21                 :            : #include "path-util.h"
      22                 :            : #include "process-util.h"
      23                 :            : #include "procfs-util.h"
      24                 :            : #include "special.h"
      25                 :            : #include "stat-util.h"
      26                 :            : #include "stdio-util.h"
      27                 :            : #include "string-table.h"
      28                 :            : #include "string-util.h"
      29                 :            : #include "virt.h"
      30                 :            : 
      31                 :            : #define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
      32                 :            : 
      33                 :            : /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
      34                 :            :  * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
      35                 :            :  * out specific attributes from us. */
      36                 :            : #define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
      37                 :            : 
      38                 :        116 : bool manager_owns_host_root_cgroup(Manager *m) {
      39         [ -  + ]:        116 :         assert(m);
      40                 :            : 
      41                 :            :         /* Returns true if we are managing the root cgroup. Note that it isn't sufficient to just check whether the
      42                 :            :          * group root path equals "/" since that will also be the case if CLONE_NEWCGROUP is in the mix. Since there's
      43                 :            :          * appears to be no nice way to detect whether we are in a CLONE_NEWCGROUP namespace we instead just check if
      44                 :            :          * we run in any kind of container virtualization. */
      45                 :            : 
      46         [ +  - ]:        116 :         if (MANAGER_IS_USER(m))
      47                 :        116 :                 return false;
      48                 :            : 
      49         [ #  # ]:          0 :         if (detect_container() > 0)
      50                 :          0 :                 return false;
      51                 :            : 
      52                 :          0 :         return empty_or_root(m->cgroup_root);
      53                 :            : }
      54                 :            : 
      55                 :         72 : bool unit_has_host_root_cgroup(Unit *u) {
      56         [ -  + ]:         72 :         assert(u);
      57                 :            : 
      58                 :            :         /* Returns whether this unit manages the root cgroup. This will return true if this unit is the root slice and
      59                 :            :          * the manager manages the root cgroup. */
      60                 :            : 
      61         [ +  - ]:         72 :         if (!manager_owns_host_root_cgroup(u->manager))
      62                 :         72 :                 return false;
      63                 :            : 
      64                 :          0 :         return unit_has_name(u, SPECIAL_ROOT_SLICE);
      65                 :            : }
      66                 :            : 
      67                 :          0 : static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
      68                 :            :         int r;
      69                 :            : 
      70                 :          0 :         r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
      71         [ #  # ]:          0 :         if (r < 0)
      72   [ #  #  #  #  :          0 :                 log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
          #  #  #  #  #  
             #  #  #  #  
                      # ]
      73                 :            :                               strna(attribute), isempty(u->cgroup_path) ? "/" : u->cgroup_path, (int) strcspn(value, NEWLINE), value);
      74                 :            : 
      75                 :          0 :         return r;
      76                 :            : }
      77                 :            : 
      78                 :          0 : static void cgroup_compat_warn(void) {
      79                 :            :         static bool cgroup_compat_warned = false;
      80                 :            : 
      81         [ #  # ]:          0 :         if (cgroup_compat_warned)
      82                 :          0 :                 return;
      83                 :            : 
      84         [ #  # ]:          0 :         log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. "
      85                 :            :                     "See cgroup-compat debug messages for details.");
      86                 :            : 
      87                 :          0 :         cgroup_compat_warned = true;
      88                 :            : }
      89                 :            : 
      90                 :            : #define log_cgroup_compat(unit, fmt, ...) do {                                  \
      91                 :            :                 cgroup_compat_warn();                                           \
      92                 :            :                 log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__);     \
      93                 :            :         } while (false)
      94                 :            : 
      95                 :       2356 : void cgroup_context_init(CGroupContext *c) {
      96         [ -  + ]:       2356 :         assert(c);
      97                 :            : 
      98                 :            :         /* Initialize everything to the kernel defaults. */
      99                 :            : 
     100                 :       2356 :         *c = (CGroupContext) {
     101                 :            :                 .cpu_weight = CGROUP_WEIGHT_INVALID,
     102                 :            :                 .startup_cpu_weight = CGROUP_WEIGHT_INVALID,
     103                 :            :                 .cpu_quota_per_sec_usec = USEC_INFINITY,
     104                 :            :                 .cpu_quota_period_usec = USEC_INFINITY,
     105                 :            : 
     106                 :            :                 .cpu_shares = CGROUP_CPU_SHARES_INVALID,
     107                 :            :                 .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
     108                 :            : 
     109                 :            :                 .memory_high = CGROUP_LIMIT_MAX,
     110                 :            :                 .memory_max = CGROUP_LIMIT_MAX,
     111                 :            :                 .memory_swap_max = CGROUP_LIMIT_MAX,
     112                 :            : 
     113                 :            :                 .memory_limit = CGROUP_LIMIT_MAX,
     114                 :            : 
     115                 :            :                 .io_weight = CGROUP_WEIGHT_INVALID,
     116                 :            :                 .startup_io_weight = CGROUP_WEIGHT_INVALID,
     117                 :            : 
     118                 :            :                 .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
     119                 :            :                 .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
     120                 :            : 
     121                 :            :                 .tasks_max = CGROUP_LIMIT_MAX,
     122                 :            :         };
     123                 :       2356 : }
     124                 :            : 
     125                 :          0 : void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
     126         [ #  # ]:          0 :         assert(c);
     127         [ #  # ]:          0 :         assert(a);
     128                 :            : 
     129   [ #  #  #  #  :          0 :         LIST_REMOVE(device_allow, c->device_allow, a);
             #  #  #  # ]
     130                 :          0 :         free(a->path);
     131                 :          0 :         free(a);
     132                 :          0 : }
     133                 :            : 
     134                 :          0 : void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w) {
     135         [ #  # ]:          0 :         assert(c);
     136         [ #  # ]:          0 :         assert(w);
     137                 :            : 
     138   [ #  #  #  #  :          0 :         LIST_REMOVE(device_weights, c->io_device_weights, w);
             #  #  #  # ]
     139                 :          0 :         free(w->path);
     140                 :          0 :         free(w);
     141                 :          0 : }
     142                 :            : 
     143                 :          0 : void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) {
     144         [ #  # ]:          0 :         assert(c);
     145         [ #  # ]:          0 :         assert(l);
     146                 :            : 
     147   [ #  #  #  #  :          0 :         LIST_REMOVE(device_latencies, c->io_device_latencies, l);
             #  #  #  # ]
     148                 :          0 :         free(l->path);
     149                 :          0 :         free(l);
     150                 :          0 : }
     151                 :            : 
     152                 :          0 : void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
     153         [ #  # ]:          0 :         assert(c);
     154         [ #  # ]:          0 :         assert(l);
     155                 :            : 
     156   [ #  #  #  #  :          0 :         LIST_REMOVE(device_limits, c->io_device_limits, l);
             #  #  #  # ]
     157                 :          0 :         free(l->path);
     158                 :          0 :         free(l);
     159                 :          0 : }
     160                 :            : 
     161                 :          0 : void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
     162         [ #  # ]:          0 :         assert(c);
     163         [ #  # ]:          0 :         assert(w);
     164                 :            : 
     165   [ #  #  #  #  :          0 :         LIST_REMOVE(device_weights, c->blockio_device_weights, w);
             #  #  #  # ]
     166                 :          0 :         free(w->path);
     167                 :          0 :         free(w);
     168                 :          0 : }
     169                 :            : 
     170                 :          0 : void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
     171         [ #  # ]:          0 :         assert(c);
     172         [ #  # ]:          0 :         assert(b);
     173                 :            : 
     174   [ #  #  #  #  :          0 :         LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
             #  #  #  # ]
     175                 :          0 :         free(b->path);
     176                 :          0 :         free(b);
     177                 :          0 : }
     178                 :            : 
     179                 :       2356 : void cgroup_context_done(CGroupContext *c) {
     180         [ -  + ]:       2356 :         assert(c);
     181                 :            : 
     182         [ -  + ]:       2356 :         while (c->io_device_weights)
     183                 :          0 :                 cgroup_context_free_io_device_weight(c, c->io_device_weights);
     184                 :            : 
     185         [ -  + ]:       2356 :         while (c->io_device_latencies)
     186                 :          0 :                 cgroup_context_free_io_device_latency(c, c->io_device_latencies);
     187                 :            : 
     188         [ -  + ]:       2356 :         while (c->io_device_limits)
     189                 :          0 :                 cgroup_context_free_io_device_limit(c, c->io_device_limits);
     190                 :            : 
     191         [ -  + ]:       2356 :         while (c->blockio_device_weights)
     192                 :          0 :                 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
     193                 :            : 
     194         [ -  + ]:       2356 :         while (c->blockio_device_bandwidths)
     195                 :          0 :                 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
     196                 :            : 
     197         [ -  + ]:       2356 :         while (c->device_allow)
     198                 :          0 :                 cgroup_context_free_device_allow(c, c->device_allow);
     199                 :            : 
     200                 :       2356 :         c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
     201                 :       2356 :         c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
     202                 :            : 
     203                 :       2356 :         c->ip_filters_ingress = strv_free(c->ip_filters_ingress);
     204                 :       2356 :         c->ip_filters_egress = strv_free(c->ip_filters_egress);
     205                 :       2356 : }
     206                 :            : 
     207                 :        728 : void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
     208                 :        728 :         _cleanup_free_ char *disable_controllers_str = NULL;
     209                 :            :         CGroupIODeviceLimit *il;
     210                 :            :         CGroupIODeviceWeight *iw;
     211                 :            :         CGroupIODeviceLatency *l;
     212                 :            :         CGroupBlockIODeviceBandwidth *b;
     213                 :            :         CGroupBlockIODeviceWeight *w;
     214                 :            :         CGroupDeviceAllow *a;
     215                 :            :         IPAddressAccessItem *iaai;
     216                 :            :         char **path;
     217                 :            :         char u[FORMAT_TIMESPAN_MAX];
     218                 :            :         char v[FORMAT_TIMESPAN_MAX];
     219                 :            : 
     220         [ -  + ]:        728 :         assert(c);
     221         [ -  + ]:        728 :         assert(f);
     222                 :            : 
     223                 :        728 :         prefix = strempty(prefix);
     224                 :            : 
     225                 :        728 :         (void) cg_mask_to_string(c->disable_controllers, &disable_controllers_str);
     226                 :            : 
     227                 :       1456 :         fprintf(f,
     228                 :            :                 "%sCPUAccounting=%s\n"
     229                 :            :                 "%sIOAccounting=%s\n"
     230                 :            :                 "%sBlockIOAccounting=%s\n"
     231                 :            :                 "%sMemoryAccounting=%s\n"
     232                 :            :                 "%sTasksAccounting=%s\n"
     233                 :            :                 "%sIPAccounting=%s\n"
     234                 :            :                 "%sCPUWeight=%" PRIu64 "\n"
     235                 :            :                 "%sStartupCPUWeight=%" PRIu64 "\n"
     236                 :            :                 "%sCPUShares=%" PRIu64 "\n"
     237                 :            :                 "%sStartupCPUShares=%" PRIu64 "\n"
     238                 :            :                 "%sCPUQuotaPerSecSec=%s\n"
     239                 :            :                 "%sCPUQuotaPeriodSec=%s\n"
     240                 :            :                 "%sIOWeight=%" PRIu64 "\n"
     241                 :            :                 "%sStartupIOWeight=%" PRIu64 "\n"
     242                 :            :                 "%sBlockIOWeight=%" PRIu64 "\n"
     243                 :            :                 "%sStartupBlockIOWeight=%" PRIu64 "\n"
     244                 :            :                 "%sDefaultMemoryMin=%" PRIu64 "\n"
     245                 :            :                 "%sDefaultMemoryLow=%" PRIu64 "\n"
     246                 :            :                 "%sMemoryMin=%" PRIu64 "\n"
     247                 :            :                 "%sMemoryLow=%" PRIu64 "\n"
     248                 :            :                 "%sMemoryHigh=%" PRIu64 "\n"
     249                 :            :                 "%sMemoryMax=%" PRIu64 "\n"
     250                 :            :                 "%sMemorySwapMax=%" PRIu64 "\n"
     251                 :            :                 "%sMemoryLimit=%" PRIu64 "\n"
     252                 :            :                 "%sTasksMax=%" PRIu64 "\n"
     253                 :            :                 "%sDevicePolicy=%s\n"
     254                 :            :                 "%sDisableControllers=%s\n"
     255                 :            :                 "%sDelegate=%s\n",
     256                 :        728 :                 prefix, yes_no(c->cpu_accounting),
     257                 :        728 :                 prefix, yes_no(c->io_accounting),
     258                 :        728 :                 prefix, yes_no(c->blockio_accounting),
     259                 :        728 :                 prefix, yes_no(c->memory_accounting),
     260                 :        728 :                 prefix, yes_no(c->tasks_accounting),
     261                 :        728 :                 prefix, yes_no(c->ip_accounting),
     262                 :            :                 prefix, c->cpu_weight,
     263                 :            :                 prefix, c->startup_cpu_weight,
     264                 :            :                 prefix, c->cpu_shares,
     265                 :            :                 prefix, c->startup_cpu_shares,
     266                 :            :                 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
     267                 :            :                 prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
     268                 :            :                 prefix, c->io_weight,
     269                 :            :                 prefix, c->startup_io_weight,
     270                 :            :                 prefix, c->blockio_weight,
     271                 :            :                 prefix, c->startup_blockio_weight,
     272                 :            :                 prefix, c->default_memory_min,
     273                 :            :                 prefix, c->default_memory_low,
     274                 :            :                 prefix, c->memory_min,
     275                 :            :                 prefix, c->memory_low,
     276                 :            :                 prefix, c->memory_high,
     277                 :            :                 prefix, c->memory_max,
     278                 :            :                 prefix, c->memory_swap_max,
     279                 :            :                 prefix, c->memory_limit,
     280                 :            :                 prefix, c->tasks_max,
     281                 :            :                 prefix, cgroup_device_policy_to_string(c->device_policy),
     282                 :            :                 prefix, strempty(disable_controllers_str),
     283                 :        728 :                 prefix, yes_no(c->delegate));
     284                 :            : 
     285         [ -  + ]:        728 :         if (c->delegate) {
     286                 :          0 :                 _cleanup_free_ char *t = NULL;
     287                 :            : 
     288                 :          0 :                 (void) cg_mask_to_string(c->delegate_controllers, &t);
     289                 :            : 
     290                 :          0 :                 fprintf(f, "%sDelegateControllers=%s\n",
     291                 :            :                         prefix,
     292                 :            :                         strempty(t));
     293                 :            :         }
     294                 :            : 
     295         [ -  + ]:        728 :         LIST_FOREACH(device_allow, a, c->device_allow)
     296                 :          0 :                 fprintf(f,
     297                 :            :                         "%sDeviceAllow=%s %s%s%s\n",
     298                 :            :                         prefix,
     299                 :            :                         a->path,
     300   [ #  #  #  #  :          0 :                         a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
                   #  # ]
     301                 :            : 
     302         [ -  + ]:        728 :         LIST_FOREACH(device_weights, iw, c->io_device_weights)
     303                 :          0 :                 fprintf(f,
     304                 :            :                         "%sIODeviceWeight=%s %" PRIu64 "\n",
     305                 :            :                         prefix,
     306                 :            :                         iw->path,
     307                 :            :                         iw->weight);
     308                 :            : 
     309         [ -  + ]:        728 :         LIST_FOREACH(device_latencies, l, c->io_device_latencies)
     310                 :          0 :                 fprintf(f,
     311                 :            :                         "%sIODeviceLatencyTargetSec=%s %s\n",
     312                 :            :                         prefix,
     313                 :            :                         l->path,
     314                 :            :                         format_timespan(u, sizeof(u), l->target_usec, 1));
     315                 :            : 
     316         [ -  + ]:        728 :         LIST_FOREACH(device_limits, il, c->io_device_limits) {
     317                 :            :                 char buf[FORMAT_BYTES_MAX];
     318                 :            :                 CGroupIOLimitType type;
     319                 :            : 
     320         [ #  # ]:          0 :                 for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
     321         [ #  # ]:          0 :                         if (il->limits[type] != cgroup_io_limit_defaults[type])
     322                 :          0 :                                 fprintf(f,
     323                 :            :                                         "%s%s=%s %s\n",
     324                 :            :                                         prefix,
     325                 :            :                                         cgroup_io_limit_type_to_string(type),
     326                 :            :                                         il->path,
     327                 :            :                                         format_bytes(buf, sizeof(buf), il->limits[type]));
     328                 :            :         }
     329                 :            : 
     330         [ -  + ]:        728 :         LIST_FOREACH(device_weights, w, c->blockio_device_weights)
     331                 :          0 :                 fprintf(f,
     332                 :            :                         "%sBlockIODeviceWeight=%s %" PRIu64,
     333                 :            :                         prefix,
     334                 :            :                         w->path,
     335                 :            :                         w->weight);
     336                 :            : 
     337         [ -  + ]:        728 :         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
     338                 :            :                 char buf[FORMAT_BYTES_MAX];
     339                 :            : 
     340         [ #  # ]:          0 :                 if (b->rbps != CGROUP_LIMIT_MAX)
     341                 :          0 :                         fprintf(f,
     342                 :            :                                 "%sBlockIOReadBandwidth=%s %s\n",
     343                 :            :                                 prefix,
     344                 :            :                                 b->path,
     345                 :            :                                 format_bytes(buf, sizeof(buf), b->rbps));
     346         [ #  # ]:          0 :                 if (b->wbps != CGROUP_LIMIT_MAX)
     347                 :          0 :                         fprintf(f,
     348                 :            :                                 "%sBlockIOWriteBandwidth=%s %s\n",
     349                 :            :                                 prefix,
     350                 :            :                                 b->path,
     351                 :            :                                 format_bytes(buf, sizeof(buf), b->wbps));
     352                 :            :         }
     353                 :            : 
     354         [ -  + ]:        728 :         LIST_FOREACH(items, iaai, c->ip_address_allow) {
     355                 :          0 :                 _cleanup_free_ char *k = NULL;
     356                 :            : 
     357                 :          0 :                 (void) in_addr_to_string(iaai->family, &iaai->address, &k);
     358                 :          0 :                 fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
     359                 :            :         }
     360                 :            : 
     361         [ -  + ]:        728 :         LIST_FOREACH(items, iaai, c->ip_address_deny) {
     362                 :          0 :                 _cleanup_free_ char *k = NULL;
     363                 :            : 
     364                 :          0 :                 (void) in_addr_to_string(iaai->family, &iaai->address, &k);
     365                 :          0 :                 fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
     366                 :            :         }
     367                 :            : 
     368   [ -  +  #  # ]:        728 :         STRV_FOREACH(path, c->ip_filters_ingress)
     369                 :          0 :                 fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path);
     370                 :            : 
     371   [ -  +  #  # ]:        728 :         STRV_FOREACH(path, c->ip_filters_egress)
     372                 :          0 :                 fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path);
     373                 :        728 : }
     374                 :            : 
     375                 :          0 : int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) {
     376                 :          0 :         _cleanup_free_ CGroupDeviceAllow *a = NULL;
     377                 :          0 :         _cleanup_free_ char *d = NULL;
     378                 :            : 
     379         [ #  # ]:          0 :         assert(c);
     380         [ #  # ]:          0 :         assert(dev);
     381   [ #  #  #  # ]:          0 :         assert(isempty(mode) || in_charset(mode, "rwm"));
     382                 :            : 
     383                 :          0 :         a = new(CGroupDeviceAllow, 1);
     384         [ #  # ]:          0 :         if (!a)
     385                 :          0 :                 return -ENOMEM;
     386                 :            : 
     387                 :          0 :         d = strdup(dev);
     388         [ #  # ]:          0 :         if (!d)
     389                 :          0 :                 return -ENOMEM;
     390                 :            : 
     391                 :          0 :         *a = (CGroupDeviceAllow) {
     392                 :          0 :                 .path = TAKE_PTR(d),
     393   [ #  #  #  # ]:          0 :                 .r = isempty(mode) || strchr(mode, 'r'),
     394   [ #  #  #  # ]:          0 :                 .w = isempty(mode) || strchr(mode, 'w'),
     395   [ #  #  #  # ]:          0 :                 .m = isempty(mode) || strchr(mode, 'm'),
     396                 :            :         };
     397                 :            : 
     398   [ #  #  #  # ]:          0 :         LIST_PREPEND(device_allow, c->device_allow, a);
     399                 :          0 :         TAKE_PTR(a);
     400                 :            : 
     401                 :          0 :         return 0;
     402                 :            : }
     403                 :            : 
     404                 :            : #define UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(entry)                       \
     405                 :            :         uint64_t unit_get_ancestor_##entry(Unit *u) {                   \
     406                 :            :                 CGroupContext *c;                                       \
     407                 :            :                                                                         \
     408                 :            :                 /* 1. Is entry set in this unit? If so, use that.       \
     409                 :            :                  * 2. Is the default for this entry set in any          \
     410                 :            :                  *    ancestor? If so, use that.                        \
     411                 :            :                  * 3. Otherwise, return CGROUP_LIMIT_MIN. */            \
     412                 :            :                                                                         \
     413                 :            :                 assert(u);                                              \
     414                 :            :                                                                         \
     415                 :            :                 c = unit_get_cgroup_context(u);                         \
     416                 :            :                 if (c && c->entry##_set)                                \
     417                 :            :                         return c->entry;                                \
     418                 :            :                                                                         \
     419                 :            :                 while ((u = UNIT_DEREF(u->slice))) {                    \
     420                 :            :                         c = unit_get_cgroup_context(u);                 \
     421                 :            :                         if (c && c->default_##entry##_set)              \
     422                 :            :                                 return c->default_##entry;              \
     423                 :            :                 }                                                       \
     424                 :            :                                                                         \
     425                 :            :                 /* We've reached the root, but nobody had default for   \
     426                 :            :                  * this entry set, so set it to the kernel default. */  \
     427                 :            :                 return CGROUP_LIMIT_MIN;                                \
     428                 :            : }
     429                 :            : 
     430   [ -  +  +  -  :        544 : UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_low);
          +  +  +  -  +  
                +  +  + ]
     431   [ #  #  #  #  :          0 : UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_min);
          #  #  #  #  #  
                #  #  # ]
     432                 :            : 
     433                 :         24 : static void cgroup_xattr_apply(Unit *u) {
     434                 :            :         char ids[SD_ID128_STRING_MAX];
     435                 :            :         int r;
     436                 :            : 
     437         [ -  + ]:         24 :         assert(u);
     438                 :            : 
     439         [ +  - ]:         24 :         if (!MANAGER_IS_SYSTEM(u->manager))
     440                 :         24 :                 return;
     441                 :            : 
     442         [ #  # ]:          0 :         if (sd_id128_is_null(u->invocation_id))
     443                 :          0 :                 return;
     444                 :            : 
     445                 :          0 :         r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
     446                 :            :                          "trusted.invocation_id",
     447                 :          0 :                          sd_id128_to_string(u->invocation_id, ids), 32,
     448                 :            :                          0);
     449         [ #  # ]:          0 :         if (r < 0)
     450         [ #  # ]:          0 :                 log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
     451                 :            : }
     452                 :            : 
     453                 :          0 : static int lookup_block_device(const char *p, dev_t *ret) {
     454                 :          0 :         dev_t rdev, dev = 0;
     455                 :            :         mode_t mode;
     456                 :            :         int r;
     457                 :            : 
     458         [ #  # ]:          0 :         assert(p);
     459         [ #  # ]:          0 :         assert(ret);
     460                 :            : 
     461                 :          0 :         r = device_path_parse_major_minor(p, &mode, &rdev);
     462         [ #  # ]:          0 :         if (r == -ENODEV) { /* not a parsable device node, need to go to disk */
     463                 :            :                 struct stat st;
     464         [ #  # ]:          0 :                 if (stat(p, &st) < 0)
     465         [ #  # ]:          0 :                         return log_warning_errno(errno, "Couldn't stat device '%s': %m", p);
     466                 :          0 :                 rdev = (dev_t)st.st_rdev;
     467                 :          0 :                 dev = (dev_t)st.st_dev;
     468                 :          0 :                 mode = st.st_mode;
     469         [ #  # ]:          0 :         } else if (r < 0)
     470         [ #  # ]:          0 :                 return log_warning_errno(r, "Failed to parse major/minor from path '%s': %m", p);
     471                 :            : 
     472         [ #  # ]:          0 :         if (S_ISCHR(mode)) {
     473         [ #  # ]:          0 :                 log_warning("Device node '%s' is a character device, but block device needed.", p);
     474                 :          0 :                 return -ENOTBLK;
     475         [ #  # ]:          0 :         } else if (S_ISBLK(mode))
     476                 :          0 :                 *ret = rdev;
     477         [ #  # ]:          0 :         else if (major(dev) != 0)
     478                 :          0 :                 *ret = dev; /* If this is not a device node then use the block device this file is stored on */
     479                 :            :         else {
     480                 :            :                 /* If this is btrfs, getting the backing block device is a bit harder */
     481                 :          0 :                 r = btrfs_get_block_device(p, ret);
     482   [ #  #  #  # ]:          0 :                 if (r < 0 && r != -ENOTTY)
     483         [ #  # ]:          0 :                         return log_warning_errno(r, "Failed to determine block device backing btrfs file system '%s': %m", p);
     484         [ #  # ]:          0 :                 if (r == -ENOTTY) {
     485         [ #  # ]:          0 :                         log_warning("'%s' is not a block device node, and file system block device cannot be determined or is not local.", p);
     486                 :          0 :                         return -ENODEV;
     487                 :            :                 }
     488                 :            :         }
     489                 :            : 
     490                 :            :         /* If this is a LUKS device, try to get the originating block device */
     491                 :          0 :         (void) block_get_originating(*ret, ret);
     492                 :            : 
     493                 :            :         /* If this is a partition, try to get the originating block device */
     494                 :          0 :         (void) block_get_whole_disk(*ret, ret);
     495                 :          0 :         return 0;
     496                 :            : }
     497                 :            : 
     498                 :          0 : static int whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
     499                 :            :         dev_t rdev;
     500                 :            :         mode_t mode;
     501                 :            :         int r;
     502                 :            : 
     503         [ #  # ]:          0 :         assert(path);
     504         [ #  # ]:          0 :         assert(acc);
     505                 :            : 
     506                 :            :         /* Some special handling for /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
     507                 :            :          * /run/systemd/inaccessible/blk paths. Instead of stat()ing these we parse out the major/minor directly. This
     508                 :            :          * means clients can use these path without the device node actually around */
     509                 :          0 :         r = device_path_parse_major_minor(node, &mode, &rdev);
     510         [ #  # ]:          0 :         if (r < 0) {
     511         [ #  # ]:          0 :                 if (r != -ENODEV)
     512         [ #  # ]:          0 :                         return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);
     513                 :            : 
     514                 :            :                 struct stat st;
     515         [ #  # ]:          0 :                 if (stat(node, &st) < 0)
     516         [ #  # ]:          0 :                         return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
     517                 :            : 
     518   [ #  #  #  # ]:          0 :                 if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
     519         [ #  # ]:          0 :                         log_warning("%s is not a device.", node);
     520                 :          0 :                         return -ENODEV;
     521                 :            :                 }
     522                 :          0 :                 rdev = (dev_t) st.st_rdev;
     523                 :          0 :                 mode = st.st_mode;
     524                 :            :         }
     525                 :            : 
     526         [ #  # ]:          0 :         if (cg_all_unified() > 0) {
     527         [ #  # ]:          0 :                 if (!prog)
     528                 :          0 :                         return 0;
     529                 :            : 
     530                 :          0 :                 return cgroup_bpf_whitelist_device(prog, S_ISCHR(mode) ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
     531         [ #  # ]:          0 :                                                    major(rdev), minor(rdev), acc);
     532                 :            : 
     533                 :            :         } else {
     534                 :            :                 char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
     535                 :            : 
     536                 :          0 :                 sprintf(buf,
     537                 :            :                         "%c %u:%u %s",
     538         [ #  # ]:          0 :                         S_ISCHR(mode) ? 'c' : 'b',
     539                 :            :                         major(rdev), minor(rdev),
     540                 :            :                         acc);
     541                 :            : 
     542                 :            :                 /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL here. */
     543                 :            : 
     544                 :          0 :                 r = cg_set_attribute("devices", path, "devices.allow", buf);
     545         [ #  # ]:          0 :                 if (r < 0)
     546   [ #  #  #  #  :          0 :                         return log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                   #  # ]
     547                 :            :                                               r, "Failed to set devices.allow on %s: %m", path);
     548                 :            : 
     549                 :          0 :                 return 0;
     550                 :            :         }
     551                 :            : }
     552                 :            : 
     553                 :          0 : static int whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
     554                 :          0 :         _cleanup_fclose_ FILE *f = NULL;
     555                 :            :         char buf[2+DECIMAL_STR_MAX(unsigned)+3+4];
     556                 :          0 :         bool good = false;
     557                 :            :         unsigned maj;
     558                 :            :         int r;
     559                 :            : 
     560         [ #  # ]:          0 :         assert(path);
     561         [ #  # ]:          0 :         assert(acc);
     562   [ #  #  #  # ]:          0 :         assert(IN_SET(type, 'b', 'c'));
     563                 :            : 
     564         [ #  # ]:          0 :         if (streq(name, "*")) {
     565                 :            :                 /* If the name is a wildcard, then apply this list to all devices of this type */
     566                 :            : 
     567         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
     568         [ #  # ]:          0 :                         if (!prog)
     569                 :          0 :                                 return 0;
     570                 :            : 
     571         [ #  # ]:          0 :                         (void) cgroup_bpf_whitelist_class(prog, type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK, acc);
     572                 :            :                 } else {
     573         [ #  # ]:          0 :                         xsprintf(buf, "%c *:* %s", type, acc);
     574                 :            : 
     575                 :          0 :                         r = cg_set_attribute("devices", path, "devices.allow", buf);
     576         [ #  # ]:          0 :                         if (r < 0)
     577   [ #  #  #  #  :          0 :                                 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                   #  # ]
     578                 :            :                                                "Failed to set devices.allow on %s: %m", path);
     579                 :          0 :                         return 0;
     580                 :            :                 }
     581                 :            :         }
     582                 :            : 
     583   [ #  #  #  #  :          0 :         if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj)) {
             #  #  #  # ]
     584                 :            :                 /* The name is numeric and suitable as major. In that case, let's take is major, and create the entry
     585                 :            :                  * directly */
     586                 :            : 
     587         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
     588         [ #  # ]:          0 :                         if (!prog)
     589                 :          0 :                                 return 0;
     590                 :            : 
     591         [ #  # ]:          0 :                         (void) cgroup_bpf_whitelist_major(prog,
     592                 :            :                                                           type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
     593                 :            :                                                           maj, acc);
     594                 :            :                 } else {
     595         [ #  # ]:          0 :                         xsprintf(buf, "%c %u:* %s", type, maj, acc);
     596                 :            : 
     597                 :          0 :                         r = cg_set_attribute("devices", path, "devices.allow", buf);
     598         [ #  # ]:          0 :                         if (r < 0)
     599   [ #  #  #  #  :          0 :                                 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                   #  # ]
     600                 :            :                                                "Failed to set devices.allow on %s: %m", path);
     601                 :            :                 }
     602                 :            : 
     603                 :          0 :                 return 0;
     604                 :            :         }
     605                 :            : 
     606                 :          0 :         f = fopen("/proc/devices", "re");
     607         [ #  # ]:          0 :         if (!f)
     608         [ #  # ]:          0 :                 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
     609                 :            : 
     610                 :          0 :         for (;;) {
     611   [ #  #  #  # ]:          0 :                 _cleanup_free_ char *line = NULL;
     612                 :            :                 char *w, *p;
     613                 :            : 
     614                 :          0 :                 r = read_line(f, LONG_LINE_MAX, &line);
     615         [ #  # ]:          0 :                 if (r < 0)
     616         [ #  # ]:          0 :                         return log_warning_errno(r, "Failed to read /proc/devices: %m");
     617         [ #  # ]:          0 :                 if (r == 0)
     618                 :          0 :                         break;
     619                 :            : 
     620   [ #  #  #  # ]:          0 :                 if (type == 'c' && streq(line, "Character devices:")) {
     621                 :          0 :                         good = true;
     622                 :          0 :                         continue;
     623                 :            :                 }
     624                 :            : 
     625   [ #  #  #  # ]:          0 :                 if (type == 'b' && streq(line, "Block devices:")) {
     626                 :          0 :                         good = true;
     627                 :          0 :                         continue;
     628                 :            :                 }
     629                 :            : 
     630         [ #  # ]:          0 :                 if (isempty(line)) {
     631                 :          0 :                         good = false;
     632                 :          0 :                         continue;
     633                 :            :                 }
     634                 :            : 
     635         [ #  # ]:          0 :                 if (!good)
     636                 :          0 :                         continue;
     637                 :            : 
     638                 :          0 :                 p = strstrip(line);
     639                 :            : 
     640                 :          0 :                 w = strpbrk(p, WHITESPACE);
     641         [ #  # ]:          0 :                 if (!w)
     642                 :          0 :                         continue;
     643                 :          0 :                 *w = 0;
     644                 :            : 
     645                 :          0 :                 r = safe_atou(p, &maj);
     646         [ #  # ]:          0 :                 if (r < 0)
     647                 :          0 :                         continue;
     648         [ #  # ]:          0 :                 if (maj <= 0)
     649                 :          0 :                         continue;
     650                 :            : 
     651                 :          0 :                 w++;
     652                 :          0 :                 w += strspn(w, WHITESPACE);
     653                 :            : 
     654         [ #  # ]:          0 :                 if (fnmatch(name, w, 0) != 0)
     655                 :          0 :                         continue;
     656                 :            : 
     657         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
     658         [ #  # ]:          0 :                         if (!prog)
     659                 :          0 :                                 continue;
     660                 :            : 
     661         [ #  # ]:          0 :                         (void) cgroup_bpf_whitelist_major(prog,
     662                 :            :                                                           type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK,
     663                 :            :                                                           maj, acc);
     664                 :            :                 } else {
     665                 :          0 :                         sprintf(buf,
     666                 :            :                                 "%c %u:* %s",
     667                 :            :                                 type,
     668                 :            :                                 maj,
     669                 :            :                                 acc);
     670                 :            : 
     671                 :            :                         /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
     672                 :            :                          * here. */
     673                 :            : 
     674                 :          0 :                         r = cg_set_attribute("devices", path, "devices.allow", buf);
     675         [ #  # ]:          0 :                         if (r < 0)
     676   [ #  #  #  #  :          0 :                                 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                   #  # ]
     677                 :            :                                                r, "Failed to set devices.allow on %s: %m", path);
     678                 :            :                 }
     679                 :            :         }
     680                 :            : 
     681                 :          0 :         return 0;
     682                 :            : }
     683                 :            : 
     684                 :       2316 : static bool cgroup_context_has_cpu_weight(CGroupContext *c) {
     685         [ +  - ]:       4632 :         return c->cpu_weight != CGROUP_WEIGHT_INVALID ||
     686         [ -  + ]:       2316 :                 c->startup_cpu_weight != CGROUP_WEIGHT_INVALID;
     687                 :            : }
     688                 :            : 
     689                 :       2316 : static bool cgroup_context_has_cpu_shares(CGroupContext *c) {
     690         [ +  + ]:       4612 :         return c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
     691         [ -  + ]:       2296 :                 c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID;
     692                 :            : }
     693                 :            : 
     694                 :        122 : static uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state) {
     695   [ +  +  +  + ]:        122 :         if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
     696         [ -  + ]:         54 :             c->startup_cpu_weight != CGROUP_WEIGHT_INVALID)
     697                 :          0 :                 return c->startup_cpu_weight;
     698         [ -  + ]:        122 :         else if (c->cpu_weight != CGROUP_WEIGHT_INVALID)
     699                 :          0 :                 return c->cpu_weight;
     700                 :            :         else
     701                 :        122 :                 return CGROUP_WEIGHT_DEFAULT;
     702                 :            : }
     703                 :            : 
     704                 :          0 : static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state) {
     705   [ #  #  #  # ]:          0 :         if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
     706         [ #  # ]:          0 :             c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID)
     707                 :          0 :                 return c->startup_cpu_shares;
     708         [ #  # ]:          0 :         else if (c->cpu_shares != CGROUP_CPU_SHARES_INVALID)
     709                 :          0 :                 return c->cpu_shares;
     710                 :            :         else
     711                 :          0 :                 return CGROUP_CPU_SHARES_DEFAULT;
     712                 :            : }
     713                 :            : 
     714                 :         48 : usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) {
     715                 :            :         /* kernel uses a minimum resolution of 1ms, so both period and (quota * period)
     716                 :            :          * need to be higher than that boundary. quota is specified in USecPerSec.
     717                 :            :          * Additionally, period must be at most max_period. */
     718         [ -  + ]:         48 :         assert(quota > 0);
     719                 :            : 
     720                 :         48 :         return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period);
     721                 :            : }
     722                 :            : 
     723                 :          0 : static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
     724                 :            :         usec_t new_period;
     725                 :            : 
     726         [ #  # ]:          0 :         if (quota == USEC_INFINITY)
     727                 :            :                 /* Always use default period for infinity quota. */
     728                 :          0 :                 return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
     729                 :            : 
     730         [ #  # ]:          0 :         if (period == USEC_INFINITY)
     731                 :            :                 /* Default period was requested. */
     732                 :          0 :                 period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
     733                 :            : 
     734                 :            :         /* Clamp to interval [1ms, 1s] */
     735                 :          0 :         new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
     736                 :            : 
     737         [ #  # ]:          0 :         if (new_period != period) {
     738                 :            :                 char v[FORMAT_TIMESPAN_MAX];
     739   [ #  #  #  #  :          0 :                 log_unit_full(u, u->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING, 0,
                   #  # ]
     740                 :            :                               "Clamping CPU interval for cpu.max: period is now %s",
     741                 :            :                               format_timespan(v, sizeof(v), new_period, 1));
     742                 :          0 :                 u->warned_clamping_cpu_quota_period = true;
     743                 :            :         }
     744                 :            : 
     745                 :          0 :         return new_period;
     746                 :            : }
     747                 :            : 
     748                 :          0 : static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
     749                 :            :         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
     750                 :            : 
     751         [ #  # ]:          0 :         xsprintf(buf, "%" PRIu64 "\n", weight);
     752                 :          0 :         (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
     753                 :          0 : }
     754                 :            : 
     755                 :          0 : static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
     756                 :            :         char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
     757                 :            : 
     758                 :          0 :         period = cgroup_cpu_adjust_period_and_log(u, period, quota);
     759         [ #  # ]:          0 :         if (quota != USEC_INFINITY)
     760         [ #  # ]:          0 :                 xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
     761                 :            :                          MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period);
     762                 :            :         else
     763         [ #  # ]:          0 :                 xsprintf(buf, "max " USEC_FMT "\n", period);
     764                 :          0 :         (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
     765                 :          0 : }
     766                 :            : 
     767                 :          0 : static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
     768                 :            :         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
     769                 :            : 
     770         [ #  # ]:          0 :         xsprintf(buf, "%" PRIu64 "\n", shares);
     771                 :          0 :         (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
     772                 :          0 : }
     773                 :            : 
     774                 :          0 : static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) {
     775                 :            :         char buf[DECIMAL_STR_MAX(usec_t) + 2];
     776                 :            : 
     777                 :          0 :         period = cgroup_cpu_adjust_period_and_log(u, period, quota);
     778                 :            : 
     779         [ #  # ]:          0 :         xsprintf(buf, USEC_FMT "\n", period);
     780                 :          0 :         (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
     781                 :            : 
     782         [ #  # ]:          0 :         if (quota != USEC_INFINITY) {
     783         [ #  # ]:          0 :                 xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC));
     784                 :          0 :                 (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
     785                 :            :         } else
     786                 :          0 :                 (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
     787                 :          0 : }
     788                 :            : 
     789                 :          0 : static uint64_t cgroup_cpu_shares_to_weight(uint64_t shares) {
     790         [ #  # ]:          0 :         return CLAMP(shares * CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT,
     791                 :            :                      CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
     792                 :            : }
     793                 :            : 
     794                 :          0 : static uint64_t cgroup_cpu_weight_to_shares(uint64_t weight) {
     795         [ #  # ]:          0 :         return CLAMP(weight * CGROUP_CPU_SHARES_DEFAULT / CGROUP_WEIGHT_DEFAULT,
     796                 :            :                      CGROUP_CPU_SHARES_MIN, CGROUP_CPU_SHARES_MAX);
     797                 :            : }
     798                 :            : 
     799                 :       2316 : static bool cgroup_context_has_io_config(CGroupContext *c) {
     800                 :       4632 :         return c->io_accounting ||
     801         [ +  + ]:       2316 :                 c->io_weight != CGROUP_WEIGHT_INVALID ||
     802         [ +  - ]:       2276 :                 c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
     803         [ +  - ]:       2276 :                 c->io_device_weights ||
     804   [ +  -  +  - ]:       6908 :                 c->io_device_latencies ||
     805         [ -  + ]:       2276 :                 c->io_device_limits;
     806                 :            : }
     807                 :            : 
     808                 :       2276 : static bool cgroup_context_has_blockio_config(CGroupContext *c) {
     809                 :       4552 :         return c->blockio_accounting ||
     810         [ +  - ]:       2276 :                 c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
     811         [ +  - ]:       2276 :                 c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
     812   [ +  -  +  - ]:       6828 :                 c->blockio_device_weights ||
     813         [ -  + ]:       2276 :                 c->blockio_device_bandwidths;
     814                 :            : }
     815                 :            : 
     816                 :          0 : static uint64_t cgroup_context_io_weight(CGroupContext *c, ManagerState state) {
     817   [ #  #  #  # ]:          0 :         if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
     818         [ #  # ]:          0 :             c->startup_io_weight != CGROUP_WEIGHT_INVALID)
     819                 :          0 :                 return c->startup_io_weight;
     820         [ #  # ]:          0 :         else if (c->io_weight != CGROUP_WEIGHT_INVALID)
     821                 :          0 :                 return c->io_weight;
     822                 :            :         else
     823                 :          0 :                 return CGROUP_WEIGHT_DEFAULT;
     824                 :            : }
     825                 :            : 
     826                 :          0 : static uint64_t cgroup_context_blkio_weight(CGroupContext *c, ManagerState state) {
     827   [ #  #  #  # ]:          0 :         if (IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) &&
     828         [ #  # ]:          0 :             c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
     829                 :          0 :                 return c->startup_blockio_weight;
     830         [ #  # ]:          0 :         else if (c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID)
     831                 :          0 :                 return c->blockio_weight;
     832                 :            :         else
     833                 :          0 :                 return CGROUP_BLKIO_WEIGHT_DEFAULT;
     834                 :            : }
     835                 :            : 
     836                 :          0 : static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight) {
     837         [ #  # ]:          0 :         return CLAMP(blkio_weight * CGROUP_WEIGHT_DEFAULT / CGROUP_BLKIO_WEIGHT_DEFAULT,
     838                 :            :                      CGROUP_WEIGHT_MIN, CGROUP_WEIGHT_MAX);
     839                 :            : }
     840                 :            : 
     841                 :          0 : static uint64_t cgroup_weight_io_to_blkio(uint64_t io_weight) {
     842         [ #  # ]:          0 :         return CLAMP(io_weight * CGROUP_BLKIO_WEIGHT_DEFAULT / CGROUP_WEIGHT_DEFAULT,
     843                 :            :                      CGROUP_BLKIO_WEIGHT_MIN, CGROUP_BLKIO_WEIGHT_MAX);
     844                 :            : }
     845                 :            : 
     846                 :          0 : static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_t io_weight) {
     847                 :            :         char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
     848                 :            :         dev_t dev;
     849                 :            :         int r;
     850                 :            : 
     851                 :          0 :         r = lookup_block_device(dev_path, &dev);
     852         [ #  # ]:          0 :         if (r < 0)
     853                 :          0 :                 return;
     854                 :            : 
     855         [ #  # ]:          0 :         xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), io_weight);
     856                 :          0 :         (void) set_attribute_and_warn(u, "io", "io.weight", buf);
     857                 :            : }
     858                 :            : 
     859                 :          0 : static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
     860                 :            :         char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
     861                 :            :         dev_t dev;
     862                 :            :         int r;
     863                 :            : 
     864                 :          0 :         r = lookup_block_device(dev_path, &dev);
     865         [ #  # ]:          0 :         if (r < 0)
     866                 :          0 :                 return;
     867                 :            : 
     868         [ #  # ]:          0 :         xsprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), blkio_weight);
     869                 :          0 :         (void) set_attribute_and_warn(u, "blkio", "blkio.weight_device", buf);
     870                 :            : }
     871                 :            : 
     872                 :          0 : static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) {
     873                 :            :         char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1];
     874                 :            :         dev_t dev;
     875                 :            :         int r;
     876                 :            : 
     877                 :          0 :         r = lookup_block_device(dev_path, &dev);
     878         [ #  # ]:          0 :         if (r < 0)
     879                 :          0 :                 return;
     880                 :            : 
     881         [ #  # ]:          0 :         if (target != USEC_INFINITY)
     882         [ #  # ]:          0 :                 xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target);
     883                 :            :         else
     884         [ #  # ]:          0 :                 xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev));
     885                 :            : 
     886                 :          0 :         (void) set_attribute_and_warn(u, "io", "io.latency", buf);
     887                 :            : }
     888                 :            : 
     889                 :          0 : static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
     890                 :            :         char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
     891                 :            :         char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
     892                 :            :         CGroupIOLimitType type;
     893                 :            :         dev_t dev;
     894                 :            :         int r;
     895                 :            : 
     896                 :          0 :         r = lookup_block_device(dev_path, &dev);
     897         [ #  # ]:          0 :         if (r < 0)
     898                 :          0 :                 return;
     899                 :            : 
     900         [ #  # ]:          0 :         for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
     901         [ #  # ]:          0 :                 if (limits[type] != cgroup_io_limit_defaults[type])
     902         [ #  # ]:          0 :                         xsprintf(limit_bufs[type], "%" PRIu64, limits[type]);
     903                 :            :                 else
     904   [ #  #  #  # ]:          0 :                         xsprintf(limit_bufs[type], "%s", limits[type] == CGROUP_LIMIT_MAX ? "max" : "0");
     905                 :            : 
     906         [ #  # ]:          0 :         xsprintf(buf, "%u:%u rbps=%s wbps=%s riops=%s wiops=%s\n", major(dev), minor(dev),
     907                 :            :                  limit_bufs[CGROUP_IO_RBPS_MAX], limit_bufs[CGROUP_IO_WBPS_MAX],
     908                 :            :                  limit_bufs[CGROUP_IO_RIOPS_MAX], limit_bufs[CGROUP_IO_WIOPS_MAX]);
     909                 :          0 :         (void) set_attribute_and_warn(u, "io", "io.max", buf);
     910                 :            : }
     911                 :            : 
     912                 :          0 : static void cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, uint64_t rbps, uint64_t wbps) {
     913                 :            :         char buf[DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1];
     914                 :            :         dev_t dev;
     915                 :            :         int r;
     916                 :            : 
     917                 :          0 :         r = lookup_block_device(dev_path, &dev);
     918         [ #  # ]:          0 :         if (r < 0)
     919                 :          0 :                 return;
     920                 :            : 
     921                 :          0 :         sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), rbps);
     922                 :          0 :         (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.read_bps_device", buf);
     923                 :            : 
     924                 :          0 :         sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), wbps);
     925                 :          0 :         (void) set_attribute_and_warn(u, "blkio", "blkio.throttle.write_bps_device", buf);
     926                 :            : }
     927                 :            : 
     928                 :        220 : static bool unit_has_unified_memory_config(Unit *u) {
     929                 :            :         CGroupContext *c;
     930                 :            : 
     931         [ -  + ]:        220 :         assert(u);
     932                 :            : 
     933                 :        220 :         c = unit_get_cgroup_context(u);
     934         [ -  + ]:        220 :         assert(c);
     935                 :            : 
     936         [ +  - ]:        220 :         return c->memory_min > 0 || unit_get_ancestor_memory_low(u) > 0 ||
     937   [ +  -  +  -  :        660 :                c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX ||
                   +  - ]
     938         [ -  + ]:        220 :                c->memory_swap_max != CGROUP_LIMIT_MAX;
     939                 :            : }
     940                 :            : 
     941                 :          0 : static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
     942                 :          0 :         char buf[DECIMAL_STR_MAX(uint64_t) + 1] = "max\n";
     943                 :            : 
     944         [ #  # ]:          0 :         if (v != CGROUP_LIMIT_MAX)
     945         [ #  # ]:          0 :                 xsprintf(buf, "%" PRIu64 "\n", v);
     946                 :            : 
     947                 :          0 :         (void) set_attribute_and_warn(u, "memory", file, buf);
     948                 :          0 : }
     949                 :            : 
     950                 :          0 : static void cgroup_apply_firewall(Unit *u) {
     951         [ #  # ]:          0 :         assert(u);
     952                 :            : 
     953                 :            :         /* Best-effort: let's apply IP firewalling and/or accounting if that's enabled */
     954                 :            : 
     955         [ #  # ]:          0 :         if (bpf_firewall_compile(u) < 0)
     956                 :          0 :                 return;
     957                 :            : 
     958                 :          0 :         (void) bpf_firewall_load_custom(u);
     959                 :          0 :         (void) bpf_firewall_install(u);
     960                 :            : }
     961                 :            : 
     962                 :         24 : static void cgroup_context_apply(
     963                 :            :                 Unit *u,
     964                 :            :                 CGroupMask apply_mask,
     965                 :            :                 ManagerState state) {
     966                 :            : 
     967                 :            :         const char *path;
     968                 :            :         CGroupContext *c;
     969                 :            :         bool is_host_root, is_local_root;
     970                 :            :         int r;
     971                 :            : 
     972         [ -  + ]:         24 :         assert(u);
     973                 :            : 
     974                 :            :         /* Nothing to do? Exit early! */
     975         [ -  + ]:         24 :         if (apply_mask == 0)
     976                 :          0 :                 return;
     977                 :            : 
     978                 :            :         /* Some cgroup attributes are not supported on the host root cgroup, hence silently ignore them here. And other
     979                 :            :          * attributes should only be managed for cgroups further down the tree. */
     980                 :         24 :         is_local_root = unit_has_name(u, SPECIAL_ROOT_SLICE);
     981                 :         24 :         is_host_root = unit_has_host_root_cgroup(u);
     982                 :            : 
     983         [ -  + ]:         24 :         assert_se(c = unit_get_cgroup_context(u));
     984         [ -  + ]:         24 :         assert_se(path = u->cgroup_path);
     985                 :            : 
     986         [ +  - ]:         24 :         if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
     987                 :         24 :                 path = "/";
     988                 :            : 
     989                 :            :         /* We generally ignore errors caused by read-only mounted cgroup trees (assuming we are running in a container
     990                 :            :          * then), and missing cgroups, i.e. EROFS and ENOENT. */
     991                 :            : 
     992                 :            :         /* In fully unified mode these attributes don't exist on the host cgroup root. On legacy the weights exist, but
     993                 :            :          * setting the weight makes very little sense on the host root cgroup, as there are no other cgroups at this
     994                 :            :          * level. The quota exists there too, but any attempt to write to it is refused with EINVAL. Inside of
     995                 :            :          * containers we want to leave control of these to the container manager (and if cgroup v2 delegation is used
     996                 :            :          * we couldn't even write to them if we wanted to). */
     997   [ -  +  #  # ]:         24 :         if ((apply_mask & CGROUP_MASK_CPU) && !is_local_root) {
     998                 :            : 
     999         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
    1000                 :            :                         uint64_t weight;
    1001                 :            : 
    1002         [ #  # ]:          0 :                         if (cgroup_context_has_cpu_weight(c))
    1003                 :          0 :                                 weight = cgroup_context_cpu_weight(c, state);
    1004         [ #  # ]:          0 :                         else if (cgroup_context_has_cpu_shares(c)) {
    1005                 :            :                                 uint64_t shares;
    1006                 :            : 
    1007                 :          0 :                                 shares = cgroup_context_cpu_shares(c, state);
    1008                 :          0 :                                 weight = cgroup_cpu_shares_to_weight(shares);
    1009                 :            : 
    1010         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying [Startup]CPUShares=%" PRIu64 " as [Startup]CPUWeight=%" PRIu64 " on %s",
    1011                 :            :                                                   shares, weight, path);
    1012                 :            :                         } else
    1013                 :          0 :                                 weight = CGROUP_WEIGHT_DEFAULT;
    1014                 :            : 
    1015                 :          0 :                         cgroup_apply_unified_cpu_weight(u, weight);
    1016                 :          0 :                         cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
    1017                 :            : 
    1018                 :            :                 } else {
    1019                 :            :                         uint64_t shares;
    1020                 :            : 
    1021         [ #  # ]:          0 :                         if (cgroup_context_has_cpu_weight(c)) {
    1022                 :            :                                 uint64_t weight;
    1023                 :            : 
    1024                 :          0 :                                 weight = cgroup_context_cpu_weight(c, state);
    1025                 :          0 :                                 shares = cgroup_cpu_weight_to_shares(weight);
    1026                 :            : 
    1027         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying [Startup]CPUWeight=%" PRIu64 " as [Startup]CPUShares=%" PRIu64 " on %s",
    1028                 :            :                                                   weight, shares, path);
    1029         [ #  # ]:          0 :                         } else if (cgroup_context_has_cpu_shares(c))
    1030                 :          0 :                                 shares = cgroup_context_cpu_shares(c, state);
    1031                 :            :                         else
    1032                 :          0 :                                 shares = CGROUP_CPU_SHARES_DEFAULT;
    1033                 :            : 
    1034                 :          0 :                         cgroup_apply_legacy_cpu_shares(u, shares);
    1035                 :          0 :                         cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
    1036                 :            :                 }
    1037                 :            :         }
    1038                 :            : 
    1039                 :            :         /* The 'io' controller attributes are not exported on the host's root cgroup (being a pure cgroup v2
    1040                 :            :          * controller), and in case of containers we want to leave control of these attributes to the container manager
    1041                 :            :          * (and we couldn't access that stuff anyway, even if we tried if proper delegation is used). */
    1042   [ -  +  #  # ]:         24 :         if ((apply_mask & CGROUP_MASK_IO) && !is_local_root) {
    1043                 :            :                 char buf[8+DECIMAL_STR_MAX(uint64_t)+1];
    1044                 :            :                 bool has_io, has_blockio;
    1045                 :            :                 uint64_t weight;
    1046                 :            : 
    1047                 :          0 :                 has_io = cgroup_context_has_io_config(c);
    1048                 :          0 :                 has_blockio = cgroup_context_has_blockio_config(c);
    1049                 :            : 
    1050         [ #  # ]:          0 :                 if (has_io)
    1051                 :          0 :                         weight = cgroup_context_io_weight(c, state);
    1052         [ #  # ]:          0 :                 else if (has_blockio) {
    1053                 :            :                         uint64_t blkio_weight;
    1054                 :            : 
    1055                 :          0 :                         blkio_weight = cgroup_context_blkio_weight(c, state);
    1056                 :          0 :                         weight = cgroup_weight_blkio_to_io(blkio_weight);
    1057                 :            : 
    1058         [ #  # ]:          0 :                         log_cgroup_compat(u, "Applying [Startup]BlockIOWeight=%" PRIu64 " as [Startup]IOWeight=%" PRIu64,
    1059                 :            :                                           blkio_weight, weight);
    1060                 :            :                 } else
    1061                 :          0 :                         weight = CGROUP_WEIGHT_DEFAULT;
    1062                 :            : 
    1063         [ #  # ]:          0 :                 xsprintf(buf, "default %" PRIu64 "\n", weight);
    1064                 :          0 :                 (void) set_attribute_and_warn(u, "io", "io.weight", buf);
    1065                 :            : 
    1066                 :            :                 /* FIXME: drop this when distro kernels properly support BFQ through "io.weight"
    1067                 :            :                  * See also: https://github.com/systemd/systemd/pull/13335 */
    1068         [ #  # ]:          0 :                 xsprintf(buf, "%" PRIu64 "\n", weight);
    1069                 :          0 :                 (void) set_attribute_and_warn(u, "io", "io.bfq.weight", buf);
    1070                 :            : 
    1071         [ #  # ]:          0 :                 if (has_io) {
    1072                 :            :                         CGroupIODeviceLatency *latency;
    1073                 :            :                         CGroupIODeviceLimit *limit;
    1074                 :            :                         CGroupIODeviceWeight *w;
    1075                 :            : 
    1076         [ #  # ]:          0 :                         LIST_FOREACH(device_weights, w, c->io_device_weights)
    1077                 :          0 :                                 cgroup_apply_io_device_weight(u, w->path, w->weight);
    1078                 :            : 
    1079         [ #  # ]:          0 :                         LIST_FOREACH(device_limits, limit, c->io_device_limits)
    1080                 :          0 :                                 cgroup_apply_io_device_limit(u, limit->path, limit->limits);
    1081                 :            : 
    1082         [ #  # ]:          0 :                         LIST_FOREACH(device_latencies, latency, c->io_device_latencies)
    1083                 :          0 :                                 cgroup_apply_io_device_latency(u, latency->path, latency->target_usec);
    1084                 :            : 
    1085         [ #  # ]:          0 :                 } else if (has_blockio) {
    1086                 :            :                         CGroupBlockIODeviceWeight *w;
    1087                 :            :                         CGroupBlockIODeviceBandwidth *b;
    1088                 :            : 
    1089         [ #  # ]:          0 :                         LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
    1090                 :          0 :                                 weight = cgroup_weight_blkio_to_io(w->weight);
    1091                 :            : 
    1092         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying BlockIODeviceWeight=%" PRIu64 " as IODeviceWeight=%" PRIu64 " for %s",
    1093                 :            :                                                   w->weight, weight, w->path);
    1094                 :            : 
    1095                 :          0 :                                 cgroup_apply_io_device_weight(u, w->path, weight);
    1096                 :            :                         }
    1097                 :            : 
    1098         [ #  # ]:          0 :                         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
    1099                 :            :                                 uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
    1100                 :            :                                 CGroupIOLimitType type;
    1101                 :            : 
    1102         [ #  # ]:          0 :                                 for (type = 0; type < _CGROUP_IO_LIMIT_TYPE_MAX; type++)
    1103                 :          0 :                                         limits[type] = cgroup_io_limit_defaults[type];
    1104                 :            : 
    1105                 :          0 :                                 limits[CGROUP_IO_RBPS_MAX] = b->rbps;
    1106                 :          0 :                                 limits[CGROUP_IO_WBPS_MAX] = b->wbps;
    1107                 :            : 
    1108         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax= for %s",
    1109                 :            :                                                   b->rbps, b->wbps, b->path);
    1110                 :            : 
    1111                 :          0 :                                 cgroup_apply_io_device_limit(u, b->path, limits);
    1112                 :            :                         }
    1113                 :            :                 }
    1114                 :            :         }
    1115                 :            : 
    1116         [ -  + ]:         24 :         if (apply_mask & CGROUP_MASK_BLKIO) {
    1117                 :            :                 bool has_io, has_blockio;
    1118                 :            : 
    1119                 :          0 :                 has_io = cgroup_context_has_io_config(c);
    1120                 :          0 :                 has_blockio = cgroup_context_has_blockio_config(c);
    1121                 :            : 
    1122                 :            :                 /* Applying a 'weight' never makes sense for the host root cgroup, and for containers this should be
    1123                 :            :                  * left to our container manager, too. */
    1124         [ #  # ]:          0 :                 if (!is_local_root) {
    1125                 :            :                         char buf[DECIMAL_STR_MAX(uint64_t)+1];
    1126                 :            :                         uint64_t weight;
    1127                 :            : 
    1128         [ #  # ]:          0 :                         if (has_io) {
    1129                 :            :                                 uint64_t io_weight;
    1130                 :            : 
    1131                 :          0 :                                 io_weight = cgroup_context_io_weight(c, state);
    1132                 :          0 :                                 weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
    1133                 :            : 
    1134         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying [Startup]IOWeight=%" PRIu64 " as [Startup]BlockIOWeight=%" PRIu64,
    1135                 :            :                                                   io_weight, weight);
    1136         [ #  # ]:          0 :                         } else if (has_blockio)
    1137                 :          0 :                                 weight = cgroup_context_blkio_weight(c, state);
    1138                 :            :                         else
    1139                 :          0 :                                 weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
    1140                 :            : 
    1141         [ #  # ]:          0 :                         xsprintf(buf, "%" PRIu64 "\n", weight);
    1142                 :          0 :                         (void) set_attribute_and_warn(u, "blkio", "blkio.weight", buf);
    1143                 :            : 
    1144         [ #  # ]:          0 :                         if (has_io) {
    1145                 :            :                                 CGroupIODeviceWeight *w;
    1146                 :            : 
    1147         [ #  # ]:          0 :                                 LIST_FOREACH(device_weights, w, c->io_device_weights) {
    1148                 :          0 :                                         weight = cgroup_weight_io_to_blkio(w->weight);
    1149                 :            : 
    1150         [ #  # ]:          0 :                                         log_cgroup_compat(u, "Applying IODeviceWeight=%" PRIu64 " as BlockIODeviceWeight=%" PRIu64 " for %s",
    1151                 :            :                                                           w->weight, weight, w->path);
    1152                 :            : 
    1153                 :          0 :                                         cgroup_apply_blkio_device_weight(u, w->path, weight);
    1154                 :            :                                 }
    1155         [ #  # ]:          0 :                         } else if (has_blockio) {
    1156                 :            :                                 CGroupBlockIODeviceWeight *w;
    1157                 :            : 
    1158         [ #  # ]:          0 :                                 LIST_FOREACH(device_weights, w, c->blockio_device_weights)
    1159                 :          0 :                                         cgroup_apply_blkio_device_weight(u, w->path, w->weight);
    1160                 :            :                         }
    1161                 :            :                 }
    1162                 :            : 
    1163                 :            :                 /* The bandwidth limits are something that make sense to be applied to the host's root but not container
    1164                 :            :                  * roots, as there we want the container manager to handle it */
    1165   [ #  #  #  # ]:          0 :                 if (is_host_root || !is_local_root) {
    1166         [ #  # ]:          0 :                         if (has_io) {
    1167                 :            :                                 CGroupIODeviceLimit *l;
    1168                 :            : 
    1169         [ #  # ]:          0 :                                 LIST_FOREACH(device_limits, l, c->io_device_limits) {
    1170         [ #  # ]:          0 :                                         log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth=%" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax= for %s",
    1171                 :            :                                                           l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
    1172                 :            : 
    1173                 :          0 :                                         cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]);
    1174                 :            :                                 }
    1175         [ #  # ]:          0 :                         } else if (has_blockio) {
    1176                 :            :                                 CGroupBlockIODeviceBandwidth *b;
    1177                 :            : 
    1178         [ #  # ]:          0 :                                 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths)
    1179                 :          0 :                                         cgroup_apply_blkio_device_limit(u, b->path, b->rbps, b->wbps);
    1180                 :            :                         }
    1181                 :            :                 }
    1182                 :            :         }
    1183                 :            : 
    1184                 :            :         /* In unified mode 'memory' attributes do not exist on the root cgroup. In legacy mode 'memory.limit_in_bytes'
    1185                 :            :          * exists on the root cgroup, but any writes to it are refused with EINVAL. And if we run in a container we
    1186                 :            :          * want to leave control to the container manager (and if proper cgroup v2 delegation is used we couldn't even
    1187                 :            :          * write to this if we wanted to.) */
    1188   [ +  -  -  + ]:         24 :         if ((apply_mask & CGROUP_MASK_MEMORY) && !is_local_root) {
    1189                 :            : 
    1190         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
    1191                 :          0 :                         uint64_t max, swap_max = CGROUP_LIMIT_MAX;
    1192                 :            : 
    1193         [ #  # ]:          0 :                         if (unit_has_unified_memory_config(u)) {
    1194                 :          0 :                                 max = c->memory_max;
    1195                 :          0 :                                 swap_max = c->memory_swap_max;
    1196                 :            :                         } else {
    1197                 :          0 :                                 max = c->memory_limit;
    1198                 :            : 
    1199         [ #  # ]:          0 :                                 if (max != CGROUP_LIMIT_MAX)
    1200         [ #  # ]:          0 :                                         log_cgroup_compat(u, "Applying MemoryLimit=%" PRIu64 " as MemoryMax=", max);
    1201                 :            :                         }
    1202                 :            : 
    1203                 :          0 :                         cgroup_apply_unified_memory_limit(u, "memory.min", c->memory_min);
    1204                 :          0 :                         cgroup_apply_unified_memory_limit(u, "memory.low", unit_get_ancestor_memory_low(u));
    1205                 :          0 :                         cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
    1206                 :          0 :                         cgroup_apply_unified_memory_limit(u, "memory.max", max);
    1207                 :          0 :                         cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
    1208                 :            : 
    1209                 :          0 :                         (void) set_attribute_and_warn(u, "memory", "memory.oom.group", one_zero(c->memory_oom_group));
    1210                 :            : 
    1211                 :            :                 } else {
    1212                 :            :                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
    1213                 :            :                         uint64_t val;
    1214                 :            : 
    1215         [ #  # ]:          0 :                         if (unit_has_unified_memory_config(u)) {
    1216                 :          0 :                                 val = c->memory_max;
    1217         [ #  # ]:          0 :                                 log_cgroup_compat(u, "Applying MemoryMax=%" PRIi64 " as MemoryLimit=", val);
    1218                 :            :                         } else
    1219                 :          0 :                                 val = c->memory_limit;
    1220                 :            : 
    1221         [ #  # ]:          0 :                         if (val == CGROUP_LIMIT_MAX)
    1222                 :          0 :                                 strncpy(buf, "-1\n", sizeof(buf));
    1223                 :            :                         else
    1224         [ #  # ]:          0 :                                 xsprintf(buf, "%" PRIu64 "\n", val);
    1225                 :            : 
    1226                 :          0 :                         (void) set_attribute_and_warn(u, "memory", "memory.limit_in_bytes", buf);
    1227                 :            :                 }
    1228                 :            :         }
    1229                 :            : 
    1230                 :            :         /* On cgroup v2 we can apply BPF everywhere. On cgroup v1 we apply it everywhere except for the root of
    1231                 :            :          * containers, where we leave this to the manager */
    1232   [ -  +  #  # ]:         24 :         if ((apply_mask & (CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES)) &&
    1233   [ #  #  #  # ]:          0 :             (is_host_root || cg_all_unified() > 0 || !is_local_root)) {
    1234                 :          0 :                 _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
    1235                 :            :                 CGroupDeviceAllow *a;
    1236                 :            : 
    1237         [ #  # ]:          0 :                 if (cg_all_unified() > 0) {
    1238                 :          0 :                         r = cgroup_init_device_bpf(&prog, c->device_policy, c->device_allow);
    1239         [ #  # ]:          0 :                         if (r < 0)
    1240         [ #  # ]:          0 :                                 log_unit_warning_errno(u, r, "Failed to initialize device control bpf program: %m");
    1241                 :            :                 } else {
    1242                 :            :                         /* Changing the devices list of a populated cgroup might result in EINVAL, hence ignore EINVAL
    1243                 :            :                          * here. */
    1244                 :            : 
    1245   [ #  #  #  # ]:          0 :                         if (c->device_allow || c->device_policy != CGROUP_AUTO)
    1246                 :          0 :                                 r = cg_set_attribute("devices", path, "devices.deny", "a");
    1247                 :            :                         else
    1248                 :          0 :                                 r = cg_set_attribute("devices", path, "devices.allow", "a");
    1249         [ #  # ]:          0 :                         if (r < 0)
    1250   [ #  #  #  #  :          0 :                                 log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
          #  #  #  #  #  
                      # ]
    1251                 :            :                                               "Failed to reset devices.allow/devices.deny: %m");
    1252                 :            :                 }
    1253                 :            : 
    1254         [ #  # ]:          0 :                 if (c->device_policy == CGROUP_CLOSED ||
    1255   [ #  #  #  # ]:          0 :                     (c->device_policy == CGROUP_AUTO && c->device_allow)) {
    1256                 :            :                         static const char auto_devices[] =
    1257                 :            :                                 "/dev/null\0" "rwm\0"
    1258                 :            :                                 "/dev/zero\0" "rwm\0"
    1259                 :            :                                 "/dev/full\0" "rwm\0"
    1260                 :            :                                 "/dev/random\0" "rwm\0"
    1261                 :            :                                 "/dev/urandom\0" "rwm\0"
    1262                 :            :                                 "/dev/tty\0" "rwm\0"
    1263                 :            :                                 "/dev/ptmx\0" "rwm\0"
    1264                 :            :                                 /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
    1265                 :            :                                 "/run/systemd/inaccessible/chr\0" "rwm\0"
    1266                 :            :                                 "/run/systemd/inaccessible/blk\0" "rwm\0";
    1267                 :            : 
    1268                 :            :                         const char *x, *y;
    1269                 :            : 
    1270   [ #  #  #  #  :          0 :                         NULSTR_FOREACH_PAIR(x, y, auto_devices)
                   #  # ]
    1271                 :          0 :                                 (void) whitelist_device(prog, path, x, y);
    1272                 :            : 
    1273                 :            :                         /* PTS (/dev/pts) devices may not be duplicated, but accessed */
    1274                 :          0 :                         (void) whitelist_major(prog, path, "pts", 'c', "rw");
    1275                 :            :                 }
    1276                 :            : 
    1277         [ #  # ]:          0 :                 LIST_FOREACH(device_allow, a, c->device_allow) {
    1278                 :            :                         char acc[4], *val;
    1279                 :          0 :                         unsigned k = 0;
    1280                 :            : 
    1281         [ #  # ]:          0 :                         if (a->r)
    1282                 :          0 :                                 acc[k++] = 'r';
    1283         [ #  # ]:          0 :                         if (a->w)
    1284                 :          0 :                                 acc[k++] = 'w';
    1285         [ #  # ]:          0 :                         if (a->m)
    1286                 :          0 :                                 acc[k++] = 'm';
    1287                 :            : 
    1288         [ #  # ]:          0 :                         if (k == 0)
    1289                 :          0 :                                 continue;
    1290                 :            : 
    1291                 :          0 :                         acc[k++] = 0;
    1292                 :            : 
    1293         [ #  # ]:          0 :                         if (path_startswith(a->path, "/dev/"))
    1294                 :          0 :                                 (void) whitelist_device(prog, path, a->path, acc);
    1295         [ #  # ]:          0 :                         else if ((val = startswith(a->path, "block-")))
    1296                 :          0 :                                 (void) whitelist_major(prog, path, val, 'b', acc);
    1297         [ #  # ]:          0 :                         else if ((val = startswith(a->path, "char-")))
    1298                 :          0 :                                 (void) whitelist_major(prog, path, val, 'c', acc);
    1299                 :            :                         else
    1300         [ #  # ]:          0 :                                 log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
    1301                 :            :                 }
    1302                 :            : 
    1303                 :          0 :                 r = cgroup_apply_device_bpf(u, prog, c->device_policy, c->device_allow);
    1304         [ #  # ]:          0 :                 if (r < 0) {
    1305                 :            :                         static bool warned = false;
    1306                 :            : 
    1307   [ #  #  #  # ]:          0 :                         log_full_errno(warned ? LOG_DEBUG : LOG_WARNING, r,
    1308                 :            :                                  "Unit %s configures device ACL, but the local system doesn't seem to support the BPF-based device controller.\n"
    1309                 :            :                                  "Proceeding WITHOUT applying ACL (all devices will be accessible)!\n"
    1310                 :            :                                  "(This warning is only shown for the first loaded unit using device ACL.)", u->id);
    1311                 :            : 
    1312                 :          0 :                         warned = true;
    1313                 :            :                 }
    1314                 :            :         }
    1315                 :            : 
    1316         [ +  - ]:         24 :         if (apply_mask & CGROUP_MASK_PIDS) {
    1317                 :            : 
    1318         [ -  + ]:         24 :                 if (is_host_root) {
    1319                 :            :                         /* So, the "pids" controller does not expose anything on the root cgroup, in order not to
    1320                 :            :                          * replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when
    1321                 :            :                          * the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a
    1322                 :            :                          * non-obvious asymmetry however: unlike the cgroup properties we don't really want to take
    1323                 :            :                          * exclusive ownership of the sysctls, but we still want to honour things if the user sets
    1324                 :            :                          * limits. Hence we employ sort of a one-way strategy: when the user sets a bounded limit
    1325                 :            :                          * through us it counts. When the user afterwards unsets it again (i.e. sets it to unbounded)
    1326                 :            :                          * it also counts. But if the user never set a limit through us (i.e. we are the default of
    1327                 :            :                          * "unbounded") we leave things unmodified. For this we manage a global boolean that we turn on
    1328                 :            :                          * the first time we set a limit. Note that this boolean is flushed out on manager reload,
    1329                 :            :                          * which is desirable so that there's an official way to release control of the sysctl from
    1330                 :            :                          * systemd: set the limit to unbounded and reload. */
    1331                 :            : 
    1332         [ #  # ]:          0 :                         if (c->tasks_max != CGROUP_LIMIT_MAX) {
    1333                 :          0 :                                 u->manager->sysctl_pid_max_changed = true;
    1334                 :          0 :                                 r = procfs_tasks_set_limit(c->tasks_max);
    1335         [ #  # ]:          0 :                         } else if (u->manager->sysctl_pid_max_changed)
    1336                 :          0 :                                 r = procfs_tasks_set_limit(TASKS_MAX);
    1337                 :            :                         else
    1338                 :          0 :                                 r = 0;
    1339         [ #  # ]:          0 :                         if (r < 0)
    1340   [ #  #  #  #  :          0 :                                 log_unit_full(u, LOG_LEVEL_CGROUP_WRITE(r), r,
          #  #  #  #  #  
                      # ]
    1341                 :            :                                               "Failed to write to tasks limit sysctls: %m");
    1342                 :            :                 }
    1343                 :            : 
    1344                 :            :                 /* The attribute itself is not available on the host root cgroup, and in the container case we want to
    1345                 :            :                  * leave it for the container manager. */
    1346         [ -  + ]:         24 :                 if (!is_local_root) {
    1347         [ #  # ]:          0 :                         if (c->tasks_max != CGROUP_LIMIT_MAX) {
    1348                 :            :                                 char buf[DECIMAL_STR_MAX(uint64_t) + 2];
    1349                 :            : 
    1350                 :          0 :                                 sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
    1351                 :          0 :                                 (void) set_attribute_and_warn(u, "pids", "pids.max", buf);
    1352                 :            :                         } else
    1353                 :          0 :                                 (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
    1354                 :            :                 }
    1355                 :            :         }
    1356                 :            : 
    1357         [ -  + ]:         24 :         if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
    1358                 :          0 :                 cgroup_apply_firewall(u);
    1359                 :            : }
    1360                 :            : 
    1361                 :       2316 : static bool unit_get_needs_bpf_firewall(Unit *u) {
    1362                 :            :         CGroupContext *c;
    1363                 :            :         Unit *p;
    1364         [ -  + ]:       2316 :         assert(u);
    1365                 :            : 
    1366                 :       2316 :         c = unit_get_cgroup_context(u);
    1367         [ -  + ]:       2316 :         if (!c)
    1368                 :          0 :                 return false;
    1369                 :            : 
    1370         [ +  - ]:       2316 :         if (c->ip_accounting ||
    1371         [ +  - ]:       2316 :             c->ip_address_allow ||
    1372         [ +  - ]:       2316 :             c->ip_address_deny ||
    1373         [ +  - ]:       2316 :             c->ip_filters_ingress ||
    1374         [ -  + ]:       2316 :             c->ip_filters_egress)
    1375                 :          0 :                 return true;
    1376                 :            : 
    1377                 :            :         /* If any parent slice has an IP access list defined, it applies too */
    1378         [ +  + ]:       4648 :         for (p = UNIT_DEREF(u->slice); p; p = UNIT_DEREF(p->slice)) {
    1379                 :       2332 :                 c = unit_get_cgroup_context(p);
    1380         [ -  + ]:       2332 :                 if (!c)
    1381                 :          0 :                         return false;
    1382                 :            : 
    1383         [ +  - ]:       2332 :                 if (c->ip_address_allow ||
    1384         [ -  + ]:       2332 :                     c->ip_address_deny)
    1385                 :          0 :                         return true;
    1386                 :            :         }
    1387                 :            : 
    1388                 :       2316 :         return false;
    1389                 :            : }
    1390                 :            : 
    1391                 :       2316 : static CGroupMask unit_get_cgroup_mask(Unit *u) {
    1392                 :       2316 :         CGroupMask mask = 0;
    1393                 :            :         CGroupContext *c;
    1394                 :            : 
    1395         [ -  + ]:       2316 :         assert(u);
    1396                 :            : 
    1397                 :       2316 :         c = unit_get_cgroup_context(u);
    1398                 :            : 
    1399         [ -  + ]:       2316 :         assert(c);
    1400                 :            : 
    1401                 :            :         /* Figure out which controllers we need, based on the cgroup context object */
    1402                 :            : 
    1403         [ +  + ]:       2316 :         if (c->cpu_accounting)
    1404                 :         20 :                 mask |= get_cpu_accounting_mask();
    1405                 :            : 
    1406   [ +  -  +  + ]:       4632 :         if (cgroup_context_has_cpu_weight(c) ||
    1407                 :       2316 :             cgroup_context_has_cpu_shares(c) ||
    1408         [ -  + ]:       2296 :             c->cpu_quota_per_sec_usec != USEC_INFINITY)
    1409                 :         20 :                 mask |= CGROUP_MASK_CPU;
    1410                 :            : 
    1411   [ +  +  -  + ]:       2316 :         if (cgroup_context_has_io_config(c) || cgroup_context_has_blockio_config(c))
    1412                 :         40 :                 mask |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
    1413                 :            : 
    1414         [ +  + ]:       2316 :         if (c->memory_accounting ||
    1415   [ +  +  -  + ]:        460 :             c->memory_limit != CGROUP_LIMIT_MAX ||
    1416                 :        220 :             unit_has_unified_memory_config(u))
    1417                 :       2096 :                 mask |= CGROUP_MASK_MEMORY;
    1418                 :            : 
    1419         [ +  - ]:       2316 :         if (c->device_allow ||
    1420         [ -  + ]:       2316 :             c->device_policy != CGROUP_AUTO)
    1421                 :          0 :                 mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
    1422                 :            : 
    1423         [ +  + ]:       2316 :         if (c->tasks_accounting ||
    1424         [ -  + ]:        260 :             c->tasks_max != CGROUP_LIMIT_MAX)
    1425                 :       2056 :                 mask |= CGROUP_MASK_PIDS;
    1426                 :            : 
    1427                 :       2316 :         return CGROUP_MASK_EXTEND_JOINED(mask);
    1428                 :            : }
    1429                 :            : 
    1430                 :       2316 : static CGroupMask unit_get_bpf_mask(Unit *u) {
    1431                 :       2316 :         CGroupMask mask = 0;
    1432                 :            : 
    1433                 :            :         /* Figure out which controllers we need, based on the cgroup context, possibly taking into account children
    1434                 :            :          * too. */
    1435                 :            : 
    1436         [ -  + ]:       2316 :         if (unit_get_needs_bpf_firewall(u))
    1437                 :          0 :                 mask |= CGROUP_MASK_BPF_FIREWALL;
    1438                 :            : 
    1439                 :       2316 :         return mask;
    1440                 :            : }
    1441                 :            : 
    1442                 :       6396 : CGroupMask unit_get_own_mask(Unit *u) {
    1443                 :            :         CGroupContext *c;
    1444                 :            : 
    1445                 :            :         /* Returns the mask of controllers the unit needs for itself. If a unit is not properly loaded, return an empty
    1446                 :            :          * mask, as we shouldn't reflect it in the cgroup hierarchy then. */
    1447                 :            : 
    1448         [ +  + ]:       6396 :         if (u->load_state != UNIT_LOADED)
    1449                 :        744 :                 return 0;
    1450                 :            : 
    1451                 :       5652 :         c = unit_get_cgroup_context(u);
    1452         [ +  + ]:       5652 :         if (!c)
    1453                 :       3336 :                 return 0;
    1454                 :            : 
    1455                 :       2316 :         return (unit_get_cgroup_mask(u) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
    1456                 :            : }
    1457                 :            : 
    1458                 :       7124 : CGroupMask unit_get_delegate_mask(Unit *u) {
    1459                 :            :         CGroupContext *c;
    1460                 :            : 
    1461                 :            :         /* If delegation is turned on, then turn on selected controllers, unless we are on the legacy hierarchy and the
    1462                 :            :          * process we fork into is known to drop privileges, and hence shouldn't get access to the controllers.
    1463                 :            :          *
    1464                 :            :          * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
    1465                 :            : 
    1466         [ +  - ]:       7124 :         if (!unit_cgroup_delegate(u))
    1467                 :       7124 :                 return 0;
    1468                 :            : 
    1469         [ #  # ]:          0 :         if (cg_all_unified() <= 0) {
    1470                 :            :                 ExecContext *e;
    1471                 :            : 
    1472                 :          0 :                 e = unit_get_exec_context(u);
    1473   [ #  #  #  # ]:          0 :                 if (e && !exec_context_maintains_privileges(e))
    1474                 :          0 :                         return 0;
    1475                 :            :         }
    1476                 :            : 
    1477         [ #  # ]:          0 :         assert_se(c = unit_get_cgroup_context(u));
    1478                 :          0 :         return CGROUP_MASK_EXTEND_JOINED(c->delegate_controllers);
    1479                 :            : }
    1480                 :            : 
    1481                 :       6580 : CGroupMask unit_get_members_mask(Unit *u) {
    1482         [ -  + ]:       6580 :         assert(u);
    1483                 :            : 
    1484                 :            :         /* Returns the mask of controllers all of the unit's children require, merged */
    1485                 :            : 
    1486         [ +  + ]:       6580 :         if (u->cgroup_members_mask_valid)
    1487                 :       5052 :                 return u->cgroup_members_mask; /* Use cached value if possible */
    1488                 :            : 
    1489                 :       1528 :         u->cgroup_members_mask = 0;
    1490                 :            : 
    1491         [ +  + ]:       1528 :         if (u->type == UNIT_SLICE) {
    1492                 :            :                 void *v;
    1493                 :            :                 Unit *member;
    1494                 :            :                 Iterator i;
    1495                 :            : 
    1496         [ +  + ]:       1448 :                 HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
    1497         [ +  + ]:       1384 :                         if (UNIT_DEREF(member->slice) == u)
    1498                 :       1372 :                                 u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
    1499                 :            :                 }
    1500                 :            :         }
    1501                 :            : 
    1502                 :       1528 :         u->cgroup_members_mask_valid = true;
    1503                 :       1528 :         return u->cgroup_members_mask;
    1504                 :            : }
    1505                 :            : 
    1506                 :        176 : CGroupMask unit_get_siblings_mask(Unit *u) {
    1507         [ -  + ]:        176 :         assert(u);
    1508                 :            : 
    1509                 :            :         /* Returns the mask of controllers all of the unit's siblings
    1510                 :            :          * require, i.e. the members mask of the unit's parent slice
    1511                 :            :          * if there is one. */
    1512                 :            : 
    1513         [ +  + ]:        176 :         if (UNIT_ISSET(u->slice))
    1514                 :        136 :                 return unit_get_members_mask(UNIT_DEREF(u->slice));
    1515                 :            : 
    1516                 :         40 :         return unit_get_subtree_mask(u); /* we are the top-level slice */
    1517                 :            : }
    1518                 :            : 
    1519                 :       4960 : CGroupMask unit_get_disable_mask(Unit *u) {
    1520                 :            :         CGroupContext *c;
    1521                 :            : 
    1522                 :       4960 :         c = unit_get_cgroup_context(u);
    1523         [ -  + ]:       4960 :         if (!c)
    1524                 :          0 :                 return 0;
    1525                 :            : 
    1526                 :       4960 :         return c->disable_controllers;
    1527                 :            : }
    1528                 :            : 
    1529                 :       4960 : CGroupMask unit_get_ancestor_disable_mask(Unit *u) {
    1530                 :            :         CGroupMask mask;
    1531                 :            : 
    1532         [ -  + ]:       4960 :         assert(u);
    1533                 :       4960 :         mask = unit_get_disable_mask(u);
    1534                 :            : 
    1535                 :            :         /* Returns the mask of controllers which are marked as forcibly
    1536                 :            :          * disabled in any ancestor unit or the unit in question. */
    1537                 :            : 
    1538         [ +  + ]:       4960 :         if (UNIT_ISSET(u->slice))
    1539                 :       2484 :                 mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice));
    1540                 :            : 
    1541                 :       4960 :         return mask;
    1542                 :            : }
    1543                 :            : 
    1544                 :       1412 : CGroupMask unit_get_subtree_mask(Unit *u) {
    1545                 :            : 
    1546                 :            :         /* Returns the mask of this subtree, meaning of the group
    1547                 :            :          * itself and its children. */
    1548                 :            : 
    1549                 :       1412 :         return unit_get_own_mask(u) | unit_get_members_mask(u);
    1550                 :            : }
    1551                 :            : 
    1552                 :        112 : CGroupMask unit_get_target_mask(Unit *u) {
    1553                 :            :         CGroupMask mask;
    1554                 :            : 
    1555                 :            :         /* This returns the cgroup mask of all controllers to enable
    1556                 :            :          * for a specific cgroup, i.e. everything it needs itself,
    1557                 :            :          * plus all that its children need, plus all that its siblings
    1558                 :            :          * need. This is primarily useful on the legacy cgroup
    1559                 :            :          * hierarchy, where we need to duplicate each cgroup in each
    1560                 :            :          * hierarchy that shall be enabled for it. */
    1561                 :            : 
    1562                 :        112 :         mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
    1563                 :            : 
    1564         [ -  + ]:        112 :         if (mask & CGROUP_MASK_BPF_FIREWALL & ~u->manager->cgroup_supported)
    1565                 :          0 :                 emit_bpf_firewall_warning(u);
    1566                 :            : 
    1567                 :        112 :         mask &= u->manager->cgroup_supported;
    1568                 :        112 :         mask &= ~unit_get_ancestor_disable_mask(u);
    1569                 :            : 
    1570                 :        112 :         return mask;
    1571                 :            : }
    1572                 :            : 
    1573                 :         48 : CGroupMask unit_get_enable_mask(Unit *u) {
    1574                 :            :         CGroupMask mask;
    1575                 :            : 
    1576                 :            :         /* This returns the cgroup mask of all controllers to enable
    1577                 :            :          * for the children of a specific cgroup. This is primarily
    1578                 :            :          * useful for the unified cgroup hierarchy, where each cgroup
    1579                 :            :          * controls which controllers are enabled for its children. */
    1580                 :            : 
    1581                 :         48 :         mask = unit_get_members_mask(u);
    1582                 :         48 :         mask &= u->manager->cgroup_supported;
    1583                 :         48 :         mask &= ~unit_get_ancestor_disable_mask(u);
    1584                 :            : 
    1585                 :         48 :         return mask;
    1586                 :            : }
    1587                 :            : 
    1588                 :       9028 : void unit_invalidate_cgroup_members_masks(Unit *u) {
    1589         [ -  + ]:       9028 :         assert(u);
    1590                 :            : 
    1591                 :            :         /* Recurse invalidate the member masks cache all the way up the tree */
    1592                 :       9028 :         u->cgroup_members_mask_valid = false;
    1593                 :            : 
    1594         [ +  + ]:       9028 :         if (UNIT_ISSET(u->slice))
    1595                 :       1231 :                 unit_invalidate_cgroup_members_masks(UNIT_DEREF(u->slice));
    1596                 :       9028 : }
    1597                 :            : 
    1598                 :          0 : const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
    1599                 :            : 
    1600                 :            :         /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
    1601                 :            : 
    1602         [ #  # ]:          0 :         while (u) {
    1603                 :            : 
    1604   [ #  #  #  # ]:          0 :                 if (u->cgroup_path &&
    1605                 :          0 :                     u->cgroup_realized &&
    1606         [ #  # ]:          0 :                     FLAGS_SET(u->cgroup_realized_mask, mask))
    1607                 :          0 :                         return u->cgroup_path;
    1608                 :            : 
    1609                 :          0 :                 u = UNIT_DEREF(u->slice);
    1610                 :            :         }
    1611                 :            : 
    1612                 :          0 :         return NULL;
    1613                 :            : }
    1614                 :            : 
    1615                 :          0 : static const char *migrate_callback(CGroupMask mask, void *userdata) {
    1616                 :          0 :         return unit_get_realized_cgroup_path(userdata, mask);
    1617                 :            : }
    1618                 :            : 
    1619                 :         48 : char *unit_default_cgroup_path(const Unit *u) {
    1620                 :         48 :         _cleanup_free_ char *escaped = NULL, *slice = NULL;
    1621                 :            :         int r;
    1622                 :            : 
    1623         [ -  + ]:         48 :         assert(u);
    1624                 :            : 
    1625         [ +  + ]:         48 :         if (unit_has_name(u, SPECIAL_ROOT_SLICE))
    1626                 :         24 :                 return strdup(u->manager->cgroup_root);
    1627                 :            : 
    1628   [ +  -  -  + ]:         24 :         if (UNIT_ISSET(u->slice) && !unit_has_name(UNIT_DEREF(u->slice), SPECIAL_ROOT_SLICE)) {
    1629                 :          0 :                 r = cg_slice_to_path(UNIT_DEREF(u->slice)->id, &slice);
    1630         [ #  # ]:          0 :                 if (r < 0)
    1631                 :          0 :                         return NULL;
    1632                 :            :         }
    1633                 :            : 
    1634                 :         24 :         escaped = cg_escape(u->id);
    1635         [ -  + ]:         24 :         if (!escaped)
    1636                 :          0 :                 return NULL;
    1637                 :            : 
    1638                 :         24 :         return path_join(empty_to_root(u->manager->cgroup_root), slice, escaped);
    1639                 :            : }
    1640                 :            : 
    1641                 :         48 : int unit_set_cgroup_path(Unit *u, const char *path) {
    1642                 :         48 :         _cleanup_free_ char *p = NULL;
    1643                 :            :         int r;
    1644                 :            : 
    1645         [ -  + ]:         48 :         assert(u);
    1646                 :            : 
    1647         [ -  + ]:         48 :         if (streq_ptr(u->cgroup_path, path))
    1648                 :          0 :                 return 0;
    1649                 :            : 
    1650         [ +  - ]:         48 :         if (path) {
    1651                 :         48 :                 p = strdup(path);
    1652         [ -  + ]:         48 :                 if (!p)
    1653                 :          0 :                         return -ENOMEM;
    1654                 :            :         }
    1655                 :            : 
    1656         [ +  - ]:         48 :         if (p) {
    1657                 :         48 :                 r = hashmap_put(u->manager->cgroup_unit, p, u);
    1658         [ -  + ]:         48 :                 if (r < 0)
    1659                 :          0 :                         return r;
    1660                 :            :         }
    1661                 :            : 
    1662                 :         48 :         unit_release_cgroup(u);
    1663                 :         48 :         u->cgroup_path = TAKE_PTR(p);
    1664                 :            : 
    1665                 :         48 :         return 1;
    1666                 :            : }
    1667                 :            : 
    1668                 :         24 : int unit_watch_cgroup(Unit *u) {
    1669                 :         24 :         _cleanup_free_ char *events = NULL;
    1670                 :            :         int r;
    1671                 :            : 
    1672         [ -  + ]:         24 :         assert(u);
    1673                 :            : 
    1674                 :            :         /* Watches the "cgroups.events" attribute of this unit's cgroup for "empty" events, but only if
    1675                 :            :          * cgroupv2 is available. */
    1676                 :            : 
    1677         [ -  + ]:         24 :         if (!u->cgroup_path)
    1678                 :          0 :                 return 0;
    1679                 :            : 
    1680         [ -  + ]:         24 :         if (u->cgroup_control_inotify_wd >= 0)
    1681                 :          0 :                 return 0;
    1682                 :            : 
    1683                 :            :         /* Only applies to the unified hierarchy */
    1684                 :         24 :         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
    1685         [ -  + ]:         24 :         if (r < 0)
    1686         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to determine whether the name=systemd hierarchy is unified: %m");
    1687         [ -  + ]:         24 :         if (r == 0)
    1688                 :          0 :                 return 0;
    1689                 :            : 
    1690                 :            :         /* No point in watch the top-level slice, it's never going to run empty. */
    1691         [ +  - ]:         24 :         if (unit_has_name(u, SPECIAL_ROOT_SLICE))
    1692                 :         24 :                 return 0;
    1693                 :            : 
    1694                 :          0 :         r = hashmap_ensure_allocated(&u->manager->cgroup_control_inotify_wd_unit, &trivial_hash_ops);
    1695         [ #  # ]:          0 :         if (r < 0)
    1696                 :          0 :                 return log_oom();
    1697                 :            : 
    1698                 :          0 :         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
    1699         [ #  # ]:          0 :         if (r < 0)
    1700                 :          0 :                 return log_oom();
    1701                 :            : 
    1702                 :          0 :         u->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
    1703         [ #  # ]:          0 :         if (u->cgroup_control_inotify_wd < 0) {
    1704                 :            : 
    1705         [ #  # ]:          0 :                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
    1706                 :            :                                       * is not an error */
    1707                 :          0 :                         return 0;
    1708                 :            : 
    1709         [ #  # ]:          0 :                 return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", u->cgroup_path);
    1710                 :            :         }
    1711                 :            : 
    1712                 :          0 :         r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd), u);
    1713         [ #  # ]:          0 :         if (r < 0)
    1714         [ #  # ]:          0 :                 return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor to hash map: %m");
    1715                 :            : 
    1716                 :          0 :         return 0;
    1717                 :            : }
    1718                 :            : 
    1719                 :         24 : int unit_watch_cgroup_memory(Unit *u) {
    1720                 :         24 :         _cleanup_free_ char *events = NULL;
    1721                 :            :         CGroupContext *c;
    1722                 :            :         int r;
    1723                 :            : 
    1724         [ -  + ]:         24 :         assert(u);
    1725                 :            : 
    1726                 :            :         /* Watches the "memory.events" attribute of this unit's cgroup for "oom_kill" events, but only if
    1727                 :            :          * cgroupv2 is available. */
    1728                 :            : 
    1729         [ -  + ]:         24 :         if (!u->cgroup_path)
    1730                 :          0 :                 return 0;
    1731                 :            : 
    1732                 :         24 :         c = unit_get_cgroup_context(u);
    1733         [ -  + ]:         24 :         if (!c)
    1734                 :          0 :                 return 0;
    1735                 :            : 
    1736                 :            :         /* The "memory.events" attribute is only available if the memory controller is on. Let's hence tie
    1737                 :            :          * this to memory accounting, in a way watching for OOM kills is a form of memory accounting after
    1738                 :            :          * all. */
    1739         [ -  + ]:         24 :         if (!c->memory_accounting)
    1740                 :          0 :                 return 0;
    1741                 :            : 
    1742                 :            :         /* Don't watch inner nodes, as the kernel doesn't report oom_kill events recursively currently, and
    1743                 :            :          * we also don't want to generate a log message for each parent cgroup of a process. */
    1744         [ +  - ]:         24 :         if (u->type == UNIT_SLICE)
    1745                 :         24 :                 return 0;
    1746                 :            : 
    1747         [ #  # ]:          0 :         if (u->cgroup_memory_inotify_wd >= 0)
    1748                 :          0 :                 return 0;
    1749                 :            : 
    1750                 :            :         /* Only applies to the unified hierarchy */
    1751                 :          0 :         r = cg_all_unified();
    1752         [ #  # ]:          0 :         if (r < 0)
    1753         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to determine whether the memory controller is unified: %m");
    1754         [ #  # ]:          0 :         if (r == 0)
    1755                 :          0 :                 return 0;
    1756                 :            : 
    1757                 :          0 :         r = hashmap_ensure_allocated(&u->manager->cgroup_memory_inotify_wd_unit, &trivial_hash_ops);
    1758         [ #  # ]:          0 :         if (r < 0)
    1759                 :          0 :                 return log_oom();
    1760                 :            : 
    1761                 :          0 :         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "memory.events", &events);
    1762         [ #  # ]:          0 :         if (r < 0)
    1763                 :          0 :                 return log_oom();
    1764                 :            : 
    1765                 :          0 :         u->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
    1766         [ #  # ]:          0 :         if (u->cgroup_memory_inotify_wd < 0) {
    1767                 :            : 
    1768         [ #  # ]:          0 :                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
    1769                 :            :                                       * is not an error */
    1770                 :          0 :                         return 0;
    1771                 :            : 
    1772         [ #  # ]:          0 :                 return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", u->cgroup_path);
    1773                 :            :         }
    1774                 :            : 
    1775                 :          0 :         r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd), u);
    1776         [ #  # ]:          0 :         if (r < 0)
    1777         [ #  # ]:          0 :                 return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor to hash map: %m");
    1778                 :            : 
    1779                 :          0 :         return 0;
    1780                 :            : }
    1781                 :            : 
    1782                 :         72 : int unit_pick_cgroup_path(Unit *u) {
    1783                 :         72 :         _cleanup_free_ char *path = NULL;
    1784                 :            :         int r;
    1785                 :            : 
    1786         [ -  + ]:         72 :         assert(u);
    1787                 :            : 
    1788         [ +  + ]:         72 :         if (u->cgroup_path)
    1789                 :         24 :                 return 0;
    1790                 :            : 
    1791         [ -  + ]:         48 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    1792                 :          0 :                 return -EINVAL;
    1793                 :            : 
    1794                 :         48 :         path = unit_default_cgroup_path(u);
    1795         [ -  + ]:         48 :         if (!path)
    1796                 :          0 :                 return log_oom();
    1797                 :            : 
    1798                 :         48 :         r = unit_set_cgroup_path(u, path);
    1799         [ -  + ]:         48 :         if (r == -EEXIST)
    1800         [ #  # ]:          0 :                 return log_unit_error_errno(u, r, "Control group %s exists already.", path);
    1801         [ -  + ]:         48 :         if (r < 0)
    1802         [ #  # ]:          0 :                 return log_unit_error_errno(u, r, "Failed to set unit's control group path to %s: %m", path);
    1803                 :            : 
    1804                 :         48 :         return 0;
    1805                 :            : }
    1806                 :            : 
    1807                 :         48 : static int unit_create_cgroup(
    1808                 :            :                 Unit *u,
    1809                 :            :                 CGroupMask target_mask,
    1810                 :            :                 CGroupMask enable_mask,
    1811                 :            :                 ManagerState state) {
    1812                 :            : 
    1813                 :            :         bool created;
    1814                 :            :         int r;
    1815                 :            : 
    1816         [ -  + ]:         48 :         assert(u);
    1817                 :            : 
    1818         [ -  + ]:         48 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    1819                 :          0 :                 return 0;
    1820                 :            : 
    1821                 :            :         /* Figure out our cgroup path */
    1822                 :         48 :         r = unit_pick_cgroup_path(u);
    1823         [ -  + ]:         48 :         if (r < 0)
    1824                 :          0 :                 return r;
    1825                 :            : 
    1826                 :            :         /* First, create our own group */
    1827                 :         48 :         r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
    1828         [ +  + ]:         48 :         if (r < 0)
    1829         [ +  - ]:         24 :                 return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", u->cgroup_path);
    1830                 :         24 :         created = r;
    1831                 :            : 
    1832                 :            :         /* Start watching it */
    1833                 :         24 :         (void) unit_watch_cgroup(u);
    1834                 :         24 :         (void) unit_watch_cgroup_memory(u);
    1835                 :            : 
    1836                 :            :         /* Preserve enabled controllers in delegated units, adjust others. */
    1837   [ +  -  -  +  :         24 :         if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
                   #  # ]
    1838                 :         24 :                 CGroupMask result_mask = 0;
    1839                 :            : 
    1840                 :            :                 /* Enable all controllers we need */
    1841                 :         24 :                 r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
    1842         [ -  + ]:         24 :                 if (r < 0)
    1843         [ #  # ]:          0 :                         log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
    1844                 :            : 
    1845                 :            :                 /* If we just turned off a controller, this might release the controller for our parent too, let's
    1846                 :            :                  * enqueue the parent for re-realization in that case again. */
    1847         [ -  + ]:         24 :                 if (UNIT_ISSET(u->slice)) {
    1848                 :            :                         CGroupMask turned_off;
    1849                 :            : 
    1850         [ #  # ]:          0 :                         turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0);
    1851         [ #  # ]:          0 :                         if (turned_off != 0) {
    1852                 :            :                                 Unit *parent;
    1853                 :            : 
    1854                 :            :                                 /* Force the parent to propagate the enable mask to the kernel again, by invalidating
    1855                 :            :                                  * the controller we just turned off. */
    1856                 :            : 
    1857         [ #  # ]:          0 :                                 for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice))
    1858                 :          0 :                                         unit_invalidate_cgroup(parent, turned_off);
    1859                 :            :                         }
    1860                 :            :                 }
    1861                 :            : 
    1862                 :            :                 /* Remember what's actually enabled now */
    1863                 :         24 :                 u->cgroup_enabled_mask = result_mask;
    1864                 :            :         }
    1865                 :            : 
    1866                 :            :         /* Keep track that this is now realized */
    1867                 :         24 :         u->cgroup_realized = true;
    1868                 :         24 :         u->cgroup_realized_mask = target_mask;
    1869                 :            : 
    1870   [ -  +  #  # ]:         24 :         if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
    1871                 :            : 
    1872                 :            :                 /* Then, possibly move things over, but not if
    1873                 :            :                  * subgroups may contain processes, which is the case
    1874                 :            :                  * for slice and delegation units. */
    1875                 :          0 :                 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
    1876         [ #  # ]:          0 :                 if (r < 0)
    1877         [ #  # ]:          0 :                         log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
    1878                 :            :         }
    1879                 :            : 
    1880                 :            :         /* Set attributes */
    1881                 :         24 :         cgroup_context_apply(u, target_mask, state);
    1882                 :         24 :         cgroup_xattr_apply(u);
    1883                 :            : 
    1884                 :         24 :         return 0;
    1885                 :            : }
    1886                 :            : 
    1887                 :          0 : static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
    1888                 :          0 :         _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
    1889                 :            :         char *pp;
    1890                 :            :         int r;
    1891                 :            : 
    1892         [ #  # ]:          0 :         assert(u);
    1893                 :            : 
    1894         [ #  # ]:          0 :         if (MANAGER_IS_SYSTEM(u->manager))
    1895                 :          0 :                 return -EINVAL;
    1896                 :            : 
    1897         [ #  # ]:          0 :         if (!u->manager->system_bus)
    1898                 :          0 :                 return -EIO;
    1899                 :            : 
    1900         [ #  # ]:          0 :         if (!u->cgroup_path)
    1901                 :          0 :                 return -EINVAL;
    1902                 :            : 
    1903                 :            :         /* Determine this unit's cgroup path relative to our cgroup root */
    1904                 :          0 :         pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
    1905         [ #  # ]:          0 :         if (!pp)
    1906                 :          0 :                 return -EINVAL;
    1907                 :            : 
    1908   [ #  #  #  #  :          0 :         pp = strjoina("/", pp, suffix_path);
          #  #  #  #  #  
                #  #  # ]
    1909                 :          0 :         path_simplify(pp, false);
    1910                 :            : 
    1911                 :          0 :         r = sd_bus_call_method(u->manager->system_bus,
    1912                 :            :                                "org.freedesktop.systemd1",
    1913                 :            :                                "/org/freedesktop/systemd1",
    1914                 :            :                                "org.freedesktop.systemd1.Manager",
    1915                 :            :                                "AttachProcessesToUnit",
    1916                 :            :                                &error, NULL,
    1917                 :            :                                "ssau",
    1918                 :            :                                NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
    1919         [ #  # ]:          0 :         if (r < 0)
    1920         [ #  # ]:          0 :                 return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r));
    1921                 :            : 
    1922                 :          0 :         return 0;
    1923                 :            : }
    1924                 :            : 
    1925                 :          0 : int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
    1926                 :            :         CGroupMask delegated_mask;
    1927                 :            :         const char *p;
    1928                 :            :         Iterator i;
    1929                 :            :         void *pidp;
    1930                 :            :         int r, q;
    1931                 :            : 
    1932         [ #  # ]:          0 :         assert(u);
    1933                 :            : 
    1934         [ #  # ]:          0 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    1935                 :          0 :                 return -EINVAL;
    1936                 :            : 
    1937         [ #  # ]:          0 :         if (set_isempty(pids))
    1938                 :          0 :                 return 0;
    1939                 :            : 
    1940                 :            :         /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable.
    1941                 :            :          * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */
    1942                 :          0 :         r = bpf_firewall_load_custom(u);
    1943         [ #  # ]:          0 :         if (r < 0)
    1944                 :          0 :                 return r;
    1945                 :            : 
    1946                 :          0 :         r = unit_realize_cgroup(u);
    1947         [ #  # ]:          0 :         if (r < 0)
    1948                 :          0 :                 return r;
    1949                 :            : 
    1950         [ #  # ]:          0 :         if (isempty(suffix_path))
    1951                 :          0 :                 p = u->cgroup_path;
    1952                 :            :         else
    1953   [ #  #  #  #  :          0 :                 p = prefix_roota(u->cgroup_path, suffix_path);
          #  #  #  #  #  
          #  #  #  #  #  
                   #  # ]
    1954                 :            : 
    1955                 :          0 :         delegated_mask = unit_get_delegate_mask(u);
    1956                 :            : 
    1957                 :          0 :         r = 0;
    1958         [ #  # ]:          0 :         SET_FOREACH(pidp, pids, i) {
    1959                 :          0 :                 pid_t pid = PTR_TO_PID(pidp);
    1960                 :            :                 CGroupController c;
    1961                 :            : 
    1962                 :            :                 /* First, attach the PID to the main cgroup hierarchy */
    1963                 :          0 :                 q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
    1964         [ #  # ]:          0 :                 if (q < 0) {
    1965         [ #  # ]:          0 :                         log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p);
    1966                 :            : 
    1967   [ #  #  #  #  :          0 :                         if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) {
                   #  # ]
    1968                 :            :                                 int z;
    1969                 :            : 
    1970                 :            :                                 /* If we are in a user instance, and we can't move the process ourselves due to
    1971                 :            :                                  * permission problems, let's ask the system instance about it instead. Since it's more
    1972                 :            :                                  * privileged it might be able to move the process across the leaves of a subtree who's
    1973                 :            :                                  * top node is not owned by us. */
    1974                 :            : 
    1975                 :          0 :                                 z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
    1976         [ #  # ]:          0 :                                 if (z < 0)
    1977         [ #  # ]:          0 :                                         log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p);
    1978                 :            :                                 else
    1979                 :          0 :                                         continue; /* When the bus thing worked via the bus we are fully done for this PID. */
    1980                 :            :                         }
    1981                 :            : 
    1982         [ #  # ]:          0 :                         if (r >= 0)
    1983                 :          0 :                                 r = q; /* Remember first error */
    1984                 :            : 
    1985                 :          0 :                         continue;
    1986                 :            :                 }
    1987                 :            : 
    1988                 :          0 :                 q = cg_all_unified();
    1989         [ #  # ]:          0 :                 if (q < 0)
    1990                 :          0 :                         return q;
    1991         [ #  # ]:          0 :                 if (q > 0)
    1992                 :          0 :                         continue;
    1993                 :            : 
    1994                 :            :                 /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the
    1995                 :            :                  * innermost realized one */
    1996                 :            : 
    1997         [ #  # ]:          0 :                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
    1998                 :          0 :                         CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
    1999                 :            :                         const char *realized;
    2000                 :            : 
    2001         [ #  # ]:          0 :                         if (!(u->manager->cgroup_supported & bit))
    2002                 :          0 :                                 continue;
    2003                 :            : 
    2004                 :            :                         /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
    2005         [ #  # ]:          0 :                         if (delegated_mask & u->cgroup_realized_mask & bit) {
    2006                 :          0 :                                 q = cg_attach(cgroup_controller_to_string(c), p, pid);
    2007         [ #  # ]:          0 :                                 if (q >= 0)
    2008                 :          0 :                                         continue; /* Success! */
    2009                 :            : 
    2010         [ #  # ]:          0 :                                 log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
    2011                 :            :                                                      pid, p, cgroup_controller_to_string(c));
    2012                 :            :                         }
    2013                 :            : 
    2014                 :            :                         /* So this controller is either not delegate or realized, or something else weird happened. In
    2015                 :            :                          * that case let's attach the PID at least to the closest cgroup up the tree that is
    2016                 :            :                          * realized. */
    2017                 :          0 :                         realized = unit_get_realized_cgroup_path(u, bit);
    2018         [ #  # ]:          0 :                         if (!realized)
    2019                 :          0 :                                 continue; /* Not even realized in the root slice? Then let's not bother */
    2020                 :            : 
    2021                 :          0 :                         q = cg_attach(cgroup_controller_to_string(c), realized, pid);
    2022         [ #  # ]:          0 :                         if (q < 0)
    2023         [ #  # ]:          0 :                                 log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
    2024                 :            :                                                      pid, realized, cgroup_controller_to_string(c));
    2025                 :            :                 }
    2026                 :            :         }
    2027                 :            : 
    2028                 :          0 :         return r;
    2029                 :            : }
    2030                 :            : 
    2031                 :         24 : static bool unit_has_mask_realized(
    2032                 :            :                 Unit *u,
    2033                 :            :                 CGroupMask target_mask,
    2034                 :            :                 CGroupMask enable_mask) {
    2035                 :            : 
    2036         [ -  + ]:         24 :         assert(u);
    2037                 :            : 
    2038                 :            :         /* Returns true if this unit is fully realized. We check four things:
    2039                 :            :          *
    2040                 :            :          * 1. Whether the cgroup was created at all
    2041                 :            :          * 2. Whether the cgroup was created in all the hierarchies we need it to be created in (in case of cgroup v1)
    2042                 :            :          * 3. Whether the cgroup has all the right controllers enabled (in case of cgroup v2)
    2043                 :            :          * 4. Whether the invalidation mask is currently zero
    2044                 :            :          *
    2045                 :            :          * If you wonder why we mask the target realization and enable mask with CGROUP_MASK_V1/CGROUP_MASK_V2: note
    2046                 :            :          * that there are three sets of bitmasks: CGROUP_MASK_V1 (for real cgroup v1 controllers), CGROUP_MASK_V2 (for
    2047                 :            :          * real cgroup v2 controllers) and CGROUP_MASK_BPF (for BPF-based pseudo-controllers). Now, cgroup_realized_mask
    2048                 :            :          * is only matters for cgroup v1 controllers, and cgroup_enabled_mask only used for cgroup v2, and if they
    2049                 :            :          * differ in the others, we don't really care. (After all, the cgroup_enabled_mask tracks with controllers are
    2050                 :            :          * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
    2051                 :            :          * simply don't matter. */
    2052                 :            : 
    2053                 :         24 :         return u->cgroup_realized &&
    2054         [ #  # ]:          0 :                 ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
    2055   [ -  +  #  # ]:         24 :                 ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
    2056         [ #  # ]:          0 :                 u->cgroup_invalidated_mask == 0;
    2057                 :            : }
    2058                 :            : 
    2059                 :          0 : static bool unit_has_mask_disables_realized(
    2060                 :            :                 Unit *u,
    2061                 :            :                 CGroupMask target_mask,
    2062                 :            :                 CGroupMask enable_mask) {
    2063                 :            : 
    2064         [ #  # ]:          0 :         assert(u);
    2065                 :            : 
    2066                 :            :         /* Returns true if all controllers which should be disabled are indeed disabled.
    2067                 :            :          *
    2068                 :            :          * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
    2069                 :            :          * already removed. */
    2070                 :            : 
    2071         [ #  # ]:          0 :         return !u->cgroup_realized ||
    2072         [ #  # ]:          0 :                 (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
    2073         [ #  # ]:          0 :                  FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
    2074                 :            : }
    2075                 :            : 
    2076                 :         24 : static bool unit_has_mask_enables_realized(
    2077                 :            :                 Unit *u,
    2078                 :            :                 CGroupMask target_mask,
    2079                 :            :                 CGroupMask enable_mask) {
    2080                 :            : 
    2081         [ -  + ]:         24 :         assert(u);
    2082                 :            : 
    2083                 :            :         /* Returns true if all controllers which should be enabled are indeed enabled.
    2084                 :            :          *
    2085                 :            :          * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
    2086                 :            :          * we want to add is already added. */
    2087                 :            : 
    2088                 :         24 :         return u->cgroup_realized &&
    2089   [ -  +  #  # ]:         24 :                 ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
    2090         [ #  # ]:          0 :                 ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
    2091                 :            : }
    2092                 :            : 
    2093                 :        537 : void unit_add_to_cgroup_realize_queue(Unit *u) {
    2094         [ -  + ]:        537 :         assert(u);
    2095                 :            : 
    2096         [ +  + ]:        537 :         if (u->in_cgroup_realize_queue)
    2097                 :        477 :                 return;
    2098                 :            : 
    2099   [ -  +  +  + ]:         60 :         LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
    2100                 :         60 :         u->in_cgroup_realize_queue = true;
    2101                 :            : }
    2102                 :            : 
    2103                 :         24 : static void unit_remove_from_cgroup_realize_queue(Unit *u) {
    2104         [ -  + ]:         24 :         assert(u);
    2105                 :            : 
    2106         [ +  - ]:         24 :         if (!u->in_cgroup_realize_queue)
    2107                 :         24 :                 return;
    2108                 :            : 
    2109   [ #  #  #  #  :          0 :         LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u);
             #  #  #  # ]
    2110                 :          0 :         u->in_cgroup_realize_queue = false;
    2111                 :            : }
    2112                 :            : 
    2113                 :            : /* Controllers can only be enabled breadth-first, from the root of the
    2114                 :            :  * hierarchy downwards to the unit in question. */
    2115                 :         24 : static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
    2116                 :            :         CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
    2117                 :            :         int r;
    2118                 :            : 
    2119         [ -  + ]:         24 :         assert(u);
    2120                 :            : 
    2121                 :            :         /* First go deal with this unit's parent, or we won't be able to enable
    2122                 :            :          * any new controllers at this layer. */
    2123         [ -  + ]:         24 :         if (UNIT_ISSET(u->slice)) {
    2124                 :          0 :                 r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
    2125         [ #  # ]:          0 :                 if (r < 0)
    2126                 :          0 :                         return r;
    2127                 :            :         }
    2128                 :            : 
    2129                 :         24 :         target_mask = unit_get_target_mask(u);
    2130                 :         24 :         enable_mask = unit_get_enable_mask(u);
    2131                 :            : 
    2132                 :            :         /* We can only enable in this direction, don't try to disable anything.
    2133                 :            :          */
    2134         [ -  + ]:         24 :         if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
    2135                 :          0 :                 return 0;
    2136                 :            : 
    2137                 :         24 :         new_target_mask = u->cgroup_realized_mask | target_mask;
    2138                 :         24 :         new_enable_mask = u->cgroup_enabled_mask | enable_mask;
    2139                 :            : 
    2140                 :         24 :         return unit_create_cgroup(u, new_target_mask, new_enable_mask, state);
    2141                 :            : }
    2142                 :            : 
    2143                 :            : /* Controllers can only be disabled depth-first, from the leaves of the
    2144                 :            :  * hierarchy upwards to the unit in question. */
    2145                 :         24 : static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
    2146                 :            :         Iterator i;
    2147                 :            :         Unit *m;
    2148                 :            :         void *v;
    2149                 :            : 
    2150         [ -  + ]:         24 :         assert(u);
    2151                 :            : 
    2152         [ +  - ]:         24 :         if (u->type != UNIT_SLICE)
    2153                 :         24 :                 return 0;
    2154                 :            : 
    2155         [ #  # ]:          0 :         HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
    2156                 :            :                 CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
    2157                 :            :                 int r;
    2158                 :            : 
    2159         [ #  # ]:          0 :                 if (UNIT_DEREF(m->slice) != u)
    2160                 :          0 :                         continue;
    2161                 :            : 
    2162                 :            :                 /* The cgroup for this unit might not actually be fully
    2163                 :            :                  * realised yet, in which case it isn't holding any controllers
    2164                 :            :                  * open anyway. */
    2165         [ #  # ]:          0 :                 if (!m->cgroup_path)
    2166                 :          0 :                         continue;
    2167                 :            : 
    2168                 :            :                 /* We must disable those below us first in order to release the
    2169                 :            :                  * controller. */
    2170         [ #  # ]:          0 :                 if (m->type == UNIT_SLICE)
    2171                 :          0 :                         (void) unit_realize_cgroup_now_disable(m, state);
    2172                 :            : 
    2173                 :          0 :                 target_mask = unit_get_target_mask(m);
    2174                 :          0 :                 enable_mask = unit_get_enable_mask(m);
    2175                 :            : 
    2176                 :            :                 /* We can only disable in this direction, don't try to enable
    2177                 :            :                  * anything. */
    2178         [ #  # ]:          0 :                 if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
    2179                 :          0 :                         continue;
    2180                 :            : 
    2181                 :          0 :                 new_target_mask = m->cgroup_realized_mask & target_mask;
    2182                 :          0 :                 new_enable_mask = m->cgroup_enabled_mask & enable_mask;
    2183                 :            : 
    2184                 :          0 :                 r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state);
    2185         [ #  # ]:          0 :                 if (r < 0)
    2186                 :          0 :                         return r;
    2187                 :            :         }
    2188                 :            : 
    2189                 :          0 :         return 0;
    2190                 :            : }
    2191                 :            : 
    2192                 :            : /* Check if necessary controllers and attributes for a unit are in place.
    2193                 :            :  *
    2194                 :            :  * - If so, do nothing.
    2195                 :            :  * - If not, create paths, move processes over, and set attributes.
    2196                 :            :  *
    2197                 :            :  * Controllers can only be *enabled* in a breadth-first way, and *disabled* in
    2198                 :            :  * a depth-first way. As such the process looks like this:
    2199                 :            :  *
    2200                 :            :  * Suppose we have a cgroup hierarchy which looks like this:
    2201                 :            :  *
    2202                 :            :  *             root
    2203                 :            :  *            /    \
    2204                 :            :  *           /      \
    2205                 :            :  *          /        \
    2206                 :            :  *         a          b
    2207                 :            :  *        / \        / \
    2208                 :            :  *       /   \      /   \
    2209                 :            :  *      c     d    e     f
    2210                 :            :  *     / \   / \  / \   / \
    2211                 :            :  *     h i   j k  l m   n o
    2212                 :            :  *
    2213                 :            :  * 1. We want to realise cgroup "d" now.
    2214                 :            :  * 2. cgroup "a" has DisableControllers=cpu in the associated unit.
    2215                 :            :  * 3. cgroup "k" just started requesting the memory controller.
    2216                 :            :  *
    2217                 :            :  * To make this work we must do the following in order:
    2218                 :            :  *
    2219                 :            :  * 1. Disable CPU controller in k, j
    2220                 :            :  * 2. Disable CPU controller in d
    2221                 :            :  * 3. Enable memory controller in root
    2222                 :            :  * 4. Enable memory controller in a
    2223                 :            :  * 5. Enable memory controller in d
    2224                 :            :  * 6. Enable memory controller in k
    2225                 :            :  *
    2226                 :            :  * Notice that we need to touch j in one direction, but not the other. We also
    2227                 :            :  * don't go beyond d when disabling -- it's up to "a" to get realized if it
    2228                 :            :  * wants to disable further. The basic rules are therefore:
    2229                 :            :  *
    2230                 :            :  * - If you're disabling something, you need to realise all of the cgroups from
    2231                 :            :  *   your recursive descendants to the root. This starts from the leaves.
    2232                 :            :  * - If you're enabling something, you need to realise from the root cgroup
    2233                 :            :  *   downwards, but you don't need to iterate your recursive descendants.
    2234                 :            :  *
    2235                 :            :  * Returns 0 on success and < 0 on failure. */
    2236                 :         24 : static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
    2237                 :            :         CGroupMask target_mask, enable_mask;
    2238                 :            :         int r;
    2239                 :            : 
    2240         [ -  + ]:         24 :         assert(u);
    2241                 :            : 
    2242                 :         24 :         unit_remove_from_cgroup_realize_queue(u);
    2243                 :            : 
    2244                 :         24 :         target_mask = unit_get_target_mask(u);
    2245                 :         24 :         enable_mask = unit_get_enable_mask(u);
    2246                 :            : 
    2247         [ -  + ]:         24 :         if (unit_has_mask_realized(u, target_mask, enable_mask))
    2248                 :          0 :                 return 0;
    2249                 :            : 
    2250                 :            :         /* Disable controllers below us, if there are any */
    2251                 :         24 :         r = unit_realize_cgroup_now_disable(u, state);
    2252         [ -  + ]:         24 :         if (r < 0)
    2253                 :          0 :                 return r;
    2254                 :            : 
    2255                 :            :         /* Enable controllers above us, if there are any */
    2256         [ +  - ]:         24 :         if (UNIT_ISSET(u->slice)) {
    2257                 :         24 :                 r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
    2258         [ -  + ]:         24 :                 if (r < 0)
    2259                 :          0 :                         return r;
    2260                 :            :         }
    2261                 :            : 
    2262                 :            :         /* Now actually deal with the cgroup we were trying to realise and set attributes */
    2263                 :         24 :         r = unit_create_cgroup(u, target_mask, enable_mask, state);
    2264         [ +  - ]:         24 :         if (r < 0)
    2265                 :         24 :                 return r;
    2266                 :            : 
    2267                 :            :         /* Now, reset the invalidation mask */
    2268                 :          0 :         u->cgroup_invalidated_mask = 0;
    2269                 :          0 :         return 0;
    2270                 :            : }
    2271                 :            : 
    2272                 :          0 : unsigned manager_dispatch_cgroup_realize_queue(Manager *m) {
    2273                 :            :         ManagerState state;
    2274                 :          0 :         unsigned n = 0;
    2275                 :            :         Unit *i;
    2276                 :            :         int r;
    2277                 :            : 
    2278         [ #  # ]:          0 :         assert(m);
    2279                 :            : 
    2280                 :          0 :         state = manager_state(m);
    2281                 :            : 
    2282         [ #  # ]:          0 :         while ((i = m->cgroup_realize_queue)) {
    2283         [ #  # ]:          0 :                 assert(i->in_cgroup_realize_queue);
    2284                 :            : 
    2285         [ #  # ]:          0 :                 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(i))) {
    2286                 :            :                         /* Maybe things changed, and the unit is not actually active anymore? */
    2287                 :          0 :                         unit_remove_from_cgroup_realize_queue(i);
    2288                 :          0 :                         continue;
    2289                 :            :                 }
    2290                 :            : 
    2291                 :          0 :                 r = unit_realize_cgroup_now(i, state);
    2292         [ #  # ]:          0 :                 if (r < 0)
    2293         [ #  # ]:          0 :                         log_warning_errno(r, "Failed to realize cgroups for queued unit %s, ignoring: %m", i->id);
    2294                 :            : 
    2295                 :          0 :                 n++;
    2296                 :            :         }
    2297                 :            : 
    2298                 :          0 :         return n;
    2299                 :            : }
    2300                 :            : 
    2301                 :         24 : static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
    2302                 :            :         Unit *slice;
    2303                 :            : 
    2304                 :            :         /* This adds the siblings of the specified unit and the
    2305                 :            :          * siblings of all parent units to the cgroup queue. (But
    2306                 :            :          * neither the specified unit itself nor the parents.) */
    2307                 :            : 
    2308         [ +  + ]:         48 :         while ((slice = UNIT_DEREF(u->slice))) {
    2309                 :            :                 Iterator i;
    2310                 :            :                 Unit *m;
    2311                 :            :                 void *v;
    2312                 :            : 
    2313         [ +  + ]:         48 :                 HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
    2314                 :            :                         /* Skip units that have a dependency on the slice
    2315                 :            :                          * but aren't actually in it. */
    2316         [ +  - ]:         24 :                         if (UNIT_DEREF(m->slice) != slice)
    2317                 :         24 :                                 continue;
    2318                 :            : 
    2319                 :            :                         /* No point in doing cgroup application for units
    2320                 :            :                          * without active processes. */
    2321         [ #  # ]:          0 :                         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
    2322                 :          0 :                                 continue;
    2323                 :            : 
    2324                 :            :                         /* If the unit doesn't need any new controllers
    2325                 :            :                          * and has current ones realized, it doesn't need
    2326                 :            :                          * any changes. */
    2327         [ #  # ]:          0 :                         if (unit_has_mask_realized(m,
    2328                 :            :                                                    unit_get_target_mask(m),
    2329                 :            :                                                    unit_get_enable_mask(m)))
    2330                 :          0 :                                 continue;
    2331                 :            : 
    2332                 :          0 :                         unit_add_to_cgroup_realize_queue(m);
    2333                 :            :                 }
    2334                 :            : 
    2335                 :         24 :                 u = slice;
    2336                 :            :         }
    2337                 :         24 : }
    2338                 :            : 
    2339                 :         24 : int unit_realize_cgroup(Unit *u) {
    2340         [ -  + ]:         24 :         assert(u);
    2341                 :            : 
    2342         [ -  + ]:         24 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    2343                 :          0 :                 return 0;
    2344                 :            : 
    2345                 :            :         /* So, here's the deal: when realizing the cgroups for this
    2346                 :            :          * unit, we need to first create all parents, but there's more
    2347                 :            :          * actually: for the weight-based controllers we also need to
    2348                 :            :          * make sure that all our siblings (i.e. units that are in the
    2349                 :            :          * same slice as we are) have cgroups, too. Otherwise, things
    2350                 :            :          * would become very uneven as each of their processes would
    2351                 :            :          * get as much resources as all our group together. This call
    2352                 :            :          * will synchronously create the parent cgroups, but will
    2353                 :            :          * defer work on the siblings to the next event loop
    2354                 :            :          * iteration. */
    2355                 :            : 
    2356                 :            :         /* Add all sibling slices to the cgroup queue. */
    2357                 :         24 :         unit_add_siblings_to_cgroup_realize_queue(u);
    2358                 :            : 
    2359                 :            :         /* And realize this one now (and apply the values) */
    2360                 :         24 :         return unit_realize_cgroup_now(u, manager_state(u->manager));
    2361                 :            : }
    2362                 :            : 
    2363                 :       8776 : void unit_release_cgroup(Unit *u) {
    2364         [ -  + ]:       8776 :         assert(u);
    2365                 :            : 
    2366                 :            :         /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
    2367                 :            :          * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
    2368                 :            : 
    2369         [ +  + ]:       8776 :         if (u->cgroup_path) {
    2370                 :         48 :                 (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
    2371                 :         48 :                 u->cgroup_path = mfree(u->cgroup_path);
    2372                 :            :         }
    2373                 :            : 
    2374         [ -  + ]:       8776 :         if (u->cgroup_control_inotify_wd >= 0) {
    2375         [ #  # ]:          0 :                 if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_control_inotify_wd) < 0)
    2376         [ #  # ]:          0 :                         log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", u->cgroup_control_inotify_wd, u->id);
    2377                 :            : 
    2378                 :          0 :                 (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd));
    2379                 :          0 :                 u->cgroup_control_inotify_wd = -1;
    2380                 :            :         }
    2381                 :            : 
    2382         [ -  + ]:       8776 :         if (u->cgroup_memory_inotify_wd >= 0) {
    2383         [ #  # ]:          0 :                 if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_memory_inotify_wd) < 0)
    2384         [ #  # ]:          0 :                         log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", u->cgroup_memory_inotify_wd, u->id);
    2385                 :            : 
    2386                 :          0 :                 (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd));
    2387                 :          0 :                 u->cgroup_memory_inotify_wd = -1;
    2388                 :            :         }
    2389                 :       8776 : }
    2390                 :            : 
    2391                 :         28 : void unit_prune_cgroup(Unit *u) {
    2392                 :            :         int r;
    2393                 :            :         bool is_root_slice;
    2394                 :            : 
    2395         [ -  + ]:         28 :         assert(u);
    2396                 :            : 
    2397                 :            :         /* Removes the cgroup, if empty and possible, and stops watching it. */
    2398                 :            : 
    2399         [ +  - ]:         28 :         if (!u->cgroup_path)
    2400                 :         28 :                 return;
    2401                 :            : 
    2402                 :          0 :         (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
    2403                 :            : 
    2404                 :          0 :         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
    2405                 :            : 
    2406                 :          0 :         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
    2407         [ #  # ]:          0 :         if (r < 0)
    2408                 :            :                 /* One reason we could have failed here is, that the cgroup still contains a process.
    2409                 :            :                  * However, if the cgroup becomes removable at a later time, it might be removed when
    2410                 :            :                  * the containing slice is stopped. So even if we failed now, this unit shouldn't assume
    2411                 :            :                  * that the cgroup is still realized the next time it is started. Do not return early
    2412                 :            :                  * on error, continue cleanup. */
    2413   [ #  #  #  #  :          0 :                 log_unit_full(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", u->cgroup_path);
                   #  # ]
    2414                 :            : 
    2415         [ #  # ]:          0 :         if (is_root_slice)
    2416                 :          0 :                 return;
    2417                 :            : 
    2418                 :          0 :         unit_release_cgroup(u);
    2419                 :            : 
    2420                 :          0 :         u->cgroup_realized = false;
    2421                 :          0 :         u->cgroup_realized_mask = 0;
    2422                 :          0 :         u->cgroup_enabled_mask = 0;
    2423                 :            : 
    2424                 :          0 :         u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
    2425                 :            : }
    2426                 :            : 
    2427                 :          0 : int unit_search_main_pid(Unit *u, pid_t *ret) {
    2428                 :          0 :         _cleanup_fclose_ FILE *f = NULL;
    2429                 :          0 :         pid_t pid = 0, npid;
    2430                 :            :         int r;
    2431                 :            : 
    2432         [ #  # ]:          0 :         assert(u);
    2433         [ #  # ]:          0 :         assert(ret);
    2434                 :            : 
    2435         [ #  # ]:          0 :         if (!u->cgroup_path)
    2436                 :          0 :                 return -ENXIO;
    2437                 :            : 
    2438                 :          0 :         r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
    2439         [ #  # ]:          0 :         if (r < 0)
    2440                 :          0 :                 return r;
    2441                 :            : 
    2442         [ #  # ]:          0 :         while (cg_read_pid(f, &npid) > 0)  {
    2443                 :            : 
    2444         [ #  # ]:          0 :                 if (npid == pid)
    2445                 :          0 :                         continue;
    2446                 :            : 
    2447         [ #  # ]:          0 :                 if (pid_is_my_child(npid) == 0)
    2448                 :          0 :                         continue;
    2449                 :            : 
    2450         [ #  # ]:          0 :                 if (pid != 0)
    2451                 :            :                         /* Dang, there's more than one daemonized PID
    2452                 :            :                         in this group, so we don't know what process
    2453                 :            :                         is the main process. */
    2454                 :            : 
    2455                 :          0 :                         return -ENODATA;
    2456                 :            : 
    2457                 :          0 :                 pid = npid;
    2458                 :            :         }
    2459                 :            : 
    2460                 :          0 :         *ret = pid;
    2461                 :          0 :         return 0;
    2462                 :            : }
    2463                 :            : 
    2464                 :          0 : static int unit_watch_pids_in_path(Unit *u, const char *path) {
    2465                 :          0 :         _cleanup_closedir_ DIR *d = NULL;
    2466                 :          0 :         _cleanup_fclose_ FILE *f = NULL;
    2467                 :          0 :         int ret = 0, r;
    2468                 :            : 
    2469         [ #  # ]:          0 :         assert(u);
    2470         [ #  # ]:          0 :         assert(path);
    2471                 :            : 
    2472                 :          0 :         r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
    2473         [ #  # ]:          0 :         if (r < 0)
    2474                 :          0 :                 ret = r;
    2475                 :            :         else {
    2476                 :            :                 pid_t pid;
    2477                 :            : 
    2478         [ #  # ]:          0 :                 while ((r = cg_read_pid(f, &pid)) > 0) {
    2479                 :          0 :                         r = unit_watch_pid(u, pid, false);
    2480   [ #  #  #  # ]:          0 :                         if (r < 0 && ret >= 0)
    2481                 :          0 :                                 ret = r;
    2482                 :            :                 }
    2483                 :            : 
    2484   [ #  #  #  # ]:          0 :                 if (r < 0 && ret >= 0)
    2485                 :          0 :                         ret = r;
    2486                 :            :         }
    2487                 :            : 
    2488                 :          0 :         r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
    2489         [ #  # ]:          0 :         if (r < 0) {
    2490         [ #  # ]:          0 :                 if (ret >= 0)
    2491                 :          0 :                         ret = r;
    2492                 :            :         } else {
    2493                 :            :                 char *fn;
    2494                 :            : 
    2495         [ #  # ]:          0 :                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
    2496         [ #  # ]:          0 :                         _cleanup_free_ char *p = NULL;
    2497                 :            : 
    2498                 :          0 :                         p = path_join(empty_to_root(path), fn);
    2499                 :          0 :                         free(fn);
    2500                 :            : 
    2501         [ #  # ]:          0 :                         if (!p)
    2502                 :          0 :                                 return -ENOMEM;
    2503                 :            : 
    2504                 :          0 :                         r = unit_watch_pids_in_path(u, p);
    2505   [ #  #  #  # ]:          0 :                         if (r < 0 && ret >= 0)
    2506                 :          0 :                                 ret = r;
    2507                 :            :                 }
    2508                 :            : 
    2509   [ #  #  #  # ]:          0 :                 if (r < 0 && ret >= 0)
    2510                 :          0 :                         ret = r;
    2511                 :            :         }
    2512                 :            : 
    2513                 :          0 :         return ret;
    2514                 :            : }
    2515                 :            : 
    2516                 :          0 : int unit_synthesize_cgroup_empty_event(Unit *u) {
    2517                 :            :         int r;
    2518                 :            : 
    2519         [ #  # ]:          0 :         assert(u);
    2520                 :            : 
    2521                 :            :         /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
    2522                 :            :          * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
    2523                 :            :          * get as notification source as soon as we stopped having any useful PIDs to watch for. */
    2524                 :            : 
    2525         [ #  # ]:          0 :         if (!u->cgroup_path)
    2526                 :          0 :                 return -ENOENT;
    2527                 :            : 
    2528                 :          0 :         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
    2529         [ #  # ]:          0 :         if (r < 0)
    2530                 :          0 :                 return r;
    2531         [ #  # ]:          0 :         if (r > 0) /* On unified we have reliable notifications, and don't need this */
    2532                 :          0 :                 return 0;
    2533                 :            : 
    2534         [ #  # ]:          0 :         if (!set_isempty(u->pids))
    2535                 :          0 :                 return 0;
    2536                 :            : 
    2537                 :          0 :         unit_add_to_cgroup_empty_queue(u);
    2538                 :          0 :         return 0;
    2539                 :            : }
    2540                 :            : 
    2541                 :          0 : int unit_watch_all_pids(Unit *u) {
    2542                 :            :         int r;
    2543                 :            : 
    2544         [ #  # ]:          0 :         assert(u);
    2545                 :            : 
    2546                 :            :         /* Adds all PIDs from our cgroup to the set of PIDs we
    2547                 :            :          * watch. This is a fallback logic for cases where we do not
    2548                 :            :          * get reliable cgroup empty notifications: we try to use
    2549                 :            :          * SIGCHLD as replacement. */
    2550                 :            : 
    2551         [ #  # ]:          0 :         if (!u->cgroup_path)
    2552                 :          0 :                 return -ENOENT;
    2553                 :            : 
    2554                 :          0 :         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
    2555         [ #  # ]:          0 :         if (r < 0)
    2556                 :          0 :                 return r;
    2557         [ #  # ]:          0 :         if (r > 0) /* On unified we can use proper notifications */
    2558                 :          0 :                 return 0;
    2559                 :            : 
    2560                 :          0 :         return unit_watch_pids_in_path(u, u->cgroup_path);
    2561                 :            : }
    2562                 :            : 
    2563                 :          0 : static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
    2564                 :          0 :         Manager *m = userdata;
    2565                 :            :         Unit *u;
    2566                 :            :         int r;
    2567                 :            : 
    2568         [ #  # ]:          0 :         assert(s);
    2569         [ #  # ]:          0 :         assert(m);
    2570                 :            : 
    2571                 :          0 :         u = m->cgroup_empty_queue;
    2572         [ #  # ]:          0 :         if (!u)
    2573                 :          0 :                 return 0;
    2574                 :            : 
    2575         [ #  # ]:          0 :         assert(u->in_cgroup_empty_queue);
    2576                 :          0 :         u->in_cgroup_empty_queue = false;
    2577   [ #  #  #  #  :          0 :         LIST_REMOVE(cgroup_empty_queue, m->cgroup_empty_queue, u);
             #  #  #  # ]
    2578                 :            : 
    2579         [ #  # ]:          0 :         if (m->cgroup_empty_queue) {
    2580                 :            :                 /* More stuff queued, let's make sure we remain enabled */
    2581                 :          0 :                 r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
    2582         [ #  # ]:          0 :                 if (r < 0)
    2583         [ #  # ]:          0 :                         log_debug_errno(r, "Failed to reenable cgroup empty event source, ignoring: %m");
    2584                 :            :         }
    2585                 :            : 
    2586                 :          0 :         unit_add_to_gc_queue(u);
    2587                 :            : 
    2588         [ #  # ]:          0 :         if (UNIT_VTABLE(u)->notify_cgroup_empty)
    2589                 :          0 :                 UNIT_VTABLE(u)->notify_cgroup_empty(u);
    2590                 :            : 
    2591                 :          0 :         return 0;
    2592                 :            : }
    2593                 :            : 
    2594                 :          0 : void unit_add_to_cgroup_empty_queue(Unit *u) {
    2595                 :            :         int r;
    2596                 :            : 
    2597         [ #  # ]:          0 :         assert(u);
    2598                 :            : 
    2599                 :            :         /* Note that there are four different ways how cgroup empty events reach us:
    2600                 :            :          *
    2601                 :            :          * 1. On the unified hierarchy we get an inotify event on the cgroup
    2602                 :            :          *
    2603                 :            :          * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket
    2604                 :            :          *
    2605                 :            :          * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus
    2606                 :            :          *
    2607                 :            :          * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as
    2608                 :            :          *    soon as we get one SIGCHLD, to deal with unreliable cgroup notifications.
    2609                 :            :          *
    2610                 :            :          * Regardless which way we got the notification, we'll verify it here, and then add it to a separate
    2611                 :            :          * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use
    2612                 :            :          * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending
    2613                 :            :          * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the
    2614                 :            :          * case for scope units). */
    2615                 :            : 
    2616         [ #  # ]:          0 :         if (u->in_cgroup_empty_queue)
    2617                 :          0 :                 return;
    2618                 :            : 
    2619                 :            :         /* Let's verify that the cgroup is really empty */
    2620         [ #  # ]:          0 :         if (!u->cgroup_path)
    2621                 :          0 :                 return;
    2622                 :          0 :         r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
    2623         [ #  # ]:          0 :         if (r < 0) {
    2624         [ #  # ]:          0 :                 log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path);
    2625                 :          0 :                 return;
    2626                 :            :         }
    2627         [ #  # ]:          0 :         if (r == 0)
    2628                 :          0 :                 return;
    2629                 :            : 
    2630   [ #  #  #  # ]:          0 :         LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
    2631                 :          0 :         u->in_cgroup_empty_queue = true;
    2632                 :            : 
    2633                 :            :         /* Trigger the defer event */
    2634                 :          0 :         r = sd_event_source_set_enabled(u->manager->cgroup_empty_event_source, SD_EVENT_ONESHOT);
    2635         [ #  # ]:          0 :         if (r < 0)
    2636         [ #  # ]:          0 :                 log_debug_errno(r, "Failed to enable cgroup empty event source: %m");
    2637                 :            : }
    2638                 :            : 
    2639                 :          0 : int unit_check_oom(Unit *u) {
    2640                 :          0 :         _cleanup_free_ char *oom_kill = NULL;
    2641                 :            :         bool increased;
    2642                 :            :         uint64_t c;
    2643                 :            :         int r;
    2644                 :            : 
    2645         [ #  # ]:          0 :         if (!u->cgroup_path)
    2646                 :          0 :                 return 0;
    2647                 :            : 
    2648                 :          0 :         r = cg_get_keyed_attribute("memory", u->cgroup_path, "memory.events", STRV_MAKE("oom_kill"), &oom_kill);
    2649         [ #  # ]:          0 :         if (r < 0)
    2650         [ #  # ]:          0 :                 return log_unit_debug_errno(u, r, "Failed to read oom_kill field of memory.events cgroup attribute: %m");
    2651                 :            : 
    2652                 :          0 :         r = safe_atou64(oom_kill, &c);
    2653         [ #  # ]:          0 :         if (r < 0)
    2654         [ #  # ]:          0 :                 return log_unit_debug_errno(u, r, "Failed to parse oom_kill field: %m");
    2655                 :            : 
    2656                 :          0 :         increased = c > u->oom_kill_last;
    2657                 :          0 :         u->oom_kill_last = c;
    2658                 :            : 
    2659         [ #  # ]:          0 :         if (!increased)
    2660                 :          0 :                 return 0;
    2661                 :            : 
    2662                 :          0 :         log_struct(LOG_NOTICE,
    2663                 :            :                    "MESSAGE_ID=" SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR,
    2664                 :            :                    LOG_UNIT_ID(u),
    2665                 :            :                    LOG_UNIT_INVOCATION_ID(u),
    2666                 :            :                    LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer."));
    2667                 :            : 
    2668         [ #  # ]:          0 :         if (UNIT_VTABLE(u)->notify_cgroup_oom)
    2669                 :          0 :                 UNIT_VTABLE(u)->notify_cgroup_oom(u);
    2670                 :            : 
    2671                 :          0 :         return 1;
    2672                 :            : }
    2673                 :            : 
    2674                 :          0 : static int on_cgroup_oom_event(sd_event_source *s, void *userdata) {
    2675                 :          0 :         Manager *m = userdata;
    2676                 :            :         Unit *u;
    2677                 :            :         int r;
    2678                 :            : 
    2679         [ #  # ]:          0 :         assert(s);
    2680         [ #  # ]:          0 :         assert(m);
    2681                 :            : 
    2682                 :          0 :         u = m->cgroup_oom_queue;
    2683         [ #  # ]:          0 :         if (!u)
    2684                 :          0 :                 return 0;
    2685                 :            : 
    2686         [ #  # ]:          0 :         assert(u->in_cgroup_oom_queue);
    2687                 :          0 :         u->in_cgroup_oom_queue = false;
    2688   [ #  #  #  #  :          0 :         LIST_REMOVE(cgroup_oom_queue, m->cgroup_oom_queue, u);
             #  #  #  # ]
    2689                 :            : 
    2690         [ #  # ]:          0 :         if (m->cgroup_oom_queue) {
    2691                 :            :                 /* More stuff queued, let's make sure we remain enabled */
    2692                 :          0 :                 r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
    2693         [ #  # ]:          0 :                 if (r < 0)
    2694         [ #  # ]:          0 :                         log_debug_errno(r, "Failed to reenable cgroup oom event source, ignoring: %m");
    2695                 :            :         }
    2696                 :            : 
    2697                 :          0 :         (void) unit_check_oom(u);
    2698                 :          0 :         return 0;
    2699                 :            : }
    2700                 :            : 
    2701                 :          0 : static void unit_add_to_cgroup_oom_queue(Unit *u) {
    2702                 :            :         int r;
    2703                 :            : 
    2704         [ #  # ]:          0 :         assert(u);
    2705                 :            : 
    2706         [ #  # ]:          0 :         if (u->in_cgroup_oom_queue)
    2707                 :          0 :                 return;
    2708         [ #  # ]:          0 :         if (!u->cgroup_path)
    2709                 :          0 :                 return;
    2710                 :            : 
    2711   [ #  #  #  # ]:          0 :         LIST_PREPEND(cgroup_oom_queue, u->manager->cgroup_oom_queue, u);
    2712                 :          0 :         u->in_cgroup_oom_queue = true;
    2713                 :            : 
    2714                 :            :         /* Trigger the defer event */
    2715         [ #  # ]:          0 :         if (!u->manager->cgroup_oom_event_source) {
    2716         [ #  # ]:          0 :                 _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
    2717                 :            : 
    2718                 :          0 :                 r = sd_event_add_defer(u->manager->event, &s, on_cgroup_oom_event, u->manager);
    2719         [ #  # ]:          0 :                 if (r < 0) {
    2720         [ #  # ]:          0 :                         log_error_errno(r, "Failed to create cgroup oom event source: %m");
    2721                 :          0 :                         return;
    2722                 :            :                 }
    2723                 :            : 
    2724                 :          0 :                 r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_NORMAL-8);
    2725         [ #  # ]:          0 :                 if (r < 0) {
    2726         [ #  # ]:          0 :                         log_error_errno(r, "Failed to set priority of cgroup oom event source: %m");
    2727                 :          0 :                         return;
    2728                 :            :                 }
    2729                 :            : 
    2730                 :          0 :                 (void) sd_event_source_set_description(s, "cgroup-oom");
    2731                 :          0 :                 u->manager->cgroup_oom_event_source = TAKE_PTR(s);
    2732                 :            :         }
    2733                 :            : 
    2734                 :          0 :         r = sd_event_source_set_enabled(u->manager->cgroup_oom_event_source, SD_EVENT_ONESHOT);
    2735         [ #  # ]:          0 :         if (r < 0)
    2736         [ #  # ]:          0 :                 log_error_errno(r, "Failed to enable cgroup oom event source: %m");
    2737                 :            : }
    2738                 :            : 
    2739                 :          0 : static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
    2740                 :          0 :         Manager *m = userdata;
    2741                 :            : 
    2742         [ #  # ]:          0 :         assert(s);
    2743         [ #  # ]:          0 :         assert(fd >= 0);
    2744         [ #  # ]:          0 :         assert(m);
    2745                 :            : 
    2746                 :          0 :         for (;;) {
    2747                 :            :                 union inotify_event_buffer buffer;
    2748                 :            :                 struct inotify_event *e;
    2749                 :            :                 ssize_t l;
    2750                 :            : 
    2751                 :          0 :                 l = read(fd, &buffer, sizeof(buffer));
    2752         [ #  # ]:          0 :                 if (l < 0) {
    2753   [ #  #  #  # ]:          0 :                         if (IN_SET(errno, EINTR, EAGAIN))
    2754                 :          0 :                                 return 0;
    2755                 :            : 
    2756         [ #  # ]:          0 :                         return log_error_errno(errno, "Failed to read control group inotify events: %m");
    2757                 :            :                 }
    2758                 :            : 
    2759         [ #  # ]:          0 :                 FOREACH_INOTIFY_EVENT(e, buffer, l) {
    2760                 :            :                         Unit *u;
    2761                 :            : 
    2762         [ #  # ]:          0 :                         if (e->wd < 0)
    2763                 :            :                                 /* Queue overflow has no watch descriptor */
    2764                 :          0 :                                 continue;
    2765                 :            : 
    2766         [ #  # ]:          0 :                         if (e->mask & IN_IGNORED)
    2767                 :            :                                 /* The watch was just removed */
    2768                 :          0 :                                 continue;
    2769                 :            : 
    2770                 :            :                         /* Note that inotify might deliver events for a watch even after it was removed,
    2771                 :            :                          * because it was queued before the removal. Let's ignore this here safely. */
    2772                 :            : 
    2773                 :          0 :                         u = hashmap_get(m->cgroup_control_inotify_wd_unit, INT_TO_PTR(e->wd));
    2774         [ #  # ]:          0 :                         if (u)
    2775                 :          0 :                                 unit_add_to_cgroup_empty_queue(u);
    2776                 :            : 
    2777                 :          0 :                         u = hashmap_get(m->cgroup_memory_inotify_wd_unit, INT_TO_PTR(e->wd));
    2778         [ #  # ]:          0 :                         if (u)
    2779                 :          0 :                                 unit_add_to_cgroup_oom_queue(u);
    2780                 :            :                 }
    2781                 :            :         }
    2782                 :            : }
    2783                 :            : 
    2784                 :         44 : static int cg_bpf_mask_supported(CGroupMask *ret) {
    2785                 :         44 :         CGroupMask mask = 0;
    2786                 :            :         int r;
    2787                 :            : 
    2788                 :            :         /* BPF-based firewall */
    2789                 :         44 :         r = bpf_firewall_supported();
    2790         [ -  + ]:         44 :         if (r > 0)
    2791                 :          0 :                 mask |= CGROUP_MASK_BPF_FIREWALL;
    2792                 :            : 
    2793                 :            :         /* BPF-based device access control */
    2794                 :         44 :         r = bpf_devices_supported();
    2795         [ -  + ]:         44 :         if (r > 0)
    2796                 :          0 :                 mask |= CGROUP_MASK_BPF_DEVICES;
    2797                 :            : 
    2798                 :         44 :         *ret = mask;
    2799                 :         44 :         return 0;
    2800                 :            : }
    2801                 :            : 
    2802                 :         44 : int manager_setup_cgroup(Manager *m) {
    2803                 :         44 :         _cleanup_free_ char *path = NULL;
    2804                 :            :         const char *scope_path;
    2805                 :            :         CGroupController c;
    2806                 :            :         int r, all_unified;
    2807                 :            :         CGroupMask mask;
    2808                 :            :         char *e;
    2809                 :            : 
    2810         [ -  + ]:         44 :         assert(m);
    2811                 :            : 
    2812                 :            :         /* 1. Determine hierarchy */
    2813                 :         44 :         m->cgroup_root = mfree(m->cgroup_root);
    2814                 :         44 :         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
    2815         [ -  + ]:         44 :         if (r < 0)
    2816         [ #  # ]:          0 :                 return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
    2817                 :            : 
    2818                 :            :         /* Chop off the init scope, if we are already located in it */
    2819                 :         44 :         e = endswith(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
    2820                 :            : 
    2821                 :            :         /* LEGACY: Also chop off the system slice if we are in
    2822                 :            :          * it. This is to support live upgrades from older systemd
    2823                 :            :          * versions where PID 1 was moved there. Also see
    2824                 :            :          * cg_get_root_path(). */
    2825   [ +  -  -  + ]:         44 :         if (!e && MANAGER_IS_SYSTEM(m)) {
    2826                 :          0 :                 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
    2827         [ #  # ]:          0 :                 if (!e)
    2828                 :          0 :                         e = endswith(m->cgroup_root, "/system"); /* even more legacy */
    2829                 :            :         }
    2830         [ -  + ]:         44 :         if (e)
    2831                 :          0 :                 *e = 0;
    2832                 :            : 
    2833                 :            :         /* And make sure to store away the root value without trailing slash, even for the root dir, so that we can
    2834                 :            :          * easily prepend it everywhere. */
    2835                 :         44 :         delete_trailing_chars(m->cgroup_root, "/");
    2836                 :            : 
    2837                 :            :         /* 2. Show data */
    2838                 :         44 :         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
    2839         [ -  + ]:         44 :         if (r < 0)
    2840         [ #  # ]:          0 :                 return log_error_errno(r, "Cannot find cgroup mount point: %m");
    2841                 :            : 
    2842                 :         44 :         r = cg_unified_flush();
    2843         [ -  + ]:         44 :         if (r < 0)
    2844         [ #  # ]:          0 :                 return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m");
    2845                 :            : 
    2846                 :         44 :         all_unified = cg_all_unified();
    2847         [ -  + ]:         44 :         if (all_unified < 0)
    2848         [ #  # ]:          0 :                 return log_error_errno(all_unified, "Couldn't determine whether we are in all unified mode: %m");
    2849         [ -  + ]:         44 :         if (all_unified > 0)
    2850         [ #  # ]:          0 :                 log_debug("Unified cgroup hierarchy is located at %s.", path);
    2851                 :            :         else {
    2852                 :         44 :                 r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
    2853         [ -  + ]:         44 :                 if (r < 0)
    2854         [ #  # ]:          0 :                         return log_error_errno(r, "Failed to determine whether systemd's own controller is in unified mode: %m");
    2855         [ +  - ]:         44 :                 if (r > 0)
    2856         [ +  + ]:         44 :                         log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path);
    2857                 :            :                 else
    2858         [ #  # ]:          0 :                         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
    2859                 :            :         }
    2860                 :            : 
    2861                 :            :         /* 3. Allocate cgroup empty defer event source */
    2862                 :         44 :         m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
    2863                 :         44 :         r = sd_event_add_defer(m->event, &m->cgroup_empty_event_source, on_cgroup_empty_event, m);
    2864         [ -  + ]:         44 :         if (r < 0)
    2865         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to create cgroup empty event source: %m");
    2866                 :            : 
    2867                 :            :         /* Schedule cgroup empty checks early, but after having processed service notification messages or
    2868                 :            :          * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of
    2869                 :            :          * notification, and we collected the metadata the notification and SIGCHLD stuff offers first. */
    2870                 :         44 :         r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5);
    2871         [ -  + ]:         44 :         if (r < 0)
    2872         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m");
    2873                 :            : 
    2874                 :         44 :         r = sd_event_source_set_enabled(m->cgroup_empty_event_source, SD_EVENT_OFF);
    2875         [ -  + ]:         44 :         if (r < 0)
    2876         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to disable cgroup empty event source: %m");
    2877                 :            : 
    2878                 :         44 :         (void) sd_event_source_set_description(m->cgroup_empty_event_source, "cgroup-empty");
    2879                 :            : 
    2880                 :            :         /* 4. Install notifier inotify object, or agent */
    2881         [ +  - ]:         44 :         if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
    2882                 :            : 
    2883                 :            :                 /* In the unified hierarchy we can get cgroup empty notifications via inotify. */
    2884                 :            : 
    2885                 :         44 :                 m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
    2886                 :         44 :                 safe_close(m->cgroup_inotify_fd);
    2887                 :            : 
    2888                 :         44 :                 m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
    2889         [ -  + ]:         44 :                 if (m->cgroup_inotify_fd < 0)
    2890         [ #  # ]:          0 :                         return log_error_errno(errno, "Failed to create control group inotify object: %m");
    2891                 :            : 
    2892                 :         44 :                 r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
    2893         [ -  + ]:         44 :                 if (r < 0)
    2894         [ #  # ]:          0 :                         return log_error_errno(r, "Failed to watch control group inotify object: %m");
    2895                 :            : 
    2896                 :            :                 /* Process cgroup empty notifications early. Note that when this event is dispatched it'll
    2897                 :            :                  * just add the unit to a cgroup empty queue, hence let's run earlier than that. Also see
    2898                 :            :                  * handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
    2899                 :         44 :                 r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-9);
    2900         [ -  + ]:         44 :                 if (r < 0)
    2901         [ #  # ]:          0 :                         return log_error_errno(r, "Failed to set priority of inotify event source: %m");
    2902                 :            : 
    2903                 :         44 :                 (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
    2904                 :            : 
    2905   [ #  #  #  #  :          0 :         } else if (MANAGER_IS_SYSTEM(m) && manager_owns_host_root_cgroup(m) && !MANAGER_IS_TEST_RUN(m)) {
                   #  # ]
    2906                 :            : 
    2907                 :            :                 /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
    2908                 :            :                  * since it does not generate events when control groups with children run empty. */
    2909                 :            : 
    2910                 :          0 :                 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
    2911         [ #  # ]:          0 :                 if (r < 0)
    2912         [ #  # ]:          0 :                         log_warning_errno(r, "Failed to install release agent, ignoring: %m");
    2913         [ #  # ]:          0 :                 else if (r > 0)
    2914         [ #  # ]:          0 :                         log_debug("Installed release agent.");
    2915         [ #  # ]:          0 :                 else if (r == 0)
    2916         [ #  # ]:          0 :                         log_debug("Release agent already installed.");
    2917                 :            :         }
    2918                 :            : 
    2919                 :            :         /* 5. Make sure we are in the special "init.scope" unit in the root slice. */
    2920   [ +  +  +  -  :        220 :         scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
          -  +  -  +  +  
                +  +  - ]
    2921                 :         44 :         r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
    2922         [ -  + ]:         44 :         if (r >= 0) {
    2923                 :            :                 /* Also, move all other userspace processes remaining in the root cgroup into that scope. */
    2924                 :          0 :                 r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
    2925         [ #  # ]:          0 :                 if (r < 0)
    2926         [ #  # ]:          0 :                         log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
    2927                 :            : 
    2928                 :            :                 /* 6. And pin it, so that it cannot be unmounted */
    2929                 :          0 :                 safe_close(m->pin_cgroupfs_fd);
    2930                 :          0 :                 m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
    2931         [ #  # ]:          0 :                 if (m->pin_cgroupfs_fd < 0)
    2932         [ #  # ]:          0 :                         return log_error_errno(errno, "Failed to open pin file: %m");
    2933                 :            : 
    2934         [ -  + ]:         44 :         } else if (!MANAGER_IS_TEST_RUN(m))
    2935         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
    2936                 :            : 
    2937                 :            :         /* 7. Always enable hierarchical support if it exists... */
    2938   [ +  -  -  + ]:         44 :         if (!all_unified && !MANAGER_IS_TEST_RUN(m))
    2939                 :          0 :                 (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
    2940                 :            : 
    2941                 :            :         /* 8. Figure out which controllers are supported */
    2942                 :         44 :         r = cg_mask_supported(&m->cgroup_supported);
    2943         [ -  + ]:         44 :         if (r < 0)
    2944         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to determine supported controllers: %m");
    2945                 :            : 
    2946                 :            :         /* 9. Figure out which bpf-based pseudo-controllers are supported */
    2947                 :         44 :         r = cg_bpf_mask_supported(&mask);
    2948         [ -  + ]:         44 :         if (r < 0)
    2949         [ #  # ]:          0 :                 return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
    2950                 :         44 :         m->cgroup_supported |= mask;
    2951                 :            : 
    2952                 :            :         /* 10. Log which controllers are supported */
    2953         [ +  + ]:        440 :         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
    2954         [ +  + ]:        396 :                 log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
    2955                 :            : 
    2956                 :         44 :         return 0;
    2957                 :            : }
    2958                 :            : 
    2959                 :         56 : void manager_shutdown_cgroup(Manager *m, bool delete) {
    2960         [ -  + ]:         56 :         assert(m);
    2961                 :            : 
    2962                 :            :         /* We can't really delete the group, since we are in it. But
    2963                 :            :          * let's trim it. */
    2964   [ -  +  #  #  :         56 :         if (delete && m->cgroup_root && m->test_run_flags != MANAGER_TEST_RUN_MINIMAL)
                   #  # ]
    2965                 :          0 :                 (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
    2966                 :            : 
    2967                 :         56 :         m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source);
    2968                 :            : 
    2969                 :         56 :         m->cgroup_control_inotify_wd_unit = hashmap_free(m->cgroup_control_inotify_wd_unit);
    2970                 :         56 :         m->cgroup_memory_inotify_wd_unit = hashmap_free(m->cgroup_memory_inotify_wd_unit);
    2971                 :            : 
    2972                 :         56 :         m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
    2973                 :         56 :         m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd);
    2974                 :            : 
    2975                 :         56 :         m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
    2976                 :            : 
    2977                 :         56 :         m->cgroup_root = mfree(m->cgroup_root);
    2978                 :         56 : }
    2979                 :            : 
    2980                 :          8 : Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
    2981                 :            :         char *p;
    2982                 :            :         Unit *u;
    2983                 :            : 
    2984         [ -  + ]:          8 :         assert(m);
    2985         [ -  + ]:          8 :         assert(cgroup);
    2986                 :            : 
    2987                 :          8 :         u = hashmap_get(m->cgroup_unit, cgroup);
    2988         [ -  + ]:          8 :         if (u)
    2989                 :          0 :                 return u;
    2990                 :            : 
    2991                 :          8 :         p = strdupa(cgroup);
    2992                 :         16 :         for (;;) {
    2993                 :            :                 char *e;
    2994                 :            : 
    2995                 :         24 :                 e = strrchr(p, '/');
    2996   [ +  -  +  + ]:         24 :                 if (!e || e == p)
    2997                 :          8 :                         return hashmap_get(m->cgroup_unit, SPECIAL_ROOT_SLICE);
    2998                 :            : 
    2999                 :         16 :                 *e = 0;
    3000                 :            : 
    3001                 :         16 :                 u = hashmap_get(m->cgroup_unit, p);
    3002         [ -  + ]:         16 :                 if (u)
    3003                 :          0 :                         return u;
    3004                 :            :         }
    3005                 :            : }
    3006                 :            : 
    3007                 :          8 : Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
    3008                 :          8 :         _cleanup_free_ char *cgroup = NULL;
    3009                 :            : 
    3010         [ -  + ]:          8 :         assert(m);
    3011                 :            : 
    3012         [ -  + ]:          8 :         if (!pid_is_valid(pid))
    3013                 :          0 :                 return NULL;
    3014                 :            : 
    3015         [ -  + ]:          8 :         if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup) < 0)
    3016                 :          0 :                 return NULL;
    3017                 :            : 
    3018                 :          8 :         return manager_get_unit_by_cgroup(m, cgroup);
    3019                 :            : }
    3020                 :            : 
    3021                 :          0 : Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
    3022                 :            :         Unit *u, **array;
    3023                 :            : 
    3024         [ #  # ]:          0 :         assert(m);
    3025                 :            : 
    3026                 :            :         /* Note that a process might be owned by multiple units, we return only one here, which is good enough for most
    3027                 :            :          * cases, though not strictly correct. We prefer the one reported by cgroup membership, as that's the most
    3028                 :            :          * relevant one as children of the process will be assigned to that one, too, before all else. */
    3029                 :            : 
    3030         [ #  # ]:          0 :         if (!pid_is_valid(pid))
    3031                 :          0 :                 return NULL;
    3032                 :            : 
    3033         [ #  # ]:          0 :         if (pid == getpid_cached())
    3034                 :          0 :                 return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
    3035                 :            : 
    3036                 :          0 :         u = manager_get_unit_by_pid_cgroup(m, pid);
    3037         [ #  # ]:          0 :         if (u)
    3038                 :          0 :                 return u;
    3039                 :            : 
    3040                 :          0 :         u = hashmap_get(m->watch_pids, PID_TO_PTR(pid));
    3041         [ #  # ]:          0 :         if (u)
    3042                 :          0 :                 return u;
    3043                 :            : 
    3044                 :          0 :         array = hashmap_get(m->watch_pids, PID_TO_PTR(-pid));
    3045         [ #  # ]:          0 :         if (array)
    3046                 :          0 :                 return array[0];
    3047                 :            : 
    3048                 :          0 :         return NULL;
    3049                 :            : }
    3050                 :            : 
    3051                 :          0 : int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
    3052                 :            :         Unit *u;
    3053                 :            : 
    3054         [ #  # ]:          0 :         assert(m);
    3055         [ #  # ]:          0 :         assert(cgroup);
    3056                 :            : 
    3057                 :            :         /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process
    3058                 :            :          * or from the --system instance */
    3059                 :            : 
    3060         [ #  # ]:          0 :         log_debug("Got cgroup empty notification for: %s", cgroup);
    3061                 :            : 
    3062                 :          0 :         u = manager_get_unit_by_cgroup(m, cgroup);
    3063         [ #  # ]:          0 :         if (!u)
    3064                 :          0 :                 return 0;
    3065                 :            : 
    3066                 :          0 :         unit_add_to_cgroup_empty_queue(u);
    3067                 :          0 :         return 1;
    3068                 :            : }
    3069                 :            : 
    3070                 :          0 : int unit_get_memory_current(Unit *u, uint64_t *ret) {
    3071                 :          0 :         _cleanup_free_ char *v = NULL;
    3072                 :            :         int r;
    3073                 :            : 
    3074         [ #  # ]:          0 :         assert(u);
    3075         [ #  # ]:          0 :         assert(ret);
    3076                 :            : 
    3077   [ #  #  #  # ]:          0 :         if (!UNIT_CGROUP_BOOL(u, memory_accounting))
    3078                 :          0 :                 return -ENODATA;
    3079                 :            : 
    3080         [ #  # ]:          0 :         if (!u->cgroup_path)
    3081                 :          0 :                 return -ENODATA;
    3082                 :            : 
    3083                 :            :         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
    3084         [ #  # ]:          0 :         if (unit_has_host_root_cgroup(u))
    3085                 :          0 :                 return procfs_memory_get_used(ret);
    3086                 :            : 
    3087         [ #  # ]:          0 :         if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
    3088                 :          0 :                 return -ENODATA;
    3089                 :            : 
    3090                 :          0 :         r = cg_all_unified();
    3091         [ #  # ]:          0 :         if (r < 0)
    3092                 :          0 :                 return r;
    3093         [ #  # ]:          0 :         if (r > 0)
    3094                 :          0 :                 r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v);
    3095                 :            :         else
    3096                 :          0 :                 r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v);
    3097         [ #  # ]:          0 :         if (r == -ENOENT)
    3098                 :          0 :                 return -ENODATA;
    3099         [ #  # ]:          0 :         if (r < 0)
    3100                 :          0 :                 return r;
    3101                 :            : 
    3102                 :          0 :         return safe_atou64(v, ret);
    3103                 :            : }
    3104                 :            : 
    3105                 :          0 : int unit_get_tasks_current(Unit *u, uint64_t *ret) {
    3106                 :          0 :         _cleanup_free_ char *v = NULL;
    3107                 :            :         int r;
    3108                 :            : 
    3109         [ #  # ]:          0 :         assert(u);
    3110         [ #  # ]:          0 :         assert(ret);
    3111                 :            : 
    3112   [ #  #  #  # ]:          0 :         if (!UNIT_CGROUP_BOOL(u, tasks_accounting))
    3113                 :          0 :                 return -ENODATA;
    3114                 :            : 
    3115         [ #  # ]:          0 :         if (!u->cgroup_path)
    3116                 :          0 :                 return -ENODATA;
    3117                 :            : 
    3118                 :            :         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
    3119         [ #  # ]:          0 :         if (unit_has_host_root_cgroup(u))
    3120                 :          0 :                 return procfs_tasks_get_current(ret);
    3121                 :            : 
    3122         [ #  # ]:          0 :         if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
    3123                 :          0 :                 return -ENODATA;
    3124                 :            : 
    3125                 :          0 :         r = cg_get_attribute("pids", u->cgroup_path, "pids.current", &v);
    3126         [ #  # ]:          0 :         if (r == -ENOENT)
    3127                 :          0 :                 return -ENODATA;
    3128         [ #  # ]:          0 :         if (r < 0)
    3129                 :          0 :                 return r;
    3130                 :            : 
    3131                 :          0 :         return safe_atou64(v, ret);
    3132                 :            : }
    3133                 :            : 
    3134                 :         24 : static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
    3135                 :         24 :         _cleanup_free_ char *v = NULL;
    3136                 :            :         uint64_t ns;
    3137                 :            :         int r;
    3138                 :            : 
    3139         [ -  + ]:         24 :         assert(u);
    3140         [ -  + ]:         24 :         assert(ret);
    3141                 :            : 
    3142         [ -  + ]:         24 :         if (!u->cgroup_path)
    3143                 :          0 :                 return -ENODATA;
    3144                 :            : 
    3145                 :            :         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
    3146         [ -  + ]:         24 :         if (unit_has_host_root_cgroup(u))
    3147                 :          0 :                 return procfs_cpu_get_usage(ret);
    3148                 :            : 
    3149                 :            :         /* Requisite controllers for CPU accounting are not enabled */
    3150         [ +  - ]:         24 :         if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
    3151                 :         24 :                 return -ENODATA;
    3152                 :            : 
    3153                 :          0 :         r = cg_all_unified();
    3154         [ #  # ]:          0 :         if (r < 0)
    3155                 :          0 :                 return r;
    3156         [ #  # ]:          0 :         if (r > 0) {
    3157         [ #  # ]:          0 :                 _cleanup_free_ char *val = NULL;
    3158                 :            :                 uint64_t us;
    3159                 :            : 
    3160                 :          0 :                 r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
    3161   [ #  #  #  # ]:          0 :                 if (IN_SET(r, -ENOENT, -ENXIO))
    3162                 :          0 :                         return -ENODATA;
    3163         [ #  # ]:          0 :                 if (r < 0)
    3164                 :          0 :                         return r;
    3165                 :            : 
    3166                 :          0 :                 r = safe_atou64(val, &us);
    3167         [ #  # ]:          0 :                 if (r < 0)
    3168                 :          0 :                         return r;
    3169                 :            : 
    3170                 :          0 :                 ns = us * NSEC_PER_USEC;
    3171                 :            :         } else {
    3172                 :          0 :                 r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
    3173         [ #  # ]:          0 :                 if (r == -ENOENT)
    3174                 :          0 :                         return -ENODATA;
    3175         [ #  # ]:          0 :                 if (r < 0)
    3176                 :          0 :                         return r;
    3177                 :            : 
    3178                 :          0 :                 r = safe_atou64(v, &ns);
    3179         [ #  # ]:          0 :                 if (r < 0)
    3180                 :          0 :                         return r;
    3181                 :            :         }
    3182                 :            : 
    3183                 :          0 :         *ret = ns;
    3184                 :          0 :         return 0;
    3185                 :            : }
    3186                 :            : 
    3187                 :         28 : int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
    3188                 :            :         nsec_t ns;
    3189                 :            :         int r;
    3190                 :            : 
    3191         [ -  + ]:         28 :         assert(u);
    3192                 :            : 
    3193                 :            :         /* Retrieve the current CPU usage counter. This will subtract the CPU counter taken when the unit was
    3194                 :            :          * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
    3195                 :            :          * call this function with a NULL return value. */
    3196                 :            : 
    3197   [ -  +  +  - ]:         28 :         if (!UNIT_CGROUP_BOOL(u, cpu_accounting))
    3198                 :         28 :                 return -ENODATA;
    3199                 :            : 
    3200                 :          0 :         r = unit_get_cpu_usage_raw(u, &ns);
    3201   [ #  #  #  # ]:          0 :         if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
    3202                 :            :                 /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
    3203                 :            :                  * cached value. */
    3204                 :            : 
    3205         [ #  # ]:          0 :                 if (ret)
    3206                 :          0 :                         *ret = u->cpu_usage_last;
    3207                 :          0 :                 return 0;
    3208                 :            :         }
    3209         [ #  # ]:          0 :         if (r < 0)
    3210                 :          0 :                 return r;
    3211                 :            : 
    3212         [ #  # ]:          0 :         if (ns > u->cpu_usage_base)
    3213                 :          0 :                 ns -= u->cpu_usage_base;
    3214                 :            :         else
    3215                 :          0 :                 ns = 0;
    3216                 :            : 
    3217                 :          0 :         u->cpu_usage_last = ns;
    3218         [ #  # ]:          0 :         if (ret)
    3219                 :          0 :                 *ret = ns;
    3220                 :            : 
    3221                 :          0 :         return 0;
    3222                 :            : }
    3223                 :            : 
    3224                 :        112 : int unit_get_ip_accounting(
    3225                 :            :                 Unit *u,
    3226                 :            :                 CGroupIPAccountingMetric metric,
    3227                 :            :                 uint64_t *ret) {
    3228                 :            : 
    3229                 :            :         uint64_t value;
    3230                 :            :         int fd, r;
    3231                 :            : 
    3232         [ -  + ]:        112 :         assert(u);
    3233         [ -  + ]:        112 :         assert(metric >= 0);
    3234         [ -  + ]:        112 :         assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
    3235         [ -  + ]:        112 :         assert(ret);
    3236                 :            : 
    3237   [ -  +  +  - ]:        112 :         if (!UNIT_CGROUP_BOOL(u, ip_accounting))
    3238                 :        112 :                 return -ENODATA;
    3239                 :            : 
    3240         [ #  # ]:          0 :         fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
    3241         [ #  # ]:          0 :                 u->ip_accounting_ingress_map_fd :
    3242                 :            :                 u->ip_accounting_egress_map_fd;
    3243         [ #  # ]:          0 :         if (fd < 0)
    3244                 :          0 :                 return -ENODATA;
    3245                 :            : 
    3246   [ #  #  #  # ]:          0 :         if (IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES))
    3247                 :          0 :                 r = bpf_firewall_read_accounting(fd, &value, NULL);
    3248                 :            :         else
    3249                 :          0 :                 r = bpf_firewall_read_accounting(fd, NULL, &value);
    3250         [ #  # ]:          0 :         if (r < 0)
    3251                 :          0 :                 return r;
    3252                 :            : 
    3253                 :            :         /* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we compile
    3254                 :            :          * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
    3255                 :            :          * ip_accounting_extra[] field, and add them in here transparently. */
    3256                 :            : 
    3257                 :          0 :         *ret = value + u->ip_accounting_extra[metric];
    3258                 :            : 
    3259                 :          0 :         return r;
    3260                 :            : }
    3261                 :            : 
    3262                 :         24 : static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_ACCOUNTING_METRIC_MAX]) {
    3263                 :            :         static const char *const field_names[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
    3264                 :            :                 [CGROUP_IO_READ_BYTES]       = "rbytes=",
    3265                 :            :                 [CGROUP_IO_WRITE_BYTES]      = "wbytes=",
    3266                 :            :                 [CGROUP_IO_READ_OPERATIONS]  = "rios=",
    3267                 :            :                 [CGROUP_IO_WRITE_OPERATIONS] = "wios=",
    3268                 :            :         };
    3269                 :         24 :         uint64_t acc[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {};
    3270                 :         24 :         _cleanup_free_ char *path = NULL;
    3271                 :         24 :         _cleanup_fclose_ FILE *f = NULL;
    3272                 :            :         int r;
    3273                 :            : 
    3274         [ -  + ]:         24 :         assert(u);
    3275                 :            : 
    3276         [ -  + ]:         24 :         if (!u->cgroup_path)
    3277                 :          0 :                 return -ENODATA;
    3278                 :            : 
    3279         [ -  + ]:         24 :         if (unit_has_host_root_cgroup(u))
    3280                 :          0 :                 return -ENODATA; /* TODO: return useful data for the top-level cgroup */
    3281                 :            : 
    3282                 :         24 :         r = cg_all_unified();
    3283         [ -  + ]:         24 :         if (r < 0)
    3284                 :          0 :                 return r;
    3285         [ +  - ]:         24 :         if (r == 0) /* TODO: support cgroupv1 */
    3286                 :         24 :                 return -ENODATA;
    3287                 :            : 
    3288         [ #  # ]:          0 :         if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_IO))
    3289                 :          0 :                 return -ENODATA;
    3290                 :            : 
    3291                 :          0 :         r = cg_get_path("io", u->cgroup_path, "io.stat", &path);
    3292         [ #  # ]:          0 :         if (r < 0)
    3293                 :          0 :                 return r;
    3294                 :            : 
    3295                 :          0 :         f = fopen(path, "re");
    3296         [ #  # ]:          0 :         if (!f)
    3297                 :          0 :                 return -errno;
    3298                 :            : 
    3299                 :          0 :         for (;;) {
    3300      [ #  #  # ]:          0 :                 _cleanup_free_ char *line = NULL;
    3301                 :            :                 const char *p;
    3302                 :            : 
    3303                 :          0 :                 r = read_line(f, LONG_LINE_MAX, &line);
    3304         [ #  # ]:          0 :                 if (r < 0)
    3305                 :          0 :                         return r;
    3306         [ #  # ]:          0 :                 if (r == 0)
    3307                 :          0 :                         break;
    3308                 :            : 
    3309                 :          0 :                 p = line;
    3310                 :          0 :                 p += strcspn(p, WHITESPACE); /* Skip over device major/minor */
    3311                 :          0 :                 p += strspn(p, WHITESPACE);  /* Skip over following whitespace */
    3312                 :            : 
    3313                 :          0 :                 for (;;) {
    3314      [ #  #  # ]:          0 :                         _cleanup_free_ char *word = NULL;
    3315                 :            : 
    3316                 :          0 :                         r = extract_first_word(&p, &word, NULL, EXTRACT_RETAIN_ESCAPE);
    3317         [ #  # ]:          0 :                         if (r < 0)
    3318                 :          0 :                                 return r;
    3319         [ #  # ]:          0 :                         if (r == 0)
    3320                 :          0 :                                 break;
    3321                 :            : 
    3322         [ #  # ]:          0 :                         for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
    3323                 :            :                                 const char *x;
    3324                 :            : 
    3325                 :          0 :                                 x = startswith(word, field_names[i]);
    3326         [ #  # ]:          0 :                                 if (x) {
    3327                 :            :                                         uint64_t w;
    3328                 :            : 
    3329                 :          0 :                                         r = safe_atou64(x, &w);
    3330         [ #  # ]:          0 :                                         if (r < 0)
    3331                 :          0 :                                                 return r;
    3332                 :            : 
    3333                 :            :                                         /* Sum up the stats of all devices */
    3334                 :          0 :                                         acc[i] += w;
    3335                 :          0 :                                         break;
    3336                 :            :                                 }
    3337                 :            :                         }
    3338                 :            :                 }
    3339                 :            :         }
    3340                 :            : 
    3341                 :          0 :         memcpy(ret, acc, sizeof(acc));
    3342                 :          0 :         return 0;
    3343                 :            : }
    3344                 :            : 
    3345                 :        112 : int unit_get_io_accounting(
    3346                 :            :                 Unit *u,
    3347                 :            :                 CGroupIOAccountingMetric metric,
    3348                 :            :                 bool allow_cache,
    3349                 :            :                 uint64_t *ret) {
    3350                 :            : 
    3351                 :            :         uint64_t raw[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
    3352                 :            :         int r;
    3353                 :            : 
    3354                 :            :         /* Retrieve an IO account parameter. This will subtract the counter when the unit was started. */
    3355                 :            : 
    3356   [ -  +  +  - ]:        112 :         if (!UNIT_CGROUP_BOOL(u, io_accounting))
    3357                 :        112 :                 return -ENODATA;
    3358                 :            : 
    3359   [ #  #  #  # ]:          0 :         if (allow_cache && u->io_accounting_last[metric] != UINT64_MAX)
    3360                 :          0 :                 goto done;
    3361                 :            : 
    3362                 :          0 :         r = unit_get_io_accounting_raw(u, raw);
    3363   [ #  #  #  # ]:          0 :         if (r == -ENODATA && u->io_accounting_last[metric] != UINT64_MAX)
    3364                 :          0 :                 goto done;
    3365         [ #  # ]:          0 :         if (r < 0)
    3366                 :          0 :                 return r;
    3367                 :            : 
    3368         [ #  # ]:          0 :         for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
    3369                 :            :                 /* Saturated subtraction */
    3370         [ #  # ]:          0 :                 if (raw[i] > u->io_accounting_base[i])
    3371                 :          0 :                         u->io_accounting_last[i] = raw[i] - u->io_accounting_base[i];
    3372                 :            :                 else
    3373                 :          0 :                         u->io_accounting_last[i] = 0;
    3374                 :            :         }
    3375                 :            : 
    3376                 :          0 : done:
    3377         [ #  # ]:          0 :         if (ret)
    3378                 :          0 :                 *ret = u->io_accounting_last[metric];
    3379                 :            : 
    3380                 :          0 :         return 0;
    3381                 :            : }
    3382                 :            : 
    3383                 :         24 : int unit_reset_cpu_accounting(Unit *u) {
    3384                 :            :         int r;
    3385                 :            : 
    3386         [ -  + ]:         24 :         assert(u);
    3387                 :            : 
    3388                 :         24 :         u->cpu_usage_last = NSEC_INFINITY;
    3389                 :            : 
    3390                 :         24 :         r = unit_get_cpu_usage_raw(u, &u->cpu_usage_base);
    3391         [ +  - ]:         24 :         if (r < 0) {
    3392                 :         24 :                 u->cpu_usage_base = 0;
    3393                 :         24 :                 return r;
    3394                 :            :         }
    3395                 :            : 
    3396                 :          0 :         return 0;
    3397                 :            : }
    3398                 :            : 
    3399                 :         24 : int unit_reset_ip_accounting(Unit *u) {
    3400                 :         24 :         int r = 0, q = 0;
    3401                 :            : 
    3402         [ -  + ]:         24 :         assert(u);
    3403                 :            : 
    3404         [ -  + ]:         24 :         if (u->ip_accounting_ingress_map_fd >= 0)
    3405                 :          0 :                 r = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
    3406                 :            : 
    3407         [ -  + ]:         24 :         if (u->ip_accounting_egress_map_fd >= 0)
    3408                 :          0 :                 q = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
    3409                 :            : 
    3410         [ +  - ]:         24 :         zero(u->ip_accounting_extra);
    3411                 :            : 
    3412         [ -  + ]:         24 :         return r < 0 ? r : q;
    3413                 :            : }
    3414                 :            : 
    3415                 :         24 : int unit_reset_io_accounting(Unit *u) {
    3416                 :            :         int r;
    3417                 :            : 
    3418         [ -  + ]:         24 :         assert(u);
    3419                 :            : 
    3420         [ +  + ]:        120 :         for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
    3421                 :         96 :                 u->io_accounting_last[i] = UINT64_MAX;
    3422                 :            : 
    3423                 :         24 :         r = unit_get_io_accounting_raw(u, u->io_accounting_base);
    3424         [ +  - ]:         24 :         if (r < 0) {
    3425         [ +  - ]:         24 :                 zero(u->io_accounting_base);
    3426                 :         24 :                 return r;
    3427                 :            :         }
    3428                 :            : 
    3429                 :          0 :         return 0;
    3430                 :            : }
    3431                 :            : 
    3432                 :         24 : int unit_reset_accounting(Unit *u) {
    3433                 :            :         int r, q, v;
    3434                 :            : 
    3435         [ -  + ]:         24 :         assert(u);
    3436                 :            : 
    3437                 :         24 :         r = unit_reset_cpu_accounting(u);
    3438                 :         24 :         q = unit_reset_io_accounting(u);
    3439                 :         24 :         v = unit_reset_ip_accounting(u);
    3440                 :            : 
    3441   [ -  +  #  # ]:         24 :         return r < 0 ? r : q < 0 ? q : v;
    3442                 :            : }
    3443                 :            : 
    3444                 :          0 : void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
    3445         [ #  # ]:          0 :         assert(u);
    3446                 :            : 
    3447         [ #  # ]:          0 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    3448                 :          0 :                 return;
    3449                 :            : 
    3450         [ #  # ]:          0 :         if (m == 0)
    3451                 :          0 :                 return;
    3452                 :            : 
    3453                 :            :         /* always invalidate compat pairs together */
    3454         [ #  # ]:          0 :         if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
    3455                 :          0 :                 m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
    3456                 :            : 
    3457         [ #  # ]:          0 :         if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
    3458                 :          0 :                 m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
    3459                 :            : 
    3460         [ #  # ]:          0 :         if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
    3461                 :          0 :                 return;
    3462                 :            : 
    3463                 :          0 :         u->cgroup_invalidated_mask |= m;
    3464                 :          0 :         unit_add_to_cgroup_realize_queue(u);
    3465                 :            : }
    3466                 :            : 
    3467                 :          0 : void unit_invalidate_cgroup_bpf(Unit *u) {
    3468         [ #  # ]:          0 :         assert(u);
    3469                 :            : 
    3470         [ #  # ]:          0 :         if (!UNIT_HAS_CGROUP_CONTEXT(u))
    3471                 :          0 :                 return;
    3472                 :            : 
    3473         [ #  # ]:          0 :         if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
    3474                 :          0 :                 return;
    3475                 :            : 
    3476                 :          0 :         u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
    3477                 :          0 :         unit_add_to_cgroup_realize_queue(u);
    3478                 :            : 
    3479                 :            :         /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
    3480                 :            :          * list of our children includes our own. */
    3481         [ #  # ]:          0 :         if (u->type == UNIT_SLICE) {
    3482                 :            :                 Unit *member;
    3483                 :            :                 Iterator i;
    3484                 :            :                 void *v;
    3485                 :            : 
    3486         [ #  # ]:          0 :                 HASHMAP_FOREACH_KEY(v, member, u->dependencies[UNIT_BEFORE], i) {
    3487         [ #  # ]:          0 :                         if (UNIT_DEREF(member->slice) == u)
    3488                 :          0 :                                 unit_invalidate_cgroup_bpf(member);
    3489                 :            :                 }
    3490                 :            :         }
    3491                 :            : }
    3492                 :            : 
    3493                 :       7148 : bool unit_cgroup_delegate(Unit *u) {
    3494                 :            :         CGroupContext *c;
    3495                 :            : 
    3496         [ -  + ]:       7148 :         assert(u);
    3497                 :            : 
    3498         [ +  + ]:       7148 :         if (!UNIT_VTABLE(u)->can_delegate)
    3499                 :       6344 :                 return false;
    3500                 :            : 
    3501                 :        804 :         c = unit_get_cgroup_context(u);
    3502         [ -  + ]:        804 :         if (!c)
    3503                 :          0 :                 return false;
    3504                 :            : 
    3505                 :        804 :         return c->delegate;
    3506                 :            : }
    3507                 :            : 
    3508                 :          4 : void manager_invalidate_startup_units(Manager *m) {
    3509                 :            :         Iterator i;
    3510                 :            :         Unit *u;
    3511                 :            : 
    3512         [ -  + ]:          4 :         assert(m);
    3513                 :            : 
    3514         [ -  + ]:          4 :         SET_FOREACH(u, m->startup_units, i)
    3515                 :          0 :                 unit_invalidate_cgroup(u, CGROUP_MASK_CPU|CGROUP_MASK_IO|CGROUP_MASK_BLKIO);
    3516                 :          4 : }
    3517                 :            : 
    3518                 :        448 : static int unit_get_nice(Unit *u) {
    3519                 :            :         ExecContext *ec;
    3520                 :            : 
    3521                 :        448 :         ec = unit_get_exec_context(u);
    3522         [ +  + ]:        448 :         return ec ? ec->nice : 0;
    3523                 :            : }
    3524                 :            : 
    3525                 :        448 : static uint64_t unit_get_cpu_weight(Unit *u) {
    3526                 :        448 :         ManagerState state = manager_state(u->manager);
    3527                 :            :         CGroupContext *cc;
    3528                 :            : 
    3529                 :        448 :         cc = unit_get_cgroup_context(u);
    3530         [ +  + ]:        448 :         return cc ? cgroup_context_cpu_weight(cc, state) : CGROUP_WEIGHT_DEFAULT;
    3531                 :            : }
    3532                 :            : 
    3533                 :        505 : int compare_job_priority(const void *a, const void *b) {
    3534                 :        505 :         const Job *x = a, *y = b;
    3535                 :            :         int nice_x, nice_y;
    3536                 :            :         uint64_t weight_x, weight_y;
    3537                 :            :         int ret;
    3538                 :            : 
    3539   [ +  +  +  + ]:        505 :         if ((ret = CMP(x->unit->type, y->unit->type)) != 0)
    3540                 :        281 :                 return -ret;
    3541                 :            : 
    3542                 :        224 :         weight_x = unit_get_cpu_weight(x->unit);
    3543                 :        224 :         weight_y = unit_get_cpu_weight(y->unit);
    3544                 :            : 
    3545   [ +  -  -  + ]:        224 :         if ((ret = CMP(weight_x, weight_y)) != 0)
    3546                 :          0 :                 return -ret;
    3547                 :            : 
    3548                 :        224 :         nice_x = unit_get_nice(x->unit);
    3549                 :        224 :         nice_y = unit_get_nice(y->unit);
    3550                 :            : 
    3551   [ +  -  -  + ]:        224 :         if ((ret = CMP(nice_x, nice_y)) != 0)
    3552                 :          0 :                 return ret;
    3553                 :            : 
    3554                 :        224 :         return strcmp(x->unit->id, y->unit->id);
    3555                 :            : }
    3556                 :            : 
    3557                 :            : static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
    3558                 :            :         [CGROUP_AUTO] = "auto",
    3559                 :            :         [CGROUP_CLOSED] = "closed",
    3560                 :            :         [CGROUP_STRICT] = "strict",
    3561                 :            : };
    3562                 :            : 
    3563   [ +  +  +  + ]:        768 : DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);

Generated by: LCOV version 1.14