1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <unistd.h>
4 
5 #include "cgroup-setup.h"
6 #include "cgroup-util.h"
7 #include "errno-util.h"
8 #include "fd-util.h"
9 #include "fileio.h"
10 #include "fs-util.h"
11 #include "mkdir.h"
12 #include "parse-util.h"
13 #include "path-util.h"
14 #include "proc-cmdline.h"
15 #include "process-util.h"
16 #include "recurse-dir.h"
17 #include "stdio-util.h"
18 #include "string-util.h"
19 #include "user-util.h"
20 #include "virt.h"
21 
cg_any_controller_used_for_v1(void)22 static int cg_any_controller_used_for_v1(void) {
23         _cleanup_free_ char *buf = NULL;
24         _cleanup_strv_free_ char **lines = NULL;
25         int r;
26 
27         r = read_full_virtual_file("/proc/cgroups", &buf, NULL);
28         if (r < 0)
29                 return log_debug_errno(r, "Could not read /proc/cgroups, ignoring: %m");
30 
31         r = strv_split_newlines_full(&lines, buf, 0);
32         if (r < 0)
33                 return r;
34 
35         /* The intention of this is to check if the fully unified cgroup tree setup is possible, meaning all
36          * enabled kernel cgroup controllers are currently not in use by cgroup1.  For reference:
37          * https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
38          *
39          * Note that this is typically only useful to check inside a container where we don't know what
40          * cgroup tree setup is in use by the host; if the host is using legacy or hybrid, we can't use
41          * unified since some or all controllers would be missing. This is not the best way to detect this,
42          * as whatever container manager created our container should have mounted /sys/fs/cgroup
43          * appropriately, but in case that wasn't done, we try to detect if it's possible for us to use
44          * unified cgroups. */
45         STRV_FOREACH(line, lines) {
46                 _cleanup_free_ char *name = NULL, *hierarchy_id = NULL, *num = NULL, *enabled = NULL;
47 
48                 /* Skip header line */
49                 if (startswith(*line, "#"))
50                         continue;
51 
52                 const char *p = *line;
53                 r = extract_many_words(&p, NULL, 0, &name, &hierarchy_id, &num, &enabled, NULL);
54                 if (r < 0)
55                         return log_debug_errno(r, "Error parsing /proc/cgroups line, ignoring: %m");
56                 else if (r < 4) {
57                         log_debug("Invalid /proc/cgroups line, ignoring.");
58                         continue;
59                 }
60 
61                 /* Ignore disabled controllers. */
62                 if (streq(enabled, "0"))
63                         continue;
64 
65                 /* Ignore controllers we don't care about. */
66                 if (cgroup_controller_from_string(name) < 0)
67                         continue;
68 
69                 /* Since the unified cgroup doesn't use multiple hierarchies, if any controller has a
70                  * non-zero hierarchy_id that means it's in use already in a legacy (or hybrid) cgroup v1
71                  * hierarchy, and can't be used in a unified cgroup. */
72                 if (!streq(hierarchy_id, "0")) {
73                         log_debug("Cgroup controller %s in use by legacy v1 hierarchy.", name);
74                         return 1;
75                 }
76         }
77 
78         return 0;
79 }
80 
cg_is_unified_wanted(void)81 bool cg_is_unified_wanted(void) {
82         static thread_local int wanted = -1;
83         bool b;
84         const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
85         _cleanup_free_ char *c = NULL;
86         int r;
87 
88         /* If we have a cached value, return that. */
89         if (wanted >= 0)
90                 return wanted;
91 
92         /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
93         r = cg_unified_cached(true);
94         if (r >= 0)
95                 return (wanted = r >= CGROUP_UNIFIED_ALL);
96 
97         /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
98         r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
99         if (r > 0)
100                 return (wanted = b);
101 
102         /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want to
103          * use hybrid or legacy hierarchy. */
104         r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
105         if (r > 0 && streq_ptr(c, "all"))
106                 return (wanted = true);
107 
108         /* If any controller is in use as v1, don't use unified. */
109         if (cg_any_controller_used_for_v1() > 0)
110                 return (wanted = false);
111 
112         return (wanted = is_default);
113 }
114 
cg_is_legacy_wanted(void)115 bool cg_is_legacy_wanted(void) {
116         static thread_local int wanted = -1;
117 
118         /* If we have a cached value, return that. */
119         if (wanted >= 0)
120                 return wanted;
121 
122         /* Check if we have cgroup v2 already mounted. */
123         if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
124                 return (wanted = false);
125 
126         /* Otherwise, assume that at least partial legacy is wanted,
127          * since cgroup v2 should already be mounted at this point. */
128         return (wanted = true);
129 }
130 
cg_is_hybrid_wanted(void)131 bool cg_is_hybrid_wanted(void) {
132         static thread_local int wanted = -1;
133         int r;
134         bool b;
135         const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
136         /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
137          * because if we get called, it means that unified hierarchy was not mounted. */
138 
139         /* If we have a cached value, return that. */
140         if (wanted >= 0)
141                 return wanted;
142 
143         /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
144         if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
145                 return (wanted = false);
146 
147         /* Otherwise, let's see what the kernel command line has to say.  Since checking is expensive, cache
148          * a non-error result. */
149         r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);
150 
151         /* The meaning of the kernel option is reversed wrt. to the return value of this function, hence the
152          * negation. */
153         return (wanted = r > 0 ? !b : is_default);
154 }
155 
cg_weight_parse(const char * s,uint64_t * ret)156 int cg_weight_parse(const char *s, uint64_t *ret) {
157         uint64_t u;
158         int r;
159 
160         if (isempty(s)) {
161                 *ret = CGROUP_WEIGHT_INVALID;
162                 return 0;
163         }
164 
165         r = safe_atou64(s, &u);
166         if (r < 0)
167                 return r;
168 
169         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
170                 return -ERANGE;
171 
172         *ret = u;
173         return 0;
174 }
175 
cg_cpu_shares_parse(const char * s,uint64_t * ret)176 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
177         uint64_t u;
178         int r;
179 
180         if (isempty(s)) {
181                 *ret = CGROUP_CPU_SHARES_INVALID;
182                 return 0;
183         }
184 
185         r = safe_atou64(s, &u);
186         if (r < 0)
187                 return r;
188 
189         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
190                 return -ERANGE;
191 
192         *ret = u;
193         return 0;
194 }
195 
cg_blkio_weight_parse(const char * s,uint64_t * ret)196 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
197         uint64_t u;
198         int r;
199 
200         if (isempty(s)) {
201                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
202                 return 0;
203         }
204 
205         r = safe_atou64(s, &u);
206         if (r < 0)
207                 return r;
208 
209         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
210                 return -ERANGE;
211 
212         *ret = u;
213         return 0;
214 }
215 
/* recurse_dir_at() callback: removes each inner cgroup directory on the way back up
 * the tree (RECURSE_DIR_LEAVE, i.e. after its own children were handled). */
static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* We only care about directories we are about to leave; everything else just continues. */
        if (event != RECURSE_DIR_LEAVE || de->d_type != DT_DIR)
                return RECURSE_DIR_CONTINUE;

        /* Failures to delete inner cgroup we ignore (but debug log in case error code is unexpected) */
        if (unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}
234 
/* Recursively removes all empty subgroups below 'path' in the hierarchy of 'controller', and — if
 * delete_root is true and 'path' is not the root — the cgroup 'path' itself. On a hybrid setup the
 * trim is mirrored into the legacy compat hierarchy (best-effort). Returns 0 or a negative errno;
 * failures to remove inner (non-root) groups are ignored. */
int cg_trim(const char *controller, const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r, q;

        assert(path);
        assert(controller);

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        /* Walk the subtree depth-first; trim_cb() rmdir()s each directory on leave. */
        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        NULL);
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
                r = 0;
        else if (r < 0)
                log_debug_errno(r, "Failed to iterate through cgroup %s: %m", path);

        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
         * expected one. */
        if (delete_root && !empty_or_root(path) &&
            rmdir(fs) < 0 && errno != ENOENT) {
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                        log_debug_errno(errno, "Failed to trim cgroup %s: %m", path);
                /* Only overwrite r if the recursion above didn't already record an error. */
                if (r >= 0)
                        r = -errno;
        }

        /* On a hybrid setup, also trim the same path in the legacy systemd hierarchy (best-effort). */
        q = cg_hybrid_unified();
        if (q < 0)
                return q;
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);

        return r;
}
278 
279 /* Create a cgroup in the hierarchy of controller.
280  * Returns 0 if the group already existed, 1 on success, negative otherwise.
281  */
cg_create(const char * controller,const char * path)282 int cg_create(const char *controller, const char *path) {
283         _cleanup_free_ char *fs = NULL;
284         int r;
285 
286         r = cg_get_path_and_check(controller, path, NULL, &fs);
287         if (r < 0)
288                 return r;
289 
290         r = mkdir_parents(fs, 0755);
291         if (r < 0)
292                 return r;
293 
294         r = RET_NERRNO(mkdir(fs, 0755));
295         if (r == -EEXIST)
296                 return 0;
297         if (r < 0)
298                 return r;
299 
300         r = cg_hybrid_unified();
301         if (r < 0)
302                 return r;
303 
304         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
305                 r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
306                 if (r < 0)
307                         log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
308         }
309 
310         return 1;
311 }
312 
/* Creates the cgroup and moves the specified process into it. Returns the result
 * of cg_create() (0 if the group existed, 1 if it was created) on success,
 * negative on failure. */
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int created, r;

        assert(pid >= 0);

        created = cg_create(controller, path);
        if (created < 0)
                return created;

        r = cg_attach(controller, path, pid);
        if (r < 0)
                return r;

        /* This does not remove the cgroup on failure */
        return created;
}
329 
cg_attach(const char * controller,const char * path,pid_t pid)330 int cg_attach(const char *controller, const char *path, pid_t pid) {
331         _cleanup_free_ char *fs = NULL;
332         char c[DECIMAL_STR_MAX(pid_t) + 2];
333         int r;
334 
335         assert(path);
336         assert(pid >= 0);
337 
338         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
339         if (r < 0)
340                 return r;
341 
342         if (pid == 0)
343                 pid = getpid_cached();
344 
345         xsprintf(c, PID_FMT "\n", pid);
346 
347         r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
348         if (r == -EOPNOTSUPP && cg_is_threaded(controller, path) > 0)
349                 /* When the threaded mode is used, we cannot read/write the file. Let's return recognizable error. */
350                 return -EUCLEAN;
351         if (r < 0)
352                 return r;
353 
354         r = cg_hybrid_unified();
355         if (r < 0)
356                 return r;
357 
358         if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
359                 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
360                 if (r < 0)
361                         log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
362         }
363 
364         return 0;
365 }
366 
cg_attach_fallback(const char * controller,const char * path,pid_t pid)367 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
368         int r;
369 
370         assert(controller);
371         assert(path);
372         assert(pid >= 0);
373 
374         r = cg_attach(controller, path, pid);
375         if (r < 0) {
376                 char prefix[strlen(path) + 1];
377 
378                 /* This didn't work? Then let's try all prefixes of
379                  * the destination */
380 
381                 PATH_FOREACH_PREFIX(prefix, path) {
382                         int q;
383 
384                         q = cg_attach(controller, prefix, pid);
385                         if (q >= 0)
386                                 return q;
387                 }
388         }
389 
390         return r;
391 }
392 
/* Chowns/chmods the cgroup 'path' of 'controller' and its delegation-relevant attribute files to
 * uid/gid, so an unprivileged payload can manage its own subtree. A no-op if both uid and gid are
 * invalid. On a hybrid setup the access mode is propagated to the legacy systemd hierarchy as well
 * (best-effort). Returns 0 on success or if nothing was to be done, negative errno on failure. */
int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;   /* attribute file relative to the cgroup directory */
                bool fatal;         /* if true, failure to chown/chmod it fails the whole call */
        };

        /* cgroup v1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs",           true  },
                { "tasks",                  false },
                { "cgroup.clone_children",  false },
                {},
        };

        /* cgroup v2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                {},
        };

        /* Indexed by the boolean result of cg_unified_controller(). */
        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true]  = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        /* Nothing to change? Then we are done already. */
        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        /* Only the "fatal" attributes abort the operation; the others might
                         * legitimately be missing. */
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}
478 
/* Moves all processes found in cgroup pfrom (hierarchy cfrom) into cgroup pto (hierarchy cto).
 * Re-enumerates until a full pass moves nothing, since attaching can race with new processes
 * appearing or processes re-forking. With CGROUP_IGNORE_SELF our own PID is left alone.
 * Returns > 0 if at least one process was migrated, 0 if none was, negative errno on failure
 * (the first error encountered is reported, but migration of the remaining processes is still
 * attempted). */
int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        /* Tracks PIDs we already (tried to) move, so the retry loop below terminates even if some
         * process refuses to be migrated. */
        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid_cached();

        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        /* A vanished source group is fine; anything else is an error unless we
                         * already have one recorded in ret. */
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we aren't a
                         * single-threaded program. However, we
                         * luckily know we are not */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        /* Already handled in an earlier pass? */
                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only
                         * exist in the root cgroup, we only check for
                         * them there. */
                        if (cfrom &&
                            empty_or_root(pfrom) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                /* Remember the first error (a process that died meanwhile is not
                                 * an error). */
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        /* We moved (or tried to move) something, so do another pass. */
                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}
562 
/* Like cg_migrate(), but also recursively migrates the processes of all subgroups of pfrom.
 * With CGROUP_REMOVE the (now hopefully empty) source group is removed afterwards. Returns
 * > 0 if anything was migrated, 0 if nothing was, negative errno on the first error (later
 * subgroups are still processed). */
int cg_migrate_recursive(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        _cleanup_closedir_ DIR *d = NULL;
        int r, ret = 0;
        char *fn;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        /* First migrate the processes of pfrom itself. */
        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);

        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
        if (r < 0) {
                /* A vanished source group is fine; anything else is an error unless we already
                 * have one recorded in ret. */
                if (ret >= 0 && r != -ENOENT)
                        return r;

                return ret;
        }

        while ((r = cg_read_subgroup(d, &fn)) > 0) {
                _cleanup_free_ char *p = NULL;

                /* cg_read_subgroup() transfers ownership of fn to us. */
                p = path_join(empty_to_root(pfrom), fn);
                free(fn);
                if (!p)
                        return -ENOMEM;

                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
                if (r != 0 && ret >= 0)
                        ret = r;
        }

        /* Remember a read error from cg_read_subgroup(), unless we already have one. */
        if (r < 0 && ret >= 0)
                ret = r;

        if (flags & CGROUP_REMOVE) {
                r = cg_rmdir(cfrom, pfrom);
                /* A group that is already gone or still busy is not an error. */
                if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
                        return r;
        }

        return ret;
}
613 
/* Like cg_migrate_recursive(), but if migrating into 'pto' fails, tries every parent
 * prefix of 'pto' in turn. Returns the first success, or the original error if
 * nothing worked. */
int cg_migrate_recursive_fallback(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        int r;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
        if (r >= 0)
                return r;

        /* This didn't work? Then let's try all prefixes of the destination */
        char prefix[strlen(pto) + 1];
        PATH_FOREACH_PREFIX(prefix, pto) {
                int q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
                if (q >= 0)
                        return q;
        }

        return r;
}
645 
/* Creates a cgroup in our private (systemd) tree, and duplicates it in every v1 controller
 * hierarchy listed in 'mask', while skipping all other supported hierarchies.
 *
 * Returns 0 if the group already existed in the systemd hierarchy, 1 on success, negative
 * otherwise. */
int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        int r;

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;
        bool created = r;

        /* If we are in the unified hierarchy, we are done now */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        CGroupMask done = 0;

        /* Otherwise, do the same in the other hierarchies */
        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                /* Joined controllers share a hierarchy; skip the ones we handled already. */
                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit))
                        (void) cg_create(cgroup_controller_to_string(c), path);

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}
697 
/* Attaches the given process to 'path' in the systemd hierarchy and, on non-unified setups,
 * to the corresponding group of every supported v1 controller hierarchy (best-effort, falling
 * back to parent groups). path_callback, if non-NULL, may supply a per-controller path.
 * Returns 0 on success, negative if attaching in the systemd hierarchy failed. */
int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
        int r;

        r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (r < 0)
                return r;

        /* On a fully unified setup there are no per-controller hierarchies to handle. */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                /* Joined controllers share a hierarchy; skip the ones we handled already. */
                if (FLAGS_SET(done, bit))
                        continue;

                const char *p = NULL;
                if (path_callback)
                        p = path_callback(bit, userdata);
                if (!p)
                        p = path;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}
735 
/* For every supported v1 controller selected by 'mask', recursively migrates the processes of
 * 'from' (in the systemd hierarchy) into the path supplied by to_callback for that controller.
 * Continues past failures; returns the first error encountered, otherwise the last result. */
int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        int r = 0;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                /* Joined controllers share a hierarchy; skip the ones we handled already. */
                if (FLAGS_SET(done, bit))
                        continue;

                if (!FLAGS_SET(mask, bit))
                        continue;

                const char *to = to_callback(bit, userdata);

                /* Remember first error and try continuing */
                int q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                r = (r < 0) ? r : q;
        }

        return r;
}
769 
/* Trims 'path' in the systemd hierarchy and, on non-unified setups, in every supported
 * v1 controller hierarchy as well. Returns 0 or negative errno. */
int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int r, all_unified;

        r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        all_unified = cg_all_unified();
        if (all_unified < 0)
                return all_unified;
        if (all_unified > 0)
                /* Fully unified: there are no v1 hierarchies to clean up. */
                return r;

        return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}
785 
/* Trims 'path' in every supported v1 controller hierarchy selected by 'mask'. Continues
 * past failures; returns the first error encountered, otherwise the last result. */
int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        int r = 0;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                /* Joined controllers share a hierarchy; skip the ones we handled already. */
                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        /* Remember first error and try continuing */
                        int q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        r = (r < 0) ? r : q;
                }
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}
814 
int cg_enable_everywhere(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupMask done = 0;
        int r;

        assert(p);

        /* Turn the requested set of cgroup v2 controllers on (and the rest off) in the
         * cgroup.subtree_control attribute of the cgroup 'p'. On success, the mask of controllers
         * actually enabled afterwards is returned in *ret_result_mask (if non-NULL). */

        /* No controllers available at all? Then there's nothing we could possibly enable. */
        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0) {
                /* On the legacy hierarchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
                 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
                 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
                 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
                 * minimize surprises here and reduce triggers for re-realization by always saying we fully
                 * succeeded.) */
                if (ret_result_mask)
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
                                                                               * CGROUP_MASK_V2: The 'supported' mask
                                                                               * might contain pure-V1 or BPF
                                                                               * controllers, and we never want to
                                                                               * claim that we could enable those with
                                                                               * cgroup.subtree_control */
                return 0;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                /* Only real cgroup v2 controllers can be toggled via cgroup.subtree_control; skip
                 * pure-V1 and BPF pseudo-controllers. */
                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
                        continue;

                /* Skip controllers the kernel doesn't actually support. */
                if (!FLAGS_SET(supported, bit))
                        continue;

                n = cgroup_controller_to_string(c);

                /* Build the "+name" (enable) or "-name" (disable) token the kernel expects. */
                char s[1 + strlen(n) + 1];
                s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
                strcpy(s + 1, n);

                /* Open the attribute file lazily, on first use, so that we don't touch it at all
                 * if every controller gets filtered out above. */
                if (!f) {
                        f = fopen(fs, "we");
                        if (!f)
                                return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                }

                r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                if (r >= 0) {
                        /* We managed to turn on a controller? Then reflect it in the result mask. */
                        if (FLAGS_SET(mask, bit))
                                done |= bit;
                        continue;
                }

                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                clearerr(f); /* Drop the error state so later writes to the same stream aren't poisoned. */

                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
                 * happens for example when we attempt to turn off a controller up in the tree that is
                 * used down in the tree.
                 *
                 * You might wonder why we check for EBUSY only here, and not follow the same logic for
                 * other errors such as EINVAL or EOPNOTSUPP or anything else. That's because EBUSY
                 * indicates that the controllers is currently enabled and cannot be disabled because
                 * something down the hierarchy is still using it. Any other error most likely means
                 * something like "I never heard of this controller" or similar. In the former case it's
                 * hence safe to assume the controller is still on after the failed operation, while in
                 * the latter case it's safer to assume the controller is unknown and hence certainly
                 * not enabled. */
                if (!FLAGS_SET(mask, bit) && r == -EBUSY)
                        done |= bit;
        }

        /* Let's return the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = done;

        return 0;
}
922