/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <unistd.h>

#include "cgroup-setup.h"
#include "cgroup-util.h"
#include "errno-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "recurse-dir.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
#include "virt.h"

static int cg_any_controller_used_for_v1(void) {
        _cleanup_free_ char *buf = NULL;
        _cleanup_strv_free_ char **lines = NULL;
        int r;

        r = read_full_virtual_file("/proc/cgroups", &buf, NULL);
        if (r < 0)
                return log_debug_errno(r, "Could not read /proc/cgroups, ignoring: %m");

        r = strv_split_newlines_full(&lines, buf, 0);
        if (r < 0)
                return r;

        /* The intention of this is to check if the fully unified cgroup tree setup is possible, meaning all
         * enabled kernel cgroup controllers are currently not in use by cgroup1. For reference:
         * https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups-
         *
         * Note that this is typically only useful to check inside a container where we don't know what
         * cgroup tree setup is in use by the host; if the host is using legacy or hybrid, we can't use
         * unified since some or all controllers would be missing. This is not the best way to detect this,
         * as whatever container manager created our container should have mounted /sys/fs/cgroup
         * appropriately, but in case that wasn't done, we try to detect if it's possible for us to use
         * unified cgroups. */
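        /* For orientation (kernel interface, not specific to this file): each non-comment line of
         * /proc/cgroups carries four tab-separated fields, e.g.:
         *
         *     #subsys_name    hierarchy       num_cgroups     enabled
         *     cpu             5               93              1
         *
         * i.e. controller name, v1 hierarchy ID (0 if unused by v1), cgroup count, and an enabled flag,
         * which is exactly what the parsing below extracts. The example values are illustrative. */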
        STRV_FOREACH(line, lines) {
                _cleanup_free_ char *name = NULL, *hierarchy_id = NULL, *num = NULL, *enabled = NULL;

                /* Skip header line */
                if (startswith(*line, "#"))
                        continue;

                const char *p = *line;
                r = extract_many_words(&p, NULL, 0, &name, &hierarchy_id, &num, &enabled, NULL);
                if (r < 0)
                        return log_debug_errno(r, "Error parsing /proc/cgroups line, ignoring: %m");
                else if (r < 4) {
                        log_debug("Invalid /proc/cgroups line, ignoring.");
                        continue;
                }

                /* Ignore disabled controllers. */
                if (streq(enabled, "0"))
                        continue;

                /* Ignore controllers we don't care about. */
                if (cgroup_controller_from_string(name) < 0)
                        continue;

                /* Since the unified cgroup doesn't use multiple hierarchies, if any controller has a
                 * non-zero hierarchy_id that means it's in use already in a legacy (or hybrid) cgroup v1
                 * hierarchy, and can't be used in a unified cgroup. */
                if (!streq(hierarchy_id, "0")) {
                        log_debug("Cgroup controller %s in use by legacy v1 hierarchy.", name);
                        return 1;
                }
        }

        return 0;
}

bool cg_is_unified_wanted(void) {
        static thread_local int wanted = -1;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
        _cleanup_free_ char *c = NULL;
        int r;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        r = cg_unified_cached(true);
        if (r >= 0)
                return (wanted = r >= CGROUP_UNIFIED_ALL);

        /* If we were explicitly passed systemd.unified_cgroup_hierarchy, respect that. */
        r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
        if (r > 0)
                return (wanted = b);

        /* If we passed cgroup_no_v1=all with no other instructions, it seems highly unlikely that we want
         * to use the hybrid or legacy hierarchy. */
        r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
        if (r > 0 && streq_ptr(c, "all"))
                return (wanted = true);

        /* If any controller is in use as v1, don't use unified. */
        if (cg_any_controller_used_for_v1() > 0)
                return (wanted = false);

        return (wanted = is_default);
}

bool cg_is_legacy_wanted(void) {
        static thread_local int wanted = -1;

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* Check if we have cgroup v2 already mounted. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, assume that at least partial legacy is wanted,
         * since cgroup v2 should already be mounted at this point. */
        return (wanted = true);
}

bool cg_is_hybrid_wanted(void) {
        static thread_local int wanted = -1;
        int r;
        bool b;
        const bool is_default = DEFAULT_HIERARCHY >= CGROUP_UNIFIED_SYSTEMD;
        /* We default to true if the default is "hybrid", obviously, but also when the default is "unified",
         * because if we get called, it means that the unified hierarchy was not mounted. */

        /* If we have a cached value, return that. */
        if (wanted >= 0)
                return wanted;

        /* If the hierarchy is already mounted, then follow whatever was chosen for it. */
        if (cg_unified_cached(true) == CGROUP_UNIFIED_ALL)
                return (wanted = false);

        /* Otherwise, let's see what the kernel command line has to say. Since checking is expensive, cache
         * a non-error result. */
        r = proc_cmdline_get_bool("systemd.legacy_systemd_cgroup_controller", &b);

        /* The meaning of the kernel option is reversed with respect to the return value of this function,
         * hence the negation. */
        return (wanted = r > 0 ? !b : is_default);
}

int cg_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}
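
/* An illustrative use of the parser above (the literal values are assumptions for the sketch, not taken
 * from this file; in systemd CGROUP_WEIGHT_MIN/MAX are 1/10000):
 *
 *     uint64_t w;
 *     assert_se(cg_weight_parse("100", &w) >= 0);     // parses fine, w == 100
 *     assert_se(cg_weight_parse("", &w) >= 0);        // empty means "unset", w == CGROUP_WEIGHT_INVALID
 *     assert_se(cg_weight_parse("0", &w) == -ERANGE); // below CGROUP_WEIGHT_MIN
 */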

int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_CPU_SHARES_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
        uint64_t u;
        int r;

        if (isempty(s)) {
                *ret = CGROUP_BLKIO_WEIGHT_INVALID;
                return 0;
        }

        r = safe_atou64(s, &u);
        if (r < 0)
                return r;

        if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
                return -ERANGE;

        *ret = u;
        return 0;
}

static int trim_cb(
                RecurseDirEvent event,
                const char *path,
                int dir_fd,
                int inode_fd,
                const struct dirent *de,
                const struct statx *sx,
                void *userdata) {

        /* Failures to delete inner cgroups we ignore (but debug log in case the error code is unexpected) */
        if (event == RECURSE_DIR_LEAVE &&
            de->d_type == DT_DIR &&
            unlinkat(dir_fd, de->d_name, AT_REMOVEDIR) < 0 &&
            !IN_SET(errno, ENOENT, ENOTEMPTY, EBUSY))
                log_debug_errno(errno, "Failed to trim inner cgroup %s, ignoring: %m", path);

        return RECURSE_DIR_CONTINUE;
}

int cg_trim(const char *controller, const char *path, bool delete_root) {
        _cleanup_free_ char *fs = NULL;
        int r, q;

        assert(path);
        assert(controller);

        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = recurse_dir_at(
                        AT_FDCWD,
                        fs,
                        /* statx_mask= */ 0,
                        /* n_depth_max= */ UINT_MAX,
                        RECURSE_DIR_ENSURE_TYPE,
                        trim_cb,
                        NULL);
        if (r == -ENOENT) /* non-existing is the ultimate trimming, hence no error */
                r = 0;
        else if (r < 0)
                log_debug_errno(r, "Failed to iterate through cgroup %s: %m", path);

        /* If we shall delete the top-level cgroup, then propagate the failure to do so (except if it is
         * already gone anyway). Also, let's debug log about this failure, except if the error code is an
         * expected one. */
        if (delete_root && !empty_or_root(path) &&
            rmdir(fs) < 0 && errno != ENOENT) {
                if (!IN_SET(errno, ENOTEMPTY, EBUSY))
                        log_debug_errno(errno, "Failed to trim cgroup %s: %m", path);
                if (r >= 0)
                        r = -errno;
        }

        q = cg_hybrid_unified();
        if (q < 0)
                return q;
        if (q > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER))
                (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, delete_root);

        return r;
}

/* Create a cgroup in the hierarchy of controller.
 * Returns 0 if the group already existed, 1 on success, negative otherwise.
 */
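/* A minimal usage sketch (the cgroup path is illustrative, not from this file):
 *
 *     int r = cg_create(SYSTEMD_CGROUP_CONTROLLER, "/myservice");
 *     if (r < 0)
 *             return r;
 *     log_debug("cgroup %s", r > 0 ? "created" : "already existed");
 */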
int cg_create(const char *controller, const char *path) {
        _cleanup_free_ char *fs = NULL;
        int r;

        r = cg_get_path_and_check(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = mkdir_parents(fs, 0755);
        if (r < 0)
                return r;

        r = RET_NERRNO(mkdir(fs, 0755));
        if (r == -EEXIST)
                return 0;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_create(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path);
                if (r < 0)
                        log_warning_errno(r, "Failed to create compat systemd cgroup %s: %m", path);
        }

        return 1;
}

int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
        int r, q;

        assert(pid >= 0);

        r = cg_create(controller, path);
        if (r < 0)
                return r;

        q = cg_attach(controller, path, pid);
        if (q < 0)
                return q;

        /* This does not remove the cgroup on failure */
        return r;
}

int cg_attach(const char *controller, const char *path, pid_t pid) {
        _cleanup_free_ char *fs = NULL;
        char c[DECIMAL_STR_MAX(pid_t) + 2];
        int r;

        assert(path);
        assert(pid >= 0);

        r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
        if (r < 0)
                return r;

        if (pid == 0)
                pid = getpid_cached();

        xsprintf(c, PID_FMT "\n", pid);

        r = write_string_file(fs, c, WRITE_STRING_FILE_DISABLE_BUFFER);
        if (r == -EOPNOTSUPP && cg_is_threaded(controller, path) > 0)
                /* When threaded mode is used, we cannot read/write the file. Let's return a recognizable
                 * error. */
                return -EUCLEAN;
        if (r < 0)
                return r;

        r = cg_hybrid_unified();
        if (r < 0)
                return r;

        if (r > 0 && streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_attach(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, pid);
                if (r < 0)
                        log_warning_errno(r, "Failed to attach "PID_FMT" to compat systemd cgroup %s: %m", pid, path);
        }

        return 0;
}

int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
        int r;

        assert(controller);
        assert(path);
        assert(pid >= 0);

        r = cg_attach(controller, path, pid);
        if (r < 0) {
                char prefix[strlen(path) + 1];

                /* This didn't work? Then let's try all prefixes of the destination */

                PATH_FOREACH_PREFIX(prefix, path) {
                        int q;

                        q = cg_attach(controller, prefix, pid);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_set_access(
                const char *controller,
                const char *path,
                uid_t uid,
                gid_t gid) {

        struct Attribute {
                const char *name;
                bool fatal;
        };

        /* cgroup v1, aka legacy/non-unified */
        static const struct Attribute legacy_attributes[] = {
                { "cgroup.procs",           true  },
                { "tasks",                  false },
                { "cgroup.clone_children",  false },
                {},
        };

        /* cgroup v2, aka unified */
        static const struct Attribute unified_attributes[] = {
                { "cgroup.procs",           true  },
                { "cgroup.subtree_control", true  },
                { "cgroup.threads",         false },
                {},
        };

        static const struct Attribute* const attributes[] = {
                [false] = legacy_attributes,
                [true]  = unified_attributes,
        };

        _cleanup_free_ char *fs = NULL;
        const struct Attribute *i;
        int r, unified;

        assert(path);

        if (uid == UID_INVALID && gid == GID_INVALID)
                return 0;

        unified = cg_unified_controller(controller);
        if (unified < 0)
                return unified;

        /* Configure access to the cgroup itself */
        r = cg_get_path(controller, path, NULL, &fs);
        if (r < 0)
                return r;

        r = chmod_and_chown(fs, 0755, uid, gid);
        if (r < 0)
                return r;

        /* Configure access to the cgroup's attributes */
        for (i = attributes[unified]; i->name; i++) {
                fs = mfree(fs);

                r = cg_get_path(controller, path, i->name, &fs);
                if (r < 0)
                        return r;

                r = chmod_and_chown(fs, 0644, uid, gid);
                if (r < 0) {
                        if (i->fatal)
                                return r;

                        log_debug_errno(r, "Failed to set access on cgroup %s, ignoring: %m", fs);
                }
        }

        if (streq(controller, SYSTEMD_CGROUP_CONTROLLER)) {
                r = cg_hybrid_unified();
                if (r < 0)
                        return r;
                if (r > 0) {
                        /* Always propagate access mode from unified to legacy controller */
                        r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER_LEGACY, path, uid, gid);
                        if (r < 0)
                                log_debug_errno(r, "Failed to set access on compatibility systemd cgroup %s, ignoring: %m", path);
                }
        }

        return 0;
}

int cg_migrate(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        bool done = false;
        _cleanup_set_free_ Set *s = NULL;
        int r, ret = 0;
        pid_t my_pid;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        s = set_new(NULL);
        if (!s)
                return -ENOMEM;

        my_pid = getpid_cached();

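        /* Keep looping until a full pass over cgroup.procs encounters no PIDs we haven't already
         * attempted: migration can race with forks in the source cgroup, so a single pass is not
         * necessarily enough. The set 's' remembers the PIDs already processed, which guarantees
         * termination even if some process cannot be moved. */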
        do {
                _cleanup_fclose_ FILE *f = NULL;
                pid_t pid = 0;
                done = true;

                r = cg_enumerate_processes(cfrom, pfrom, &f);
                if (r < 0) {
                        if (ret >= 0 && r != -ENOENT)
                                return r;

                        return ret;
                }

                while ((r = cg_read_pid(f, &pid)) > 0) {

                        /* This might do weird stuff if we weren't a single-threaded program. However, we
                         * luckily know we are. */
                        if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
                                continue;

                        if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
                                continue;

                        /* Ignore kernel threads. Since they can only exist in the root cgroup, we only
                         * check for them there. */
                        if (cfrom &&
                            empty_or_root(pfrom) &&
                            is_kernel_thread(pid) > 0)
                                continue;

                        r = cg_attach(cto, pto, pid);
                        if (r < 0) {
                                if (ret >= 0 && r != -ESRCH)
                                        ret = r;
                        } else if (ret == 0)
                                ret = 1;

                        done = false;

                        r = set_put(s, PID_TO_PTR(pid));
                        if (r < 0) {
                                if (ret >= 0)
                                        return r;

                                return ret;
                        }
                }

                if (r < 0) {
                        if (ret >= 0)
                                return r;

                        return ret;
                }
        } while (!done);

        return ret;
}

int cg_migrate_recursive(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        _cleanup_closedir_ DIR *d = NULL;
        int r, ret = 0;
        char *fn;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        ret = cg_migrate(cfrom, pfrom, cto, pto, flags);

        r = cg_enumerate_subgroups(cfrom, pfrom, &d);
        if (r < 0) {
                if (ret >= 0 && r != -ENOENT)
                        return r;

                return ret;
        }

        while ((r = cg_read_subgroup(d, &fn)) > 0) {
                _cleanup_free_ char *p = NULL;

                p = path_join(empty_to_root(pfrom), fn);
                free(fn);
                if (!p)
                        return -ENOMEM;

                r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
                if (r != 0 && ret >= 0)
                        ret = r;
        }

        if (r < 0 && ret >= 0)
                ret = r;

        if (flags & CGROUP_REMOVE) {
                r = cg_rmdir(cfrom, pfrom);
                if (r < 0 && ret >= 0 && !IN_SET(r, -ENOENT, -EBUSY))
                        return r;
        }

        return ret;
}

int cg_migrate_recursive_fallback(
                const char *cfrom,
                const char *pfrom,
                const char *cto,
                const char *pto,
                CGroupFlags flags) {

        int r;

        assert(cfrom);
        assert(pfrom);
        assert(cto);
        assert(pto);

        r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
        if (r < 0) {
                char prefix[strlen(pto) + 1];

                /* This didn't work? Then let's try all prefixes of the destination */

                PATH_FOREACH_PREFIX(prefix, pto) {
                        int q;

                        q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
                        if (q >= 0)
                                return q;
                }
        }

        return r;
}

int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
        CGroupController c;
        CGroupMask done;
        bool created;
        int r;

        /* This one will create a cgroup in our private tree, but also
         * duplicate it in the trees specified in mask, and remove it
         * in all others.
         *
         * Returns 0 if the group already existed in the systemd hierarchy,
         * 1 on success, negative otherwise.
         */

        /* First create the cgroup in our own hierarchy. */
        r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
        if (r < 0)
                return r;
        created = r;

        /* If we are in the unified hierarchy, we are done now */
        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return created;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;
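        /* CGROUP_MASK_EXTEND_JOINED() widens the mask to include controllers that are mounted jointly
         * on v1 (e.g. cpu together with cpuacct), so joined controllers are handled as one unit in the
         * loop below, with 'done' marking the whole joined group as processed. */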

        /* Otherwise, do the same in the other hierarchies */
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                if (FLAGS_SET(mask, bit))
                        (void) cg_create(n, path);

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return created;
}

int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
        int r;

        r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
        if (r < 0)
                return r;

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r > 0)
                return 0;

        supported &= CGROUP_MASK_V1;
        CGroupMask done = 0;

        for (CGroupController c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *p = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (path_callback)
                        p = path_callback(bit, userdata);
                if (!p)
                        p = path;

                (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return 0;
}

int cg_migrate_v1_controllers(CGroupMask supported, CGroupMask mask, const char *from, cg_migrate_callback_t to_callback, void *userdata) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        assert(to_callback);

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *to = NULL;

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (!FLAGS_SET(mask, bit))
                        continue;

                to = to_callback(bit, userdata);

                /* Remember first error and try continuing */
                q = cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, from, cgroup_controller_to_string(c), to, 0);
                r = (r < 0) ? r : q;

                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
        int r, q;

        r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
        if (r < 0)
                return r;

        q = cg_all_unified();
        if (q < 0)
                return q;
        if (q > 0)
                return r;

        return cg_trim_v1_controllers(supported, _CGROUP_MASK_ALL, path, delete_root);
}

int cg_trim_v1_controllers(CGroupMask supported, CGroupMask mask, const char *path, bool delete_root) {
        CGroupController c;
        CGroupMask done;
        int r = 0, q;

        supported &= CGROUP_MASK_V1;
        mask = CGROUP_MASK_EXTEND_JOINED(mask);
        done = 0;

        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);

                if (!FLAGS_SET(supported, bit))
                        continue;

                if (FLAGS_SET(done, bit))
                        continue;

                if (FLAGS_SET(mask, bit)) {
                        /* Remember first error and try continuing */
                        q = cg_trim(cgroup_controller_to_string(c), path, delete_root);
                        r = (r < 0) ? r : q;
                }
                done |= CGROUP_MASK_EXTEND_JOINED(bit);
        }

        return r;
}

int cg_enable_everywhere(
                CGroupMask supported,
                CGroupMask mask,
                const char *p,
                CGroupMask *ret_result_mask) {

        _cleanup_fclose_ FILE *f = NULL;
        _cleanup_free_ char *fs = NULL;
        CGroupController c;
        CGroupMask ret = 0;
        int r;

        assert(p);

        if (supported == 0) {
                if (ret_result_mask)
                        *ret_result_mask = 0;
                return 0;
        }

        r = cg_all_unified();
        if (r < 0)
                return r;
        if (r == 0) {
                /* On the legacy hierarchy there's no concept of "enabling" controllers for a cgroup. Let's
                 * claim complete success right away. (If you wonder why we return the full mask here, rather
                 * than zero: the caller tends to use the returned mask later on to compare if all
                 * controllers were properly joined, and if not requeues realization. This use is the primary
                 * purpose of the return value, hence let's minimize surprises here and reduce triggers for
                 * re-realization by always saying we fully succeeded.) */
                if (ret_result_mask)
                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
                                                                               * CGROUP_MASK_V2: The 'supported' mask
                                                                               * might contain pure-V1 or BPF
                                                                               * controllers, and we never want to
                                                                               * claim that we could enable those with
                                                                               * cgroup.subtree_control */
                return 0;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
        if (r < 0)
                return r;

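        /* For reference (kernel cgroup v2 semantics, not specific to this file): cgroup.subtree_control
         * accepts controller names prefixed with '+' to enable or '-' to disable, e.g. "+cpu" or
         * "-memory". The loop below builds and writes one such token per controller. */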
        for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
                CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
                const char *n;

                if (!FLAGS_SET(CGROUP_MASK_V2, bit))
                        continue;

                if (!FLAGS_SET(supported, bit))
                        continue;

                n = cgroup_controller_to_string(c);
                {
                        char s[1 + strlen(n) + 1];

                        s[0] = FLAGS_SET(mask, bit) ? '+' : '-';
                        strcpy(s + 1, n);

                        if (!f) {
                                f = fopen(fs, "we");
                                if (!f)
                                        return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
                        }

                        r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                                clearerr(f);

                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
                                 * happens for example when we attempt to turn off a controller up in the tree that is
                                 * used down in the tree. */
                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
                                                                           * only here, and not follow the same logic
                                                                           * for other errors such as EINVAL or
                                                                           * EOPNOTSUPP or anything else. That's
                                                                           * because EBUSY indicates that the
                                                                           * controller is currently enabled and
                                                                           * cannot be disabled because something down
                                                                           * the hierarchy is still using it. Any other
                                                                           * error most likely means something like "I
                                                                           * never heard of this controller" or
                                                                           * similar. In the former case it's hence
                                                                           * safe to assume the controller is still on
                                                                           * after the failed operation, while in the
                                                                           * latter case it's safer to assume the
                                                                           * controller is unknown and hence certainly
                                                                           * not enabled. */
                                        ret |= bit;
                        } else {
                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
                                if (FLAGS_SET(mask, bit))
                                        ret |= bit;
                        }
                }
        }

        /* Let's return the precise set of controllers now enabled for the cgroup. */
        if (ret_result_mask)
                *ret_result_mask = ret;

        return 0;
}