1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 #pragma once
3 
4 #include <dirent.h>
5 #include <fcntl.h>
6 #include <stdbool.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <sys/statfs.h>
10 #include <sys/types.h>
11 
12 #include "def.h"
13 #include "set.h"
14 
15 #define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=systemd"
16 #define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
17 #define SYSTEMD_CGROUP_CONTROLLER "_systemd"
18 
19 /* An enum of well known cgroup controllers */
20 typedef enum CGroupController {
21         /* Original cgroup controllers */
22         CGROUP_CONTROLLER_CPU,
23         CGROUP_CONTROLLER_CPUACCT,    /* v1 only */
24         CGROUP_CONTROLLER_CPUSET,     /* v2 only */
25         CGROUP_CONTROLLER_IO,         /* v2 only */
26         CGROUP_CONTROLLER_BLKIO,      /* v1 only */
27         CGROUP_CONTROLLER_MEMORY,
28         CGROUP_CONTROLLER_DEVICES,    /* v1 only */
29         CGROUP_CONTROLLER_PIDS,
30 
31         /* BPF-based pseudo-controllers, v2 only */
32         CGROUP_CONTROLLER_BPF_FIREWALL,
33         CGROUP_CONTROLLER_BPF_DEVICES,
34         CGROUP_CONTROLLER_BPF_FOREIGN,
35         CGROUP_CONTROLLER_BPF_SOCKET_BIND,
36         CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
37         /* The BPF hook implementing RestrictFileSystems= is not defined here.
38          * It's applied as late as possible in exec_child() so we don't block
39          * our own unit setup code. */
40 
41         _CGROUP_CONTROLLER_MAX,
42         _CGROUP_CONTROLLER_INVALID = -EINVAL,
43 } CGroupController;
44 
45 #define CGROUP_CONTROLLER_TO_MASK(c) (1U << (c))
46 
47 /* A bit mask of well known cgroup controllers */
48 typedef enum CGroupMask {
49         CGROUP_MASK_CPU = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPU),
50         CGROUP_MASK_CPUACCT = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUACCT),
51         CGROUP_MASK_CPUSET = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_CPUSET),
52         CGROUP_MASK_IO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_IO),
53         CGROUP_MASK_BLKIO = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BLKIO),
54         CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
55         CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
56         CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
57         CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
58         CGROUP_MASK_BPF_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_DEVICES),
59         CGROUP_MASK_BPF_FOREIGN = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FOREIGN),
60         CGROUP_MASK_BPF_SOCKET_BIND = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_SOCKET_BIND),
61         CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES),
62 
63         /* All real cgroup v1 controllers */
64         CGROUP_MASK_V1 = CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT|CGROUP_MASK_BLKIO|CGROUP_MASK_MEMORY|CGROUP_MASK_DEVICES|CGROUP_MASK_PIDS,
65 
66         /* All real cgroup v2 controllers */
67         CGROUP_MASK_V2 = CGROUP_MASK_CPU|CGROUP_MASK_CPUSET|CGROUP_MASK_IO|CGROUP_MASK_MEMORY|CGROUP_MASK_PIDS,
68 
69         /* All cgroup v2 BPF pseudo-controllers */
70         CGROUP_MASK_BPF = CGROUP_MASK_BPF_FIREWALL|CGROUP_MASK_BPF_DEVICES|CGROUP_MASK_BPF_FOREIGN|CGROUP_MASK_BPF_SOCKET_BIND|CGROUP_MASK_BPF_RESTRICT_NETWORK_INTERFACES,
71 
72         _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
73 } CGroupMask;
74 
CGROUP_MASK_EXTEND_JOINED(CGroupMask mask)75 static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
76         /* We always mount "cpu" and "cpuacct" in the same hierarchy. Hence, when one bit is set also set the other */
77 
78         if (mask & (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT))
79                 mask |= (CGROUP_MASK_CPU|CGROUP_MASK_CPUACCT);
80 
81         return mask;
82 }
83 
84 CGroupMask get_cpu_accounting_mask(void);
85 bool cpu_accounting_is_cheap(void);
86 
87 /* Special values for all weight knobs on unified hierarchy */
88 #define CGROUP_WEIGHT_INVALID UINT64_MAX
89 #define CGROUP_WEIGHT_MIN UINT64_C(1)
90 #define CGROUP_WEIGHT_MAX UINT64_C(10000)
91 #define CGROUP_WEIGHT_DEFAULT UINT64_C(100)
92 
93 #define CGROUP_LIMIT_MIN UINT64_C(0)
94 #define CGROUP_LIMIT_MAX UINT64_MAX
95 
CGROUP_WEIGHT_IS_OK(uint64_t x)96 static inline bool CGROUP_WEIGHT_IS_OK(uint64_t x) {
97         return
98             x == CGROUP_WEIGHT_INVALID ||
99             (x >= CGROUP_WEIGHT_MIN && x <= CGROUP_WEIGHT_MAX);
100 }
101 
102 /* IO limits on unified hierarchy */
103 typedef enum CGroupIOLimitType {
104         CGROUP_IO_RBPS_MAX,
105         CGROUP_IO_WBPS_MAX,
106         CGROUP_IO_RIOPS_MAX,
107         CGROUP_IO_WIOPS_MAX,
108 
109         _CGROUP_IO_LIMIT_TYPE_MAX,
110         _CGROUP_IO_LIMIT_TYPE_INVALID = -EINVAL,
111 } CGroupIOLimitType;
112 
113 extern const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX];
114 
115 const char* cgroup_io_limit_type_to_string(CGroupIOLimitType t) _const_;
116 CGroupIOLimitType cgroup_io_limit_type_from_string(const char *s) _pure_;
117 
118 /* Special values for the cpu.shares attribute */
119 #define CGROUP_CPU_SHARES_INVALID UINT64_MAX
120 #define CGROUP_CPU_SHARES_MIN UINT64_C(2)
121 #define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
122 #define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
123 
CGROUP_CPU_SHARES_IS_OK(uint64_t x)124 static inline bool CGROUP_CPU_SHARES_IS_OK(uint64_t x) {
125         return
126             x == CGROUP_CPU_SHARES_INVALID ||
127             (x >= CGROUP_CPU_SHARES_MIN && x <= CGROUP_CPU_SHARES_MAX);
128 }
129 
130 /* Special values for the special {blkio,io}.bfq.weight attribute */
131 #define CGROUP_BFQ_WEIGHT_INVALID UINT64_MAX
132 #define CGROUP_BFQ_WEIGHT_MIN UINT64_C(1)
133 #define CGROUP_BFQ_WEIGHT_MAX UINT64_C(1000)
134 #define CGROUP_BFQ_WEIGHT_DEFAULT UINT64_C(100)
135 
136 /* Convert the normal io.weight value to io.bfq.weight */
BFQ_WEIGHT(uint64_t io_weight)137 static inline uint64_t BFQ_WEIGHT(uint64_t io_weight) {
138         return
139             io_weight <= CGROUP_WEIGHT_DEFAULT ?
140             CGROUP_BFQ_WEIGHT_DEFAULT - (CGROUP_WEIGHT_DEFAULT - io_weight) * (CGROUP_BFQ_WEIGHT_DEFAULT - CGROUP_BFQ_WEIGHT_MIN) / (CGROUP_WEIGHT_DEFAULT - CGROUP_WEIGHT_MIN) :
141             CGROUP_BFQ_WEIGHT_DEFAULT + (io_weight - CGROUP_WEIGHT_DEFAULT) * (CGROUP_BFQ_WEIGHT_MAX - CGROUP_BFQ_WEIGHT_DEFAULT) / (CGROUP_WEIGHT_MAX - CGROUP_WEIGHT_DEFAULT);
142 }
143 
144 /* Special values for the blkio.weight attribute */
145 #define CGROUP_BLKIO_WEIGHT_INVALID UINT64_MAX
146 #define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
147 #define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
148 #define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
149 
CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x)150 static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
151         return
152             x == CGROUP_BLKIO_WEIGHT_INVALID ||
153             (x >= CGROUP_BLKIO_WEIGHT_MIN && x <= CGROUP_BLKIO_WEIGHT_MAX);
154 }
155 
156 typedef enum CGroupUnified {
157         CGROUP_UNIFIED_UNKNOWN = -1,
158         CGROUP_UNIFIED_NONE = 0,        /* Both systemd and controllers on legacy */
159         CGROUP_UNIFIED_SYSTEMD = 1,     /* Only systemd on unified */
160         CGROUP_UNIFIED_ALL = 2,         /* Both systemd and controllers on unified */
161 } CGroupUnified;
162 
163 /*
164  * General rules:
165  *
166  * We accept named hierarchies in the syntax "foo" and "name=foo".
167  *
168  * We expect that named hierarchies do not conflict in name with a
169  * kernel hierarchy, modulo the "name=" prefix.
170  *
171  * We always generate "normalized" controller names, i.e. without the
172  * "name=" prefix.
173  *
174  * We require absolute cgroup paths. When returning, we will always
175  * generate paths with multiple adjacent / removed.
176  */
177 
178 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f);
179 int cg_read_pid(FILE *f, pid_t *_pid);
180 int cg_read_event(const char *controller, const char *path, const char *event,
181                   char **val);
182 
183 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d);
184 int cg_read_subgroup(DIR *d, char **fn);
185 
186 typedef enum CGroupFlags {
187         CGROUP_SIGCONT     = 1 << 0,
188         CGROUP_IGNORE_SELF = 1 << 1,
189         CGROUP_REMOVE      = 1 << 2,
190 } CGroupFlags;
191 
192 typedef int (*cg_kill_log_func_t)(pid_t pid, int sig, void *userdata);
193 
194 int cg_kill(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
195 int cg_kill_kernel_sigkill(const char *controller, const char *path);
196 int cg_kill_recursive(const char *controller, const char *path, int sig, CGroupFlags flags, Set *s, cg_kill_log_func_t kill_log, void *userdata);
197 
198 int cg_split_spec(const char *spec, char **ret_controller, char **ret_path);
199 int cg_mangle_path(const char *path, char **result);
200 
201 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs);
202 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs);
203 
204 int cg_pid_get_path(const char *controller, pid_t pid, char **path);
205 
206 int cg_rmdir(const char *controller, const char *path);
207 
208 int cg_is_threaded(const char *controller, const char *path);
209 
210 typedef enum  {
211         CG_KEY_MODE_GRACEFUL = 1 << 0,
212 } CGroupKeyMode;
213 
214 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
215 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
216 int cg_get_keyed_attribute_full(const char *controller, const char *path, const char *attribute, char **keys, char **values, CGroupKeyMode mode);
217 
cg_get_keyed_attribute(const char * controller,const char * path,const char * attribute,char ** keys,char ** ret_values)218 static inline int cg_get_keyed_attribute(
219                 const char *controller,
220                 const char *path,
221                 const char *attribute,
222                 char **keys,
223                 char **ret_values) {
224         return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, 0);
225 }
226 
cg_get_keyed_attribute_graceful(const char * controller,const char * path,const char * attribute,char ** keys,char ** ret_values)227 static inline int cg_get_keyed_attribute_graceful(
228                 const char *controller,
229                 const char *path,
230                 const char *attribute,
231                 char **keys,
232                 char **ret_values) {
233         return cg_get_keyed_attribute_full(controller, path, attribute, keys, ret_values, CG_KEY_MODE_GRACEFUL);
234 }
235 
236 int cg_get_attribute_as_uint64(const char *controller, const char *path, const char *attribute, uint64_t *ret);
237 
238 /* Does a parse_boolean() on the attribute contents and sets ret accordingly */
239 int cg_get_attribute_as_bool(const char *controller, const char *path, const char *attribute, bool *ret);
240 
241 int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
242 int cg_get_owner(const char *controller, const char *path, uid_t *ret_uid);
243 
244 int cg_set_xattr(const char *controller, const char *path, const char *name, const void *value, size_t size, int flags);
245 int cg_get_xattr(const char *controller, const char *path, const char *name, void *value, size_t size);
246 int cg_get_xattr_malloc(const char *controller, const char *path, const char *name, char **ret);
247 /* Returns negative on error, and 0 or 1 on success for the bool value */
248 int cg_get_xattr_bool(const char *controller, const char *path, const char *name);
249 int cg_remove_xattr(const char *controller, const char *path, const char *name);
250 
251 int cg_install_release_agent(const char *controller, const char *agent);
252 int cg_uninstall_release_agent(const char *controller);
253 
254 int cg_is_empty(const char *controller, const char *path);
255 int cg_is_empty_recursive(const char *controller, const char *path);
256 
257 int cg_get_root_path(char **path);
258 
259 int cg_path_get_cgroupid(const char *path, uint64_t *ret);
260 int cg_path_get_session(const char *path, char **session);
261 int cg_path_get_owner_uid(const char *path, uid_t *uid);
262 int cg_path_get_unit(const char *path, char **unit);
263 int cg_path_get_user_unit(const char *path, char **unit);
264 int cg_path_get_machine_name(const char *path, char **machine);
265 int cg_path_get_slice(const char *path, char **slice);
266 int cg_path_get_user_slice(const char *path, char **slice);
267 
268 int cg_shift_path(const char *cgroup, const char *cached_root, const char **shifted);
269 int cg_pid_get_path_shifted(pid_t pid, const char *cached_root, char **cgroup);
270 
271 int cg_pid_get_session(pid_t pid, char **session);
272 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid);
273 int cg_pid_get_unit(pid_t pid, char **unit);
274 int cg_pid_get_user_unit(pid_t pid, char **unit);
275 int cg_pid_get_machine_name(pid_t pid, char **machine);
276 int cg_pid_get_slice(pid_t pid, char **slice);
277 int cg_pid_get_user_slice(pid_t pid, char **slice);
278 
279 int cg_path_decode_unit(const char *cgroup, char **unit);
280 
281 char *cg_escape(const char *p);
282 char *cg_unescape(const char *p) _pure_;
283 
284 bool cg_controller_is_valid(const char *p);
285 
286 int cg_slice_to_path(const char *unit, char **ret);
287 
288 typedef const char* (*cg_migrate_callback_t)(CGroupMask mask, void *userdata);
289 
290 int cg_mask_supported(CGroupMask *ret);
291 int cg_mask_supported_subtree(const char *root, CGroupMask *ret);
292 int cg_mask_from_string(const char *s, CGroupMask *ret);
293 int cg_mask_to_string(CGroupMask mask, char **ret);
294 
295 int cg_kernel_controllers(Set **controllers);
296 
297 bool cg_ns_supported(void);
298 bool cg_freezer_supported(void);
299 bool cg_kill_supported(void);
300 
301 int cg_all_unified(void);
302 int cg_hybrid_unified(void);
303 int cg_unified_controller(const char *controller);
304 int cg_unified_cached(bool flush);
cg_unified(void)305 static inline int cg_unified(void) {
306         return cg_unified_cached(true);
307 }
308 
309 const char* cgroup_controller_to_string(CGroupController c) _const_;
310 CGroupController cgroup_controller_from_string(const char *s) _pure_;
311 
312 bool is_cgroup_fs(const struct statfs *s);
313 bool fd_is_cgroup_fs(int fd);
314 
315 typedef enum ManagedOOMMode {
316         MANAGED_OOM_AUTO,
317         MANAGED_OOM_KILL,
318         _MANAGED_OOM_MODE_MAX,
319         _MANAGED_OOM_MODE_INVALID = -EINVAL,
320 } ManagedOOMMode;
321 
322 const char* managed_oom_mode_to_string(ManagedOOMMode m) _const_;
323 ManagedOOMMode managed_oom_mode_from_string(const char *s) _pure_;
324 
325 typedef enum ManagedOOMPreference {
326         MANAGED_OOM_PREFERENCE_NONE = 0,
327         MANAGED_OOM_PREFERENCE_AVOID = 1,
328         MANAGED_OOM_PREFERENCE_OMIT = 2,
329         _MANAGED_OOM_PREFERENCE_MAX,
330         _MANAGED_OOM_PREFERENCE_INVALID = -EINVAL,
331 } ManagedOOMPreference;
332 
333 const char* managed_oom_preference_to_string(ManagedOOMPreference a) _const_;
334 ManagedOOMPreference managed_oom_preference_from_string(const char *s) _pure_;
335 
336 /* The structure to pass to name_to_handle_at() on cgroupfs2 */
337 typedef union {
338         struct file_handle file_handle;
339         uint8_t space[offsetof(struct file_handle, f_handle) + sizeof(uint64_t)];
340 } cg_file_handle;
341 
342 #define CG_FILE_HANDLE_INIT { .file_handle.handle_bytes = sizeof(uint64_t) }
343 #define CG_FILE_HANDLE_CGROUPID(fh) (*(uint64_t*) (fh).file_handle.f_handle)
344