1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 #pragma once
3 
4 #include <stdbool.h>
5 
6 #include "bpf-lsm.h"
7 #include "cgroup-util.h"
8 #include "cpu-set-util.h"
9 #include "list.h"
10 #include "time-util.h"
11 
/* A tasks limit expressed as a value plus an optional scale. */
typedef struct TasksMax {
        /* If scale == 0, just use value; otherwise, value / scale.
         * See tasks_max_resolve(). */
        uint64_t value;
        uint64_t scale;
} TasksMax;

/* The "unset" marker: value == UINT64_MAX combined with scale == 0. */
#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })

/* Returns true if *tasks_max differs from TASKS_MAX_UNSET in at least one field, false if it is
 * exactly the unset marker. The pointer is dereferenced unconditionally, so it must not be NULL. */
static inline bool tasks_max_isset(const TasksMax *tasks_max) {
        return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
}

/* Implemented outside of this header. NOTE(review): presumably collapses value/scale into a single
 * number following the rule described on TasksMax — confirm against the definition. */
uint64_t tasks_max_resolve(const TasksMax *tasks_max);
26 
/* Forward declarations, so that the structures below can refer to each other by their short names
 * before they are defined. */
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
typedef struct CGroupSocketBindItem CGroupSocketBindItem;
36 
/* Device access policy; stored in CGroupContext.device_policy. */
typedef enum CGroupDevicePolicy {
        /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
         * everything. */
        CGROUP_DEVICE_POLICY_AUTO,

        /* Everything forbidden, except built-in ones and listed ones. */
        CGROUP_DEVICE_POLICY_CLOSED,

        /* Everything forbidden, except for the listed devices */
        CGROUP_DEVICE_POLICY_STRICT,

        /* One past the last valid policy, i.e. the number of valid policies. */
        _CGROUP_DEVICE_POLICY_MAX,
        /* Negative errno-style sentinel for "not a valid policy". */
        _CGROUP_DEVICE_POLICY_INVALID = -EINVAL,
} CGroupDevicePolicy;
51 
/* Action argument of unit_cgroup_freezer_action(). */
typedef enum FreezerAction {
        FREEZER_FREEZE,
        FREEZER_THAW,

        /* One past the last valid action, i.e. the number of valid actions. */
        _FREEZER_ACTION_MAX,
        /* Negative errno-style sentinel for "not a valid action". */
        _FREEZER_ACTION_INVALID = -EINVAL,
} FreezerAction;
59 
60 struct CGroupDeviceAllow {
61         LIST_FIELDS(CGroupDeviceAllow, device_allow);
62         char *path;
63         bool r:1;
64         bool w:1;
65         bool m:1;
66 };
67 
68 struct CGroupIODeviceWeight {
69         LIST_FIELDS(CGroupIODeviceWeight, device_weights);
70         char *path;
71         uint64_t weight;
72 };
73 
74 struct CGroupIODeviceLimit {
75         LIST_FIELDS(CGroupIODeviceLimit, device_limits);
76         char *path;
77         uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
78 };
79 
80 struct CGroupIODeviceLatency {
81         LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
82         char *path;
83         usec_t target_usec;
84 };
85 
86 struct CGroupBlockIODeviceWeight {
87         LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
88         char *path;
89         uint64_t weight;
90 };
91 
92 struct CGroupBlockIODeviceBandwidth {
93         LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths);
94         char *path;
95         uint64_t rbps;
96         uint64_t wbps;
97 };
98 
99 struct CGroupBPFForeignProgram {
100         LIST_FIELDS(CGroupBPFForeignProgram, programs);
101         uint32_t attach_type;
102         char *bpffs_path;
103 };
104 
105 struct CGroupSocketBindItem {
106         LIST_FIELDS(CGroupSocketBindItem, socket_bind_items);
107         int address_family;
108         int ip_protocol;
109         uint16_t nr_ports;
110         uint16_t port_min;
111 };
112 
113 struct CGroupContext {
114         bool cpu_accounting;
115         bool io_accounting;
116         bool blockio_accounting;
117         bool memory_accounting;
118         bool tasks_accounting;
119         bool ip_accounting;
120 
121         /* Configures the memory.oom.group attribute (on unified) */
122         bool memory_oom_group;
123 
124         bool delegate;
125         CGroupMask delegate_controllers;
126         CGroupMask disable_controllers;
127 
128         /* For unified hierarchy */
129         uint64_t cpu_weight;
130         uint64_t startup_cpu_weight;
131         usec_t cpu_quota_per_sec_usec;
132         usec_t cpu_quota_period_usec;
133 
134         CPUSet cpuset_cpus;
135         CPUSet startup_cpuset_cpus;
136         CPUSet cpuset_mems;
137         CPUSet startup_cpuset_mems;
138 
139         uint64_t io_weight;
140         uint64_t startup_io_weight;
141         LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
142         LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
143         LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
144 
145         uint64_t default_memory_min;
146         uint64_t default_memory_low;
147         uint64_t memory_min;
148         uint64_t memory_low;
149         uint64_t memory_high;
150         uint64_t memory_max;
151         uint64_t memory_swap_max;
152 
153         bool default_memory_min_set:1;
154         bool default_memory_low_set:1;
155         bool memory_min_set:1;
156         bool memory_low_set:1;
157 
158         Set *ip_address_allow;
159         Set *ip_address_deny;
160         /* These two flags indicate that redundant entries have been removed from
161          * ip_address_allow/ip_address_deny, i.e. in_addr_prefixes_reduce() has already been called. */
162         bool ip_address_allow_reduced;
163         bool ip_address_deny_reduced;
164 
165         char **ip_filters_ingress;
166         char **ip_filters_egress;
167         LIST_HEAD(CGroupBPFForeignProgram, bpf_foreign_programs);
168 
169         Set *restrict_network_interfaces;
170         bool restrict_network_interfaces_is_allow_list;
171 
172         /* For legacy hierarchies */
173         uint64_t cpu_shares;
174         uint64_t startup_cpu_shares;
175 
176         uint64_t blockio_weight;
177         uint64_t startup_blockio_weight;
178         LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
179         LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
180 
181         uint64_t memory_limit;
182 
183         CGroupDevicePolicy device_policy;
184         LIST_HEAD(CGroupDeviceAllow, device_allow);
185 
186         LIST_HEAD(CGroupSocketBindItem, socket_bind_allow);
187         LIST_HEAD(CGroupSocketBindItem, socket_bind_deny);
188 
189         /* Common */
190         TasksMax tasks_max;
191 
192         /* Settings for systemd-oomd */
193         ManagedOOMMode moom_swap;
194         ManagedOOMMode moom_mem_pressure;
195         uint32_t moom_mem_pressure_limit; /* Normalized to 2^32-1 == 100% */
196         ManagedOOMPreference moom_preference;
197 };
198 
/* Used when querying IP accounting data; this is the metric selector taken by
 * unit_get_ip_accounting(). */
typedef enum CGroupIPAccountingMetric {
        CGROUP_IP_INGRESS_BYTES,
        CGROUP_IP_INGRESS_PACKETS,
        CGROUP_IP_EGRESS_BYTES,
        CGROUP_IP_EGRESS_PACKETS,
        /* One past the last valid metric, i.e. the number of valid metrics. */
        _CGROUP_IP_ACCOUNTING_METRIC_MAX,
        /* Negative errno-style sentinel for "not a valid metric". */
        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIPAccountingMetric;
208 
/* Used when querying IO accounting data; this is the metric selector taken by
 * unit_get_io_accounting(). */
typedef enum CGroupIOAccountingMetric {
        CGROUP_IO_READ_BYTES,
        CGROUP_IO_WRITE_BYTES,
        CGROUP_IO_READ_OPERATIONS,
        CGROUP_IO_WRITE_OPERATIONS,
        /* One past the last valid metric, i.e. the number of valid metrics. */
        _CGROUP_IO_ACCOUNTING_METRIC_MAX,
        /* Negative errno-style sentinel for "not a valid metric". */
        _CGROUP_IO_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupIOAccountingMetric;
218 
/* Opaque here: the prototypes below only ever handle these two types through pointers. */
typedef struct Unit Unit;
typedef struct Manager Manager;
221 
222 usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
223 
224 void cgroup_context_init(CGroupContext *c);
225 void cgroup_context_done(CGroupContext *c);
226 void cgroup_context_dump(Unit *u, FILE* f, const char *prefix);
227 void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f);
228 
229 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
230 void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
231 void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
232 void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
233 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
234 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
235 void cgroup_context_remove_bpf_foreign_program(CGroupContext *c, CGroupBPFForeignProgram *p);
236 void cgroup_context_remove_socket_bind(CGroupSocketBindItem **head);
237 
238 int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
239 int cgroup_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
240 
241 void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
242 
243 CGroupMask unit_get_own_mask(Unit *u);
244 CGroupMask unit_get_delegate_mask(Unit *u);
245 CGroupMask unit_get_members_mask(Unit *u);
246 CGroupMask unit_get_siblings_mask(Unit *u);
247 CGroupMask unit_get_ancestor_disable_mask(Unit *u);
248 
249 CGroupMask unit_get_target_mask(Unit *u);
250 CGroupMask unit_get_enable_mask(Unit *u);
251 
252 void unit_invalidate_cgroup_members_masks(Unit *u);
253 
254 void unit_add_family_to_cgroup_realize_queue(Unit *u);
255 
256 const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
257 char *unit_default_cgroup_path(const Unit *u);
258 int unit_set_cgroup_path(Unit *u, const char *path);
259 int unit_pick_cgroup_path(Unit *u);
260 
261 int unit_realize_cgroup(Unit *u);
262 void unit_prune_cgroup(Unit *u);
263 int unit_watch_cgroup(Unit *u);
264 int unit_watch_cgroup_memory(Unit *u);
265 
266 void unit_release_cgroup(Unit *u);
267 /* Releases the cgroup only if it is recursively empty.
268  * Returns true if the cgroup was released, false otherwise. */
269 bool unit_maybe_release_cgroup(Unit *u);
270 
271 void unit_add_to_cgroup_empty_queue(Unit *u);
272 int unit_check_oomd_kill(Unit *u);
273 int unit_check_oom(Unit *u);
274 
275 int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);
276 
277 int manager_setup_cgroup(Manager *m);
278 void manager_shutdown_cgroup(Manager *m, bool delete);
279 
280 unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
281 
282 Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
283 Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
284 Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
285 
286 uint64_t unit_get_ancestor_memory_min(Unit *u);
287 uint64_t unit_get_ancestor_memory_low(Unit *u);
288 
289 int unit_search_main_pid(Unit *u, pid_t *ret);
290 int unit_watch_all_pids(Unit *u);
291 
292 int unit_synthesize_cgroup_empty_event(Unit *u);
293 
294 int unit_get_memory_current(Unit *u, uint64_t *ret);
295 int unit_get_memory_available(Unit *u, uint64_t *ret);
296 int unit_get_tasks_current(Unit *u, uint64_t *ret);
297 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
298 int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret);
299 int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
300 
301 int unit_reset_cpu_accounting(Unit *u);
302 int unit_reset_ip_accounting(Unit *u);
303 int unit_reset_io_accounting(Unit *u);
304 int unit_reset_accounting(Unit *u);
305 
/* Evaluates to member 'name' of the CGroupContext returned by unit_get_cgroup_context(u), or to
 * false if that call returns NULL. 'u' is evaluated exactly once. Uses a GNU statement expression.
 * The temporary has a deliberately unusual name so that an argument expression mentioning a
 * variable called "cc" is not captured by the macro's own local. */
#define UNIT_CGROUP_BOOL(u, name)                                                \
        ({                                                                       \
                CGroupContext *_unit_cgroup_ctx = unit_get_cgroup_context(u);    \
                _unit_cgroup_ctx ? _unit_cgroup_ctx->name : false;               \
        })
311 
312 bool manager_owns_host_root_cgroup(Manager *m);
313 bool unit_has_host_root_cgroup(Unit *u);
314 
315 bool unit_has_startup_cgroup_constraints(Unit *u);
316 
317 int manager_notify_cgroup_empty(Manager *m, const char *group);
318 
319 void unit_invalidate_cgroup(Unit *u, CGroupMask m);
320 void unit_invalidate_cgroup_bpf(Unit *u);
321 
322 void manager_invalidate_startup_units(Manager *m);
323 
324 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
325 CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
326 
327 void unit_cgroup_catchup(Unit *u);
328 
329 bool unit_cgroup_delegate(Unit *u);
330 
331 int compare_job_priority(const void *a, const void *b);
332 
333 int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name);
334 int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
335 
336 const char* freezer_action_to_string(FreezerAction a) _const_;
337 FreezerAction freezer_action_from_string(const char *s) _pure_;
338