1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 #pragma once
3
4 #include <stdbool.h>
5 #include <stdio.h>
6
7 #include "sd-bus.h"
8 #include "sd-device.h"
9 #include "sd-event.h"
10
11 #include "cgroup-util.h"
12 #include "cgroup.h"
13 #include "fdset.h"
14 #include "hashmap.h"
15 #include "list.h"
16 #include "prioq.h"
17 #include "ratelimit.h"
18 #include "varlink.h"
19
/* Forward declarations, so that these types can be referenced before (or without) their full
 * definitions being visible. */
struct libmnt_monitor;
typedef struct Unit Unit;
typedef struct Manager Manager;

/* Hard upper limit on the number of names we allow (128K). */
#define MANAGER_MAX_NAMES (128 * 1024)
27
/* Externally visible manager state. This is not stored as a state variable of its own; it is derived
 * from various other fields whenever somebody asks for it. */
typedef enum ManagerState {
        MANAGER_INITIALIZING = 0,
        MANAGER_STARTING,
        MANAGER_RUNNING,
        MANAGER_DEGRADED,
        MANAGER_MAINTENANCE,
        MANAGER_STOPPING,
        _MANAGER_STATE_MAX,                /* number of regular states above */
        _MANAGER_STATE_INVALID = -EINVAL,  /* sentinel, equal to -EINVAL */
} ManagerState;
40
/* What the manager is currently supposed to achieve; stored in Manager.objective. MANAGER_OK is the
 * value MANAGER_IS_RUNNING() tests for. */
typedef enum ManagerObjective {
        MANAGER_OK = 0,
        MANAGER_EXIT,
        MANAGER_RELOAD,
        MANAGER_REEXECUTE,
        MANAGER_REBOOT,
        MANAGER_POWEROFF,
        MANAGER_HALT,
        MANAGER_KEXEC,
        MANAGER_SWITCH_ROOT,
        _MANAGER_OBJECTIVE_MAX,                /* number of regular objectives above */
        _MANAGER_OBJECTIVE_INVALID = -EINVAL,  /* sentinel, equal to -EINVAL */
} ManagerObjective;
54
/* Kind of status message, as passed to manager_status_printf(). */
typedef enum StatusType {
        STATUS_TYPE_EPHEMERAL = 0,
        STATUS_TYPE_NORMAL,
        STATUS_TYPE_NOTICE,
        STATUS_TYPE_EMERGENCY,
} StatusType;
61
/* What to do with a unit after an out-of-memory kill. In every case the kernel or systemd-oomd has
 * already killed the process it picked; the policies differ in what happens afterwards. */
typedef enum OOMPolicy {
        OOM_CONTINUE = 0,  /* ... and nothing else happens */
        OOM_STOP,          /* ... and we stop the unit */
        OOM_KILL,          /* ... and all other processes in the unit are killed too, and we stop the unit */
        _OOM_POLICY_MAX,                /* number of regular policies above */
        _OOM_POLICY_INVALID = -EINVAL,  /* sentinel, equal to -EINVAL */
} OOMPolicy;
69
/* Notes (the MANAGER_ prefix of the enum values is omitted below for brevity):
 *
 * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD,
 *    TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when
 *    the manager is the system manager and is not running in a container environment.
 *
 * 2. The monotonic timestamp of TIMESTAMP_KERNEL is always zero.
 *
 * 3. The realtime timestamp of TIMESTAMP_KERNEL will be unset if the system does not
 *    have an RTC.
 *
 * 4. TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER will be unset if the system does not
 *    have an RTC, or if systemd is built without EFI support.
 *
 * 5. The monotonic timestamps of TIMESTAMP_FIRMWARE and TIMESTAMP_LOADER are stored as
 *    the negative of the actual value.
 *
 * 6. TIMESTAMP_USERSPACE is the timestamp of when the manager was started.
 *
 * 7. TIMESTAMP_INITRD_* are set only when the system is booted with an initrd.
 */
90
/* Index into Manager.timestamps[]; the array is sized by _MANAGER_TIMESTAMP_MAX. */
typedef enum ManagerTimestamp {
        /* Boot phases */
        MANAGER_TIMESTAMP_FIRMWARE = 0,
        MANAGER_TIMESTAMP_LOADER,
        MANAGER_TIMESTAMP_KERNEL,
        MANAGER_TIMESTAMP_INITRD,
        MANAGER_TIMESTAMP_USERSPACE,
        MANAGER_TIMESTAMP_FINISH,

        /* Start-up steps of the manager */
        MANAGER_TIMESTAMP_SECURITY_START,
        MANAGER_TIMESTAMP_SECURITY_FINISH,
        MANAGER_TIMESTAMP_GENERATORS_START,
        MANAGER_TIMESTAMP_GENERATORS_FINISH,
        MANAGER_TIMESTAMP_UNITS_LOAD_START,
        MANAGER_TIMESTAMP_UNITS_LOAD_FINISH,
        MANAGER_TIMESTAMP_UNITS_LOAD,

        /* INITRD_ variants of the start-up steps above */
        MANAGER_TIMESTAMP_INITRD_SECURITY_START,
        MANAGER_TIMESTAMP_INITRD_SECURITY_FINISH,
        MANAGER_TIMESTAMP_INITRD_GENERATORS_START,
        MANAGER_TIMESTAMP_INITRD_GENERATORS_FINISH,
        MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_START,
        MANAGER_TIMESTAMP_INITRD_UNITS_LOAD_FINISH,

        _MANAGER_TIMESTAMP_MAX,                /* number of regular timestamps above */
        _MANAGER_TIMESTAMP_INVALID = -EINVAL,  /* sentinel, equal to -EINVAL */
} ManagerTimestamp;
116
/* Index into Manager.watchdog[] and Manager.watchdog_overridden[]; both arrays are sized by
 * _WATCHDOG_TYPE_MAX. */
typedef enum WatchdogType {
        WATCHDOG_RUNTIME = 0,
        WATCHDOG_REBOOT,
        WATCHDOG_KEXEC,
        WATCHDOG_PRETIMEOUT,
        _WATCHDOG_TYPE_MAX,  /* number of watchdog types above */
} WatchdogType;
124
125 #include "execute.h"
126 #include "job.h"
127 #include "path-lookup.h"
128 #include "show-status.h"
129 #include "unit-name.h"
130
/* Bit flags selecting how much of the manager is brought up in a test run. Zero means a normal,
 * non-test run (see MANAGER_IS_TEST_RUN()). */
typedef enum ManagerTestRunFlags {
        MANAGER_TEST_NORMAL                  = 0x00,  /* run normally */
        MANAGER_TEST_RUN_MINIMAL             = 0x01,  /* create basic data structures */
        MANAGER_TEST_RUN_BASIC               = 0x02,  /* interact with the environment */
        MANAGER_TEST_RUN_ENV_GENERATORS      = 0x04,  /* also run env generators */
        MANAGER_TEST_RUN_GENERATORS          = 0x08,  /* also run unit generators */
        MANAGER_TEST_RUN_IGNORE_DEPENDENCIES = 0x10,  /* run while ignoring dependencies */

        /* Convenience combination of the three "interact/generate" flags */
        MANAGER_TEST_FULL =
                MANAGER_TEST_RUN_BASIC |
                MANAGER_TEST_RUN_ENV_GENERATORS |
                MANAGER_TEST_RUN_GENERATORS,
} ManagerTestRunFlags;
140
141 assert_cc((MANAGER_TEST_FULL & UINT8_MAX) == MANAGER_TEST_FULL);
142
143 struct Manager {
144 /* Note that the set of units we know of is allowed to be
145 * inconsistent. However the subset of it that is loaded may
146 * not, and the list of jobs may neither. */
147
148 /* Active jobs and units */
149 Hashmap *units; /* name string => Unit object n:1 */
150 Hashmap *units_by_invocation_id;
151 Hashmap *jobs; /* job id => Job object 1:1 */
152
153 /* To make it easy to iterate through the units of a specific
154 * type we maintain a per type linked list */
155 LIST_HEAD(Unit, units_by_type[_UNIT_TYPE_MAX]);
156
157 /* Units that need to be loaded */
158 LIST_HEAD(Unit, load_queue); /* this is actually more a stack than a queue, but uh. */
159
160 /* Jobs that need to be run */
161 struct Prioq *run_queue;
162
163 /* Units and jobs that have not yet been announced via
164 * D-Bus. When something about a job changes it is added here
165 * if it is not in there yet. This allows easy coalescing of
166 * D-Bus change signals. */
167 LIST_HEAD(Unit, dbus_unit_queue);
168 LIST_HEAD(Job, dbus_job_queue);
169
170 /* Units to remove */
171 LIST_HEAD(Unit, cleanup_queue);
172
173 /* Units and jobs to check when doing GC */
174 LIST_HEAD(Unit, gc_unit_queue);
175 LIST_HEAD(Job, gc_job_queue);
176
177 /* Units that should be realized */
178 LIST_HEAD(Unit, cgroup_realize_queue);
179
180 /* Units whose cgroup ran empty */
181 LIST_HEAD(Unit, cgroup_empty_queue);
182
183 /* Units whose memory.event fired */
184 LIST_HEAD(Unit, cgroup_oom_queue);
185
186 /* Target units whose default target dependencies haven't been set yet */
187 LIST_HEAD(Unit, target_deps_queue);
188
189 /* Units that might be subject to StopWhenUnneeded= clean-up */
190 LIST_HEAD(Unit, stop_when_unneeded_queue);
191
192 /* Units which are upheld by another other which we might need to act on */
193 LIST_HEAD(Unit, start_when_upheld_queue);
194
195 /* Units that have BindsTo= another unit, and might need to be shutdown because the bound unit is not active. */
196 LIST_HEAD(Unit, stop_when_bound_queue);
197
198 sd_event *event;
199
200 /* This maps PIDs we care about to units that are interested in. We allow multiple units to be interested in
201 * the same PID and multiple PIDs to be relevant to the same unit. Since in most cases only a single unit will
202 * be interested in the same PID we use a somewhat special encoding here: the first unit interested in a PID is
203 * stored directly in the hashmap, keyed by the PID unmodified. If there are other units interested too they'll
204 * be stored in a NULL-terminated array, and keyed by the negative PID. This is safe as pid_t is signed and
205 * negative PIDs are not used for regular processes but process groups, which we don't care about in this
206 * context, but this allows us to use the negative range for our own purposes. */
207 Hashmap *watch_pids; /* pid => unit as well as -pid => array of units */
208
209 /* A set contains all units which cgroup should be refreshed after startup */
210 Set *startup_units;
211
212 /* A set which contains all currently failed units */
213 Set *failed_units;
214
215 sd_event_source *run_queue_event_source;
216
217 char *notify_socket;
218 int notify_fd;
219 sd_event_source *notify_event_source;
220
221 int cgroups_agent_fd;
222 sd_event_source *cgroups_agent_event_source;
223
224 int signal_fd;
225 sd_event_source *signal_event_source;
226
227 sd_event_source *sigchld_event_source;
228
229 sd_event_source *time_change_event_source;
230
231 sd_event_source *timezone_change_event_source;
232
233 sd_event_source *jobs_in_progress_event_source;
234
235 int user_lookup_fds[2];
236 sd_event_source *user_lookup_event_source;
237
238 LookupScope unit_file_scope;
239 LookupPaths lookup_paths;
240 Hashmap *unit_id_map;
241 Hashmap *unit_name_map;
242 Set *unit_path_cache;
243 uint64_t unit_cache_timestamp_hash;
244
245 char **transient_environment; /* The environment, as determined from config files, kernel cmdline and environment generators */
246 char **client_environment; /* Environment variables created by clients through the bus API */
247
248 usec_t watchdog[_WATCHDOG_TYPE_MAX];
249 usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
250 char *watchdog_pretimeout_governor;
251 char *watchdog_pretimeout_governor_overridden;
252
253 dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
254
255 /* Data specific to the device subsystem */
256 sd_device_monitor *device_monitor;
257 Hashmap *devices_by_sysfs;
258
259 /* Data specific to the mount subsystem */
260 struct libmnt_monitor *mount_monitor;
261 sd_event_source *mount_event_source;
262
263 /* Data specific to the swap filesystem */
264 FILE *proc_swaps;
265 sd_event_source *swap_event_source;
266 Hashmap *swaps_by_devnode;
267
268 /* Data specific to the D-Bus subsystem */
269 sd_bus *api_bus, *system_bus;
270 Set *private_buses;
271 int private_listen_fd;
272 sd_event_source *private_listen_event_source;
273
274 /* Contains all the clients that are subscribed to signals via
275 the API bus. Note that private bus connections are always
276 considered subscribes, since they last for very short only,
277 and it is much simpler that way. */
278 sd_bus_track *subscribed;
279 char **deserialized_subscribed;
280
281 /* This is used during reloading: before the reload we queue
282 * the reply message here, and afterwards we send it */
283 sd_bus_message *pending_reload_message;
284
285 Hashmap *watch_bus; /* D-Bus names => Unit object n:1 */
286
287 bool send_reloading_done;
288
289 uint32_t current_job_id;
290 uint32_t default_unit_job_id;
291
292 /* Data specific to the Automount subsystem */
293 int dev_autofs_fd;
294
295 /* Data specific to the cgroup subsystem */
296 Hashmap *cgroup_unit;
297 CGroupMask cgroup_supported;
298 char *cgroup_root;
299
300 /* Notifications from cgroups, when the unified hierarchy is used is done via inotify. */
301 int cgroup_inotify_fd;
302 sd_event_source *cgroup_inotify_event_source;
303
304 /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and
305 * memory.events cgroupv2 attributes. */
306 Hashmap *cgroup_control_inotify_wd_unit;
307 Hashmap *cgroup_memory_inotify_wd_unit;
308
309 /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */
310 sd_event_source *cgroup_empty_event_source;
311 sd_event_source *cgroup_oom_event_source;
312
313 /* Make sure the user cannot accidentally unmount our cgroup
314 * file system */
315 int pin_cgroupfs_fd;
316
317 unsigned gc_marker;
318
319 /* The stat() data the last time we saw /etc/localtime */
320 usec_t etc_localtime_mtime;
321 bool etc_localtime_accessible;
322
323 ManagerObjective objective;
324
325 /* Flags */
326 bool dispatching_load_queue;
327
328 bool taint_usr;
329
330 /* Have we already sent out the READY=1 notification? */
331 bool ready_sent;
332
333 /* Was the last status sent "STATUS=Ready."? */
334 bool status_ready;
335
336 /* Have we already printed the taint line if necessary? */
337 bool taint_logged;
338
339 /* Have we ever changed the "kernel.pid_max" sysctl? */
340 bool sysctl_pid_max_changed;
341
342 ManagerTestRunFlags test_run_flags;
343
344 /* If non-zero, exit with the following value when the systemd
345 * process terminate. Useful for containers: systemd-nspawn could get
346 * the return value. */
347 uint8_t return_value;
348
349 ShowStatus show_status;
350 ShowStatus show_status_overridden;
351 StatusUnitFormat status_unit_format;
352 char *confirm_spawn;
353 bool no_console_output;
354 bool service_watchdogs;
355
356 ExecOutput default_std_output, default_std_error;
357
358 usec_t default_restart_usec, default_timeout_start_usec, default_timeout_stop_usec;
359 usec_t default_timeout_abort_usec;
360 bool default_timeout_abort_set;
361
362 usec_t default_start_limit_interval;
363 unsigned default_start_limit_burst;
364
365 bool default_cpu_accounting;
366 bool default_memory_accounting;
367 bool default_io_accounting;
368 bool default_blockio_accounting;
369 bool default_tasks_accounting;
370 bool default_ip_accounting;
371
372 TasksMax default_tasks_max;
373 usec_t default_timer_accuracy_usec;
374
375 OOMPolicy default_oom_policy;
376 int default_oom_score_adjust;
377 bool default_oom_score_adjust_set;
378
379 int original_log_level;
380 LogTarget original_log_target;
381 bool log_level_overridden;
382 bool log_target_overridden;
383
384 struct rlimit *rlimit[_RLIMIT_MAX];
385
386 /* non-zero if we are reloading or reexecuting, */
387 int n_reloading;
388
389 unsigned n_installed_jobs;
390 unsigned n_failed_jobs;
391
392 /* Jobs in progress watching */
393 unsigned n_running_jobs;
394 unsigned n_on_console;
395 unsigned jobs_in_progress_iteration;
396
397 /* Do we have any outstanding password prompts? */
398 int have_ask_password;
399 int ask_password_inotify_fd;
400 sd_event_source *ask_password_event_source;
401
402 /* Type=idle pipes */
403 int idle_pipe[4];
404 sd_event_source *idle_pipe_event_source;
405
406 char *switch_root;
407 char *switch_root_init;
408
409 /* This maps all possible path prefixes to the units needing
410 * them. It's a hashmap with a path string as key and a Set as
411 * value where Unit objects are contained. */
412 Hashmap *units_requiring_mounts_for;
413
414 /* Used for processing polkit authorization responses */
415 Hashmap *polkit_registry;
416
417 /* Dynamic users/groups, indexed by their name */
418 Hashmap *dynamic_users;
419
420 /* Keep track of all UIDs and GIDs any of our services currently use. This is useful for the RemoveIPC= logic. */
421 Hashmap *uid_refs;
422 Hashmap *gid_refs;
423
424 /* ExecRuntime, indexed by their owner unit id */
425 Hashmap *exec_runtime_by_id;
426
427 /* When the user hits C-A-D more than 7 times per 2s, do something immediately... */
428 RateLimit ctrl_alt_del_ratelimit;
429 EmergencyAction cad_burst_action;
430
431 const char *unit_log_field;
432 const char *unit_log_format_string;
433
434 const char *invocation_log_field;
435 const char *invocation_log_format_string;
436
437 int first_boot; /* tri-state */
438
439 /* Prefixes of e.g. RuntimeDirectory= */
440 char *prefix[_EXEC_DIRECTORY_TYPE_MAX];
441 char *received_credentials_directory;
442 char *received_encrypted_credentials_directory;
443
444 /* Used in the SIGCHLD and sd_notify() message invocation logic to avoid that we dispatch the same event
445 * multiple times on the same unit. */
446 unsigned sigchldgen;
447 unsigned notifygen;
448
449 bool honor_device_enumeration;
450
451 VarlinkServer *varlink_server;
452 /* When we're a system manager, this object manages the subscription from systemd-oomd to PID1 that's
453 * used to report changes in ManagedOOM settings (systemd server - oomd client). When
454 * we're a user manager, this object manages the client connection from the user manager to
455 * systemd-oomd to report changes in ManagedOOM settings (systemd client - oomd server). */
456 Varlink *managed_oom_varlink;
457
458 /* Reference to RestrictFileSystems= BPF program */
459 struct restrict_fs_bpf *restrict_fs;
460 };
461
manager_default_timeout_abort_usec(Manager * m)462 static inline usec_t manager_default_timeout_abort_usec(Manager *m) {
463 assert(m);
464 return m->default_timeout_abort_set ? m->default_timeout_abort_usec : m->default_timeout_stop_usec;
465 }
466
/* Scope predicates: every scope other than the system scope counts as "user". */
#define MANAGER_IS_SYSTEM(m) ((m)->unit_file_scope == LOOKUP_SCOPE_SYSTEM)
#define MANAGER_IS_USER(m) (!MANAGER_IS_SYSTEM(m))

/* True while at least one reload/reexecute is in progress (n_reloading is a counter). */
#define MANAGER_IS_RELOADING(m) ((m)->n_reloading > 0)

/* True once the MANAGER_TIMESTAMP_FINISH timestamp has been set. */
#define MANAGER_IS_FINISHED(m) (dual_timestamp_is_set(&(m)->timestamps[MANAGER_TIMESTAMP_FINISH]))

/* The objective is set to MANAGER_OK as soon as we enter the main loop, and to something else as
 * soon as we are done with it. */
#define MANAGER_IS_RUNNING(m) ((m)->objective == MANAGER_OK)

/* True if any test-run flag is set, i.e. the flags differ from MANAGER_TEST_NORMAL (zero). */
#define MANAGER_IS_TEST_RUN(m) ((m)->test_run_flags != MANAGER_TEST_NORMAL)
478
479 int manager_new(LookupScope scope, ManagerTestRunFlags test_run_flags, Manager **m);
480 Manager* manager_free(Manager *m);
481 DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
482
483 int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *root);
484
485 Job *manager_get_job(Manager *m, uint32_t id);
486 Unit *manager_get_unit(Manager *m, const char *name);
487
488 int manager_get_job_from_dbus_path(Manager *m, const char *s, Job **_j);
489
490 bool manager_unit_cache_should_retry_load(Unit *u);
491 int manager_load_unit_prepare(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
492 int manager_load_unit(Manager *m, const char *name, const char *path, sd_bus_error *e, Unit **_ret);
493 int manager_load_startable_unit_or_warn(Manager *m, const char *name, const char *path, Unit **ret);
494 int manager_load_unit_from_dbus_path(Manager *m, const char *s, sd_bus_error *e, Unit **_u);
495
496 int manager_add_job(Manager *m, JobType type, Unit *unit, JobMode mode, Set *affected_jobs, sd_bus_error *e, Job **_ret);
497 int manager_add_job_by_name(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, sd_bus_error *e, Job **_ret);
498 int manager_add_job_by_name_and_warn(Manager *m, JobType type, const char *name, JobMode mode, Set *affected_jobs, Job **ret);
499 int manager_propagate_reload(Manager *m, Unit *unit, JobMode mode, sd_bus_error *e);
500
501 void manager_clear_jobs(Manager *m);
502
503 void manager_unwatch_pid(Manager *m, pid_t pid);
504
505 unsigned manager_dispatch_load_queue(Manager *m);
506
507 int manager_default_environment(Manager *m);
508 int manager_transient_environment_add(Manager *m, char **plus);
509 int manager_client_environment_modify(Manager *m, char **minus, char **plus);
510 int manager_get_effective_environment(Manager *m, char ***ret);
511
512 int manager_set_default_rlimits(Manager *m, struct rlimit **default_rlimit);
513
514 void manager_trigger_run_queue(Manager *m);
515
516 int manager_loop(Manager *m);
517
518 int manager_reload(Manager *m);
519 Manager* manager_reloading_start(Manager *m);
520 void manager_reloading_stopp(Manager **m);
521
522 void manager_reset_failed(Manager *m);
523
524 void manager_send_unit_audit(Manager *m, Unit *u, int type, bool success);
525 void manager_send_unit_plymouth(Manager *m, Unit *u);
526
527 bool manager_unit_inactive_or_pending(Manager *m, const char *name);
528
529 void manager_check_finished(Manager *m);
530
531 void disable_printk_ratelimit(void);
532 void manager_recheck_dbus(Manager *m);
533 void manager_recheck_journal(Manager *m);
534
535 bool manager_get_show_status_on(Manager *m);
536 void manager_set_show_status(Manager *m, ShowStatus mode, const char *reason);
537 void manager_override_show_status(Manager *m, ShowStatus mode, const char *reason);
538
539 void manager_set_first_boot(Manager *m, bool b);
540
541 void manager_status_printf(Manager *m, StatusType type, const char *status, const char *format, ...) _printf_(4,5);
542
543 Set *manager_get_units_requiring_mounts_for(Manager *m, const char *path);
544
545 ManagerState manager_state(Manager *m);
546
547 int manager_update_failed_units(Manager *m, Unit *u, bool failed);
548
549 void manager_unref_uid(Manager *m, uid_t uid, bool destroy_now);
550 int manager_ref_uid(Manager *m, uid_t uid, bool clean_ipc);
551
552 void manager_unref_gid(Manager *m, gid_t gid, bool destroy_now);
553 int manager_ref_gid(Manager *m, gid_t gid, bool clean_ipc);
554
555 char* manager_taint_string(const Manager *m);
556
557 void manager_ref_console(Manager *m);
558 void manager_unref_console(Manager *m);
559
560 void manager_override_log_level(Manager *m, int level);
561 void manager_restore_original_log_level(Manager *m);
562
563 void manager_override_log_target(Manager *m, LogTarget target);
564 void manager_restore_original_log_target(Manager *m);
565
566 const char *manager_state_to_string(ManagerState m) _const_;
567 ManagerState manager_state_from_string(const char *s) _pure_;
568
569 const char *manager_get_confirm_spawn(Manager *m);
570 bool manager_is_confirm_spawn_disabled(Manager *m);
571 void manager_disable_confirm_spawn(void);
572
573 const char *manager_timestamp_to_string(ManagerTimestamp m) _const_;
574 ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_;
575 ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
576
577 usec_t manager_get_watchdog(Manager *m, WatchdogType t);
578 void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
579 void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
580 int manager_set_watchdog_pretimeout_governor(Manager *m, const char *governor);
581 int manager_override_watchdog_pretimeout_governor(Manager *m, const char *governor);
582
583 const char* oom_policy_to_string(OOMPolicy i) _const_;
584 OOMPolicy oom_policy_from_string(const char *s) _pure_;
585