1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 #pragma once
3 
/* Forward declarations, so that the types can be referenced by pointer before (or without) their full
 * definition. ExecStatus, ExecCommand, ExecContext, ExecRuntime and ExecParameters are defined further down
 * in this header; Manager is not defined here and is only used through pointers. */
4 typedef struct ExecStatus ExecStatus;
5 typedef struct ExecCommand ExecCommand;
6 typedef struct ExecContext ExecContext;
7 typedef struct ExecRuntime ExecRuntime;
8 typedef struct ExecParameters ExecParameters;
9 typedef struct Manager Manager;
10 
11 #include <sched.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <sys/capability.h>
15 
16 #include "cgroup-util.h"
17 #include "coredump-util.h"
18 #include "cpu-set-util.h"
19 #include "exec-util.h"
20 #include "fdset.h"
21 #include "list.h"
22 #include "missing_resource.h"
23 #include "namespace.h"
24 #include "nsflags.h"
25 #include "numa-util.h"
26 #include "path-util.h"
27 #include "time-util.h"
28 
/* 64 MiB (64 * 1024 * 1024), as an unsigned constant.
 * NOTE(review): presumably the upper bound for ExecContext's stdin_data_size — confirm against the users of
 * this macro. */
29 #define EXEC_STDIN_DATA_MAX (64U*1024U*1024U)
30 
/* Type of ExecContext's utmp_mode field. The valid values count up from 0, so _EXEC_UTMP_MODE_MAX equals the
 * number of valid values; _EXEC_UTMP_MODE_INVALID is the negative errno value -EINVAL.
 * NOTE(review): the three modes presumably select the kind of utmp record that is written — confirm. */
31 typedef enum ExecUtmpMode {
32         EXEC_UTMP_INIT,
33         EXEC_UTMP_LOGIN,
34         EXEC_UTMP_USER,
35         _EXEC_UTMP_MODE_MAX,
36         _EXEC_UTMP_MODE_INVALID = -EINVAL,
37 } ExecUtmpMode;
38 
/* Type of ExecContext's std_input field. The valid values count up from 0, so _EXEC_INPUT_MAX equals the
 * number of valid values; _EXEC_INPUT_INVALID is the negative errno value -EINVAL.
 * NOTE(review): EXEC_INPUT_NAMED_FD, EXEC_INPUT_DATA and EXEC_INPUT_FILE presumably pair with ExecContext's
 * stdio_fdname[], stdin_data and stdio_file[] members respectively — confirm. */
39 typedef enum ExecInput {
40         EXEC_INPUT_NULL,
41         EXEC_INPUT_TTY,
42         EXEC_INPUT_TTY_FORCE,
43         EXEC_INPUT_TTY_FAIL,
44         EXEC_INPUT_SOCKET,
45         EXEC_INPUT_NAMED_FD,
46         EXEC_INPUT_DATA,
47         EXEC_INPUT_FILE,
48         _EXEC_INPUT_MAX,
49         _EXEC_INPUT_INVALID = -EINVAL,
50 } ExecInput;
51 
/* Type of ExecContext's std_output and std_error fields. The valid values count up from 0, so
 * _EXEC_OUTPUT_MAX equals the number of valid values; _EXEC_OUTPUT_INVALID is the negative errno value
 * -EINVAL.
 * NOTE(review): EXEC_OUTPUT_NAMED_FD and the EXEC_OUTPUT_FILE* values presumably pair with ExecContext's
 * stdio_fdname[] and stdio_file[] members — confirm. */
52 typedef enum ExecOutput {
53         EXEC_OUTPUT_INHERIT,
54         EXEC_OUTPUT_NULL,
55         EXEC_OUTPUT_TTY,
56         EXEC_OUTPUT_KMSG,
57         EXEC_OUTPUT_KMSG_AND_CONSOLE,
58         EXEC_OUTPUT_JOURNAL,
59         EXEC_OUTPUT_JOURNAL_AND_CONSOLE,
60         EXEC_OUTPUT_SOCKET,
61         EXEC_OUTPUT_NAMED_FD,
62         EXEC_OUTPUT_FILE,
63         EXEC_OUTPUT_FILE_APPEND,
64         EXEC_OUTPUT_FILE_TRUNCATE,
65         _EXEC_OUTPUT_MAX,
66         _EXEC_OUTPUT_INVALID = -EINVAL,
67 } ExecOutput;
68 
/* Type of ExecContext's runtime_directory_preserve_mode field. The valid values count up from 0, so
 * _EXEC_PRESERVE_MODE_MAX equals the number of valid values; _EXEC_PRESERVE_MODE_INVALID is the negative
 * errno value -EINVAL. */
69 typedef enum ExecPreserveMode {
70         EXEC_PRESERVE_NO,
71         EXEC_PRESERVE_YES,
72         EXEC_PRESERVE_RESTART,
73         _EXEC_PRESERVE_MODE_MAX,
74         _EXEC_PRESERVE_MODE_INVALID = -EINVAL,
75 } ExecPreserveMode;
76 
/* Type of ExecContext's keyring_mode field. The valid values count up from 0, so _EXEC_KEYRING_MODE_MAX
 * equals the number of valid values; _EXEC_KEYRING_MODE_INVALID is the negative errno value -EINVAL. */
77 typedef enum ExecKeyringMode {
78         EXEC_KEYRING_INHERIT,
79         EXEC_KEYRING_PRIVATE,
80         EXEC_KEYRING_SHARED,
81         _EXEC_KEYRING_MODE_MAX,
82         _EXEC_KEYRING_MODE_INVALID = -EINVAL,
83 } ExecKeyringMode;
84 
85 /* Contains start and exit information about an executed command.  */
/* One ExecStatus is embedded in every ExecCommand (member exec_status).
 * NOTE(review): the fields are presumably filled in by exec_status_start() and exec_status_exit(), which are
 * declared at the end of this header — confirm in the implementation. */
86 struct ExecStatus {
87         dual_timestamp start_timestamp;
88         dual_timestamp exit_timestamp;
89         pid_t pid;
90         int code;     /* as in siginfo_t::si_code */
91         int status;   /* as in siginfo_t::si_status */
92 };
93 
94 /* Stores information about commands we execute. Covers both configuration settings as well as runtime data. */
95 struct ExecCommand {
96         char *path;               /* NOTE(review): presumably the path of the binary to execute — confirm */
97         char **argv;              /* NOTE(review): presumably a NULL-terminated argument vector — confirm */
98         ExecStatus exec_status;   /* the runtime data: start/exit information, see ExecStatus */
99         ExecCommandFlags flags;   /* ExecCommandFlags is declared outside of this header */
100         LIST_FIELDS(ExecCommand, command); /* useful for chaining commands */
101 };
102 
103 /* Encapsulates certain aspects of the runtime environment that is to be shared between multiple otherwise separate
104  * invocations of commands. Specifically, this allows sharing of /tmp and /var/tmp data as well as network namespaces
105  * between invocations of commands. This is a reference counted object, with one reference taken by each currently
106  * active command invocation that wants to share this runtime. */
107 struct ExecRuntime {
108         unsigned n_ref; /* The reference counter described above */
109 
110         Manager *manager;
111 
112         char *id; /* Unit id of the owner */
113 
        /* NOTE(review): presumably the directories backing the shared /tmp and /var/tmp mentioned above —
         * confirm. */
114         char *tmp_dir;
115         char *var_tmp_dir;
116 
117         /* An AF_UNIX socket pair, that contains a datagram containing a file descriptor referring to the network
118          * namespace. */
119         int netns_storage_socket[2];
120 
121         /* Like netns_storage_socket, but the file descriptor is referring to the IPC namespace. */
122         int ipcns_storage_socket[2];
123 };
124 
/* Identifies one of the directory classes managed per ExecContext. The values start at 0 and serve both as
 * index into ExecContext's directories[] array (which has _EXEC_DIRECTORY_TYPE_MAX entries) and as bit
 * position for the matching ExecCleanMask flag. _EXEC_DIRECTORY_TYPE_INVALID is the negative errno value
 * -EINVAL. */
125 typedef enum ExecDirectoryType {
126         EXEC_DIRECTORY_RUNTIME = 0,
127         EXEC_DIRECTORY_STATE,
128         EXEC_DIRECTORY_CACHE,
129         EXEC_DIRECTORY_LOGS,
130         EXEC_DIRECTORY_CONFIGURATION,
131         _EXEC_DIRECTORY_TYPE_MAX,
132         _EXEC_DIRECTORY_TYPE_INVALID = -EINVAL,
133 } ExecDirectoryType;
134 
/* A single entry of an ExecDirectory's items array; entries are added with exec_directory_add(), which takes
 * the same path/symlinks pair.
 * NOTE(review): symlinks is presumably a NULL-terminated list of additional names pointing to path —
 * confirm. */
135 typedef struct ExecDirectoryItem {
136         char *path;
137         char **symlinks;
138 } ExecDirectoryItem;
139 
/* The configured directories of one ExecDirectoryType; ExecContext carries one of these per type in its
 * directories[] array. Released with exec_directory_done().
 * NOTE(review): items presumably points to an array of n_items entries, and mode is presumably the access
 * mode applied to the directories — confirm. */
140 typedef struct ExecDirectory {
141         mode_t mode;
142         size_t n_items;
143         ExecDirectoryItem *items;
144 } ExecDirectory;
145 
/* Bit mask with one bit per ExecDirectoryType value: each flag is 1U shifted left by the matching
 * EXEC_DIRECTORY_* value. */
146 typedef enum ExecCleanMask {
147         /* In case you wonder why the bitmask below doesn't use "directory" in its name: we want to keep this
148          * generic so that .timer timestamp files can nicely be covered by this too, and similar. */
149         EXEC_CLEAN_RUNTIME       = 1U << EXEC_DIRECTORY_RUNTIME,
150         EXEC_CLEAN_STATE         = 1U << EXEC_DIRECTORY_STATE,
151         EXEC_CLEAN_CACHE         = 1U << EXEC_DIRECTORY_CACHE,
152         EXEC_CLEAN_LOGS          = 1U << EXEC_DIRECTORY_LOGS,
153         EXEC_CLEAN_CONFIGURATION = 1U << EXEC_DIRECTORY_CONFIGURATION,
154         EXEC_CLEAN_NONE          = 0,
155         EXEC_CLEAN_ALL           = (1U << _EXEC_DIRECTORY_TYPE_MAX) - 1, /* every bit below _EXEC_DIRECTORY_TYPE_MAX set */
156         _EXEC_CLEAN_MASK_INVALID = -EINVAL,
157 } ExecCleanMask;
158 
159 /* A credential configured with LoadCredential= */
/* Objects of this type are the values of ExecContext's load_credentials hashmap and are released with
 * exec_load_credential_free().
 * NOTE(review): path is presumably the source the credential is loaded from — confirm. */
160 typedef struct ExecLoadCredential {
161         char *id, *path;
162         bool encrypted;
163 } ExecLoadCredential;
164 
165 /* A credential configured with SetCredential= */
/* Objects of this type are the values of ExecContext's set_credentials hashmap and are released with
 * exec_set_credential_free().
 * NOTE(review): size is presumably the length of the data buffer in bytes — confirm. */
166 typedef struct ExecSetCredential {
167         char *id;
168         bool encrypted;
169         void *data;
170         size_t size;
171 } ExecSetCredential;
172 
173 /* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
174  * changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
175  * change after being loaded. */
176 struct ExecContext {
177         char **environment;
178         char **environment_files;
179         char **pass_environment;
180         char **unset_environment;
181 
182         struct rlimit *rlimit[_RLIMIT_MAX]; /* one pointer slot per resource limit, _RLIMIT_MAX slots in total */
183         char *working_directory, *root_directory, *root_image, *root_verity, *root_hash_path, *root_hash_sig_path;
184         void *root_hash, *root_hash_sig;
185         size_t root_hash_size, root_hash_sig_size; /* NOTE(review): presumably the byte sizes of root_hash and root_hash_sig — confirm */
186         LIST_HEAD(MountOptions, root_image_options);
187         bool working_directory_missing_ok:1;
188         bool working_directory_home:1;
189 
            /* NOTE(review): the *_set bits below presumably record whether the like-named setting further down
             * (oom_score_adjust, coredump_filter, nice, ioprio, cpu_sched_*, mount_apivfs) was configured
             * explicitly — confirm. */
190         bool oom_score_adjust_set:1;
191         bool coredump_filter_set:1;
192         bool nice_set:1;
193         bool ioprio_set:1;
194         bool cpu_sched_set:1;
195         bool mount_apivfs_set:1;
196 
197         /* This is not exposed to the user but available internally. We need it to make sure that whenever we
198          * spawn /usr/bin/mount it is run in the same process group as us so that the autofs logic detects
199          * that it belongs to us and we don't enter a trigger loop. */
200         bool same_pgrp;
201 
202         bool cpu_sched_reset_on_fork;
203         bool non_blocking;
204 
205         mode_t umask;
206         int oom_score_adjust;
207         int nice;
208         int ioprio;
209         int cpu_sched_policy;
210         int cpu_sched_priority;
211         uint64_t coredump_filter;
212 
213         CPUSet cpu_set;
214         NUMAPolicy numa_policy;
215         bool cpu_affinity_from_numa;
216 
217         ExecInput std_input;
218         ExecOutput std_output;
219         ExecOutput std_error;
220         bool stdio_as_fds;
            /* Three entries each. NOTE(review): presumably indexed by the stdio file descriptor number
             * (0 = stdin, 1 = stdout, 2 = stderr) — confirm. */
221         char *stdio_fdname[3];
222         char *stdio_file[3];
223 
            /* NOTE(review): stdin_data_size is presumably the length of stdin_data in bytes, bounded by
             * EXEC_STDIN_DATA_MAX — confirm. */
224         void *stdin_data;
225         size_t stdin_data_size;
226 
227         nsec_t timer_slack_nsec;
228 
229         char *tty_path;
230 
231         bool tty_reset;
232         bool tty_vhangup;
233         bool tty_vt_disallocate;
234 
235         unsigned tty_rows;
236         unsigned tty_cols;
237 
238         bool ignore_sigpipe;
239 
240         ExecKeyringMode keyring_mode;
241 
242         /* Since resolving these names might involve socket
243          * connections and we don't want to deadlock ourselves these
244          * names are resolved on execution only and in the child
245          * process. */
246         char *user;
247         char *group;
248         char **supplementary_groups;
249 
250         char *pam_name;
251 
252         char *utmp_id;
253         ExecUtmpMode utmp_mode;
254 
255         bool no_new_privileges;
256 
257         bool selinux_context_ignore;
258         bool apparmor_profile_ignore;
259         bool smack_process_label_ignore;
260 
261         char *selinux_context;
262         char *apparmor_profile;
263         char *smack_process_label;
264 
265         char **read_write_paths, **read_only_paths, **inaccessible_paths, **exec_paths, **no_exec_paths;
266         char **exec_search_path;
267         unsigned long mount_flags;
            /* Each pointer below is paired with an n_* element count declared right after it. */
268         BindMount *bind_mounts;
269         size_t n_bind_mounts;
270         TemporaryFileSystem *temporary_filesystems;
271         size_t n_temporary_filesystems;
272         MountImage *mount_images;
273         size_t n_mount_images;
274         MountImage *extension_images;
275         size_t n_extension_images;
276         char **extension_directories;
277 
278         uint64_t capability_bounding_set;
279         uint64_t capability_ambient_set;
280         int secure_bits;
281 
282         int syslog_priority;
283         bool syslog_level_prefix;
284         char *syslog_identifier;
285 
286         struct iovec* log_extra_fields;
287         size_t n_log_extra_fields;
288 
289         usec_t log_ratelimit_interval_usec;
290         unsigned log_ratelimit_burst;
291 
292         int log_level_max;
293 
294         char *log_namespace;
295 
296         ProtectProc protect_proc;  /* hidepid= */
297         ProcSubset proc_subset;    /* subset= */
298 
299         bool private_tmp;
300         bool private_network;
301         bool private_devices;
302         bool private_users;
303         bool private_mounts;
304         bool private_ipc;
305         bool protect_kernel_tunables;
306         bool protect_kernel_modules;
307         bool protect_kernel_logs;
308         bool protect_clock;
309         bool protect_control_groups;
310         ProtectSystem protect_system;
311         ProtectHome protect_home;
312         bool protect_hostname;
313         bool mount_apivfs;
314 
315         bool dynamic_user;
316         bool remove_ipc;
317 
318         bool memory_deny_write_execute;
319         bool restrict_realtime;
320         bool restrict_suid_sgid;
321 
322         bool lock_personality;
323         unsigned long personality;
324 
325         unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
326 
            /* Both members are evaluated by exec_context_restrict_filesystems_set() below. */
327         Set *restrict_filesystems;
328         bool restrict_filesystems_allow_list:1;
329 
330         Hashmap *syscall_filter;
331         Set *syscall_archs;
332         int syscall_errno;
333         bool syscall_allow_list:1;
334 
335         Hashmap *syscall_log;
336         bool syscall_log_allow_list:1; /* Log listed system calls */
337 
338         bool address_families_allow_list:1;
339         Set *address_families;
340 
341         char *network_namespace_path;
342         char *ipc_namespace_path;
343 
344         ExecDirectory directories[_EXEC_DIRECTORY_TYPE_MAX]; /* one ExecDirectory per ExecDirectoryType value */
345         ExecPreserveMode runtime_directory_preserve_mode;
346         usec_t timeout_clean_usec;
347 
348         Hashmap *set_credentials; /* output id → ExecSetCredential */
349         Hashmap *load_credentials; /* output id → ExecLoadCredential */
350 };
351 
exec_context_restrict_namespaces_set(const ExecContext * c)352 static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
353         assert(c);
354 
355         return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
356 }
357 
exec_context_restrict_filesystems_set(const ExecContext * c)358 static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) {
359         assert(c);
360 
361         return c->restrict_filesystems_allow_list ||
362           !set_isempty(c->restrict_filesystems);
363 }
364 
exec_context_with_rootfs(const ExecContext * c)365 static inline bool exec_context_with_rootfs(const ExecContext *c) {
366         assert(c);
367 
368         /* Checks if RootDirectory= or RootImage= are used */
369 
370         return !empty_or_root(c->root_directory) || c->root_image;
371 }
372 
/* Bit flags, each occupying a distinct bit (1 << 0 … 1 << 13), that may be OR-ed together. This is the type of
 * ExecParameters' flags field. */
373 typedef enum ExecFlags {
374         EXEC_APPLY_SANDBOXING      = 1 << 0,
375         EXEC_APPLY_CHROOT          = 1 << 1,
376         EXEC_APPLY_TTY_STDIN       = 1 << 2,
377         EXEC_PASS_LOG_UNIT         = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
378         EXEC_CHOWN_DIRECTORIES     = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
379         EXEC_NSS_DYNAMIC_BYPASS    = 1 << 5, /* Set the SYSTEMD_NSS_DYNAMIC_BYPASS environment variable, to disable nss-systemd blocking on PID 1, for use by dbus-daemon */
380         EXEC_CGROUP_DELEGATE       = 1 << 6,
381         EXEC_IS_CONTROL            = 1 << 7,
382         EXEC_CONTROL_CGROUP        = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only if EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL are set, too */
383         EXEC_WRITE_CREDENTIALS     = 1 << 9, /* Set up the credential store logic */
384 
385         /* The following are not used by execute.c, but by consumers internally */
386         EXEC_PASS_FDS              = 1 << 10,
387         EXEC_SETENV_RESULT         = 1 << 11,
388         EXEC_SET_WATCHDOG          = 1 << 12,
389         EXEC_SETENV_MONITOR_RESULT = 1 << 13, /* Pass exit status to OnFailure= and OnSuccess= dependencies. */
390 } ExecFlags;
391 
392 /* Parameters for a specific invocation of a command. This structure is put together right before a command is
393  * executed. */
394 struct ExecParameters {
395         char **environment;
396 
            /* NOTE(review): fds and fd_names are presumably parallel arrays holding n_socket_fds + n_storage_fds
             * entries — confirm in the implementation. */
397         int *fds;
398         char **fd_names;
399         size_t n_socket_fds;
400         size_t n_storage_fds;
401 
402         ExecFlags flags; /* Combination of the ExecFlags bits */
403         bool selinux_context_net:1;
404 
405         CGroupMask cgroup_supported;
406         const char *cgroup_path;
407 
408         char **prefix;
409         const char *received_credentials_directory;
410         const char *received_encrypted_credentials_directory;
411 
412         const char *confirm_spawn;
413 
414         usec_t watchdog_usec;
415 
416         int *idle_pipe;
417 
418         int stdin_fd;
419         int stdout_fd;
420         int stderr_fd;
421 
422         /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
423         int exec_fd;
424 
425         const char *notify_socket;
426 };
427 
428 #include "unit.h"
429 #include "dynamic-user.h"
430 
/* Takes the unit, the command, the (read-only) configuration in 'context', the per-invocation parameters in
 * 'exec_params', plus an optional shared runtime and dynamic credentials objects.
 * NOTE(review): presumably forks off and executes 'command', stores the PID of the new process in *ret, and
 * returns a negative errno-style value on failure — confirm in the implementation. */
431 int exec_spawn(Unit *unit,
432                ExecCommand *command,
433                const ExecContext *context,
434                const ExecParameters *exec_params,
435                ExecRuntime *runtime,
436                DynamicCreds *dynamic_creds,
437                pid_t *ret);
438 
/* Helpers operating on ExecCommand objects. Going by the signatures, the *_array variants take an array of n
 * elements, while the *_list variants take the head of a chain linked via ExecCommand's LIST_FIELDS().
 * exec_command_set() and exec_command_append() are variadic and marked _sentinel_, i.e. the argument list has
 * to be terminated with NULL.
 * NOTE(review): ownership and return value semantics are defined by the implementation — confirm there. */
439 void exec_command_done_array(ExecCommand *c, size_t n);
440 ExecCommand* exec_command_free_list(ExecCommand *c);
441 void exec_command_free_array(ExecCommand **c, size_t n);
442 void exec_command_reset_status_array(ExecCommand *c, size_t n);
443 void exec_command_reset_status_list_array(ExecCommand **c, size_t n);
444 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix);
445 void exec_command_append_list(ExecCommand **l, ExecCommand *e);
446 int exec_command_set(ExecCommand *c, const char *path, ...) _sentinel_;
447 int exec_command_append(ExecCommand *c, const char *path, ...) _sentinel_;
448 
/* Helpers operating on ExecContext objects. Functions taking a 'const ExecContext *' only read the
 * configuration; the others may modify the object.
 * NOTE(review): the functions returning int presumably follow the negative-errno convention — confirm. */
449 void exec_context_init(ExecContext *c);
450 void exec_context_done(ExecContext *c);
451 void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix);
452 
453 int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_root);
454 int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_root, const char *unit);
455 
456 const char* exec_context_fdname(const ExecContext *c, int fd_index);
457 
458 bool exec_context_may_touch_console(const ExecContext *c);
459 bool exec_context_maintains_privileges(const ExecContext *c);
460 
461 int exec_context_get_effective_ioprio(const ExecContext *c);
462 bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
463 
464 void exec_context_free_log_extra_fields(ExecContext *c);
465 
466 void exec_context_revert_tty(ExecContext *c);
467 
468 int exec_context_get_clean_directories(ExecContext *c, char **prefix, ExecCleanMask mask, char ***ret);
469 int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret);
470 
/* Helpers operating on ExecStatus objects.
 * NOTE(review): exec_status_start() and exec_status_exit() presumably record the start respectively exit
 * information (timestamps, pid, code, status) in *s — confirm in the implementation. */
471 void exec_status_start(ExecStatus *s, pid_t pid);
472 void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status);
473 void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix);
474 void exec_status_reset(ExecStatus *s);
475 
/* Helpers operating on the reference counted ExecRuntime objects, including their serialization to and
 * deserialization from a FILE stream plus an FDSet.
 * NOTE(review): exec_runtime_acquire() presumably returns a referenced object in *ret, and
 * exec_runtime_unref() presumably drops one reference — confirm in the implementation. */
476 int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *name, bool create, ExecRuntime **ret);
477 ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy);
478 
479 int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds);
480 int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds);
481 int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds);
482 void exec_runtime_vacuum(Manager *m);
483 
/* Miscellaneous helpers for ExecParameters, the credential objects and ExecDirectory. */
484 void exec_params_clear(ExecParameters *p);
485 
486 bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
487 
/* Destructors for the two credential object types, each followed by a DEFINE_TRIVIAL_CLEANUP_FUNC()
 * invocation for it.
 * NOTE(review): the macro is defined outside of this header; it presumably generates a cleanup helper that
 * calls the destructor on a pointer variable going out of scope — confirm. */
488 ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc);
489 DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSetCredential*, exec_set_credential_free);
490 
491 ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc);
492 DEFINE_TRIVIAL_CLEANUP_FUNC(ExecLoadCredential*, exec_load_credential_free);
493 
494 void exec_directory_done(ExecDirectory *d);
495 int exec_directory_add(ExecDirectoryItem **d, size_t *n, const char *path, char **symlinks);
496 
/* NOTE(review): presumably the hash_ops to use for ExecContext's set_credentials and load_credentials
 * hashmaps — confirm. */
497 extern const struct hash_ops exec_set_credential_hash_ops;
498 extern const struct hash_ops exec_load_credential_hash_ops;
499 
/* Conversion between the enums defined in this header and strings, one _to_string()/_from_string() pair per
 * enum. Note that the directory_type, directory_type_symlink and resource_type pairs all operate on
 * ExecDirectoryType.
 * NOTE(review): the _from_string() functions presumably return the enum's _INVALID value (-EINVAL) for
 * unrecognized input — confirm in the implementation. */
500 const char* exec_output_to_string(ExecOutput i) _const_;
501 ExecOutput exec_output_from_string(const char *s) _pure_;
502 
503 const char* exec_input_to_string(ExecInput i) _const_;
504 ExecInput exec_input_from_string(const char *s) _pure_;
505 
506 const char* exec_utmp_mode_to_string(ExecUtmpMode i) _const_;
507 ExecUtmpMode exec_utmp_mode_from_string(const char *s) _pure_;
508 
509 const char* exec_preserve_mode_to_string(ExecPreserveMode i) _const_;
510 ExecPreserveMode exec_preserve_mode_from_string(const char *s) _pure_;
511 
512 const char* exec_keyring_mode_to_string(ExecKeyringMode i) _const_;
513 ExecKeyringMode exec_keyring_mode_from_string(const char *s) _pure_;
514 
515 const char* exec_directory_type_to_string(ExecDirectoryType i) _const_;
516 ExecDirectoryType exec_directory_type_from_string(const char *s) _pure_;
517 
518 const char* exec_directory_type_symlink_to_string(ExecDirectoryType i) _const_;
519 ExecDirectoryType exec_directory_type_symlink_from_string(const char *s) _pure_;
520 
521 const char* exec_resource_type_to_string(ExecDirectoryType i) _const_;
522 ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
523 
/* NOTE(review): presumably reports whether executing with the given context, parameters and runtime
 * requires setting up a mount namespace — confirm in the implementation. */
524 bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);
525