1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/mount.h>
4
5 #include "copy.h"
6 #include "creds-util.h"
7 #include "fileio.h"
8 #include "format-util.h"
9 #include "fs-util.h"
10 #include "import-creds.h"
11 #include "io-util.h"
12 #include "mkdir-label.h"
13 #include "mount-util.h"
14 #include "mountpoint-util.h"
15 #include "parse-util.h"
16 #include "path-util.h"
17 #include "proc-cmdline.h"
18 #include "recurse-dir.h"
19 #include "strv.h"
20
21 /* This imports credentials passed in from environments higher up (VM manager, boot loader, …) and rearranges
22 * them so that later code can access them using our regular credential protocol
 * (i.e. $CREDENTIALS_DIRECTORY). It's supposed to be minimal glue that unifies how PID 1 (and
 * generators invoked by it) acquire credentials from outside, mimicking what we already support for
 * containers, but on VM/physical environments.
26 *
27 * This does three things:
28 *
29 * 1. It imports credentials picked up by sd-boot (and placed in the /.extra/credentials/ dir in the initrd)
30 * and puts them in /run/credentials/@encrypted/. Note that during the initrd→host transition the initrd root
31 * file system is cleaned out, thus it is essential we pick up these files before they are deleted. Note
32 * that these credentials originate from an untrusted source, i.e. the ESP and are not
33 * pre-authenticated. They still have to be authenticated before use.
34 *
35 * 2. It imports credentials from /proc/cmdline and puts them in /run/credentials/@system/. These come from a
36 * trusted environment (i.e. the boot loader), and are typically authenticated (if authentication is done
37 * at all). However, they are world-readable, which might be less than ideal. Hence only use this for data
38 * that doesn't require trust.
39 *
40 * 3. It imports credentials passed in through qemu's fw_cfg logic. Specifically, credential data passed in
41 * /sys/firmware/qemu_fw_cfg/by_name/opt/io.systemd.credentials/ is picked up and also placed in
42 * /run/credentials/@system/.
43 *
44 * If it picked up any credentials it will set the $CREDENTIALS_DIRECTORY and
45 * $ENCRYPTED_CREDENTIALS_DIRECTORY environment variables to point to these directories, so that processes
46 * can find them there later on. If "ramfs" is available $CREDENTIALS_DIRECTORY will be backed by it (but
47 * $ENCRYPTED_CREDENTIALS_DIRECTORY is just a regular tmpfs).
48 *
49 * Net result: the service manager can pick up trusted credentials from $CREDENTIALS_DIRECTORY afterwards,
50 * and untrusted ones from $ENCRYPTED_CREDENTIALS_DIRECTORY. */
51
/* Bookkeeping shared by the import routines in this file while they fill one target directory. */
typedef struct ImportCredentialContext ImportCredentialContext;

struct ImportCredentialContext {
        int target_dir_fd;      /* fd of the directory credentials are written into; negative until acquired */
        size_t size_sum;        /* total number of bytes of all credentials imported so far */
        unsigned n_credentials; /* number of credentials imported so far */
};
57
/* Cleanup handler for an ImportCredentialContext (used through _cleanup_() in this file). Closes the
 * target directory fd via safe_close() and writes safe_close()'s return value back into the field
 * (presumably an "invalid fd" marker). The structure itself is not freed. */
static void import_credentials_context_free(ImportCredentialContext *c) {
        assert(c);

        int fd = c->target_dir_fd;

        c->target_dir_fd = safe_close(fd);
}
63
/* Returns an fd referring to ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, creating the directory and opening
 * it on first use. The fd is cached in c->target_dir_fd (and remains owned by the context), so later
 * calls simply hand back the cached value. On failure an error is logged and a negative errno-style
 * value is returned. */
static int acquire_encrypted_credential_directory(ImportCredentialContext *c) {
        int r;

        assert(c);

        if (c->target_dir_fd < 0) {
                /* Slow path: nothing cached yet, create and open the directory now. */
                r = mkdir_safe_label(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, 0700, 0, 0, MKDIR_WARN_MODE);
                if (r < 0)
                        return log_error_errno(r, "Failed to create " ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY ": %m");

                c->target_dir_fd = open(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
                if (c->target_dir_fd < 0)
                        return log_error_errno(errno, "Failed to open " ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY ": %m");
        }

        return c->target_dir_fd;
}
82
open_credential_file_for_write(int target_dir_fd,const char * dir_name,const char * n)83 static int open_credential_file_for_write(int target_dir_fd, const char *dir_name, const char *n) {
84 int fd;
85
86 assert(target_dir_fd >= 0);
87 assert(dir_name);
88 assert(n);
89
90 fd = openat(target_dir_fd, n, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL|O_NOFOLLOW, 0400);
91 if (fd < 0) {
92 if (errno == EEXIST) /* In case of EEXIST we'll only debug log! */
93 return log_debug_errno(errno, "Credential '%s' set twice, ignoring.", n);
94
95 return log_error_errno(errno, "Failed to create %s/%s: %m", dir_name, n);
96 }
97
98 return fd;
99 }
100
/* Checks whether a credential 'name' of 'size' bytes may still be imported into context 'c'.
 *
 * Returns false (after logging a warning) if the credential alone exceeds CREDENTIAL_SIZE_MAX, or if
 * adding it to the bytes already accounted for in c->size_sum would exceed
 * CREDENTIALS_TOTAL_SIZE_MAX. Returns true otherwise. The context is not modified here. */
static bool credential_size_ok(ImportCredentialContext *c, const char *name, uint64_t size) {
        assert(c);
        assert(name);

        if (size > CREDENTIAL_SIZE_MAX) {
                log_warning("Credential '%s' is larger than allowed limit (%s > %s), skipping.", name, FORMAT_BYTES(size), FORMAT_BYTES(CREDENTIAL_SIZE_MAX));
        } else if (size > CREDENTIALS_TOTAL_SIZE_MAX - c->size_sum) {
                /* The check compares against the remaining budget (a subtraction) rather than
                 * computing size_sum + size. */
                log_warning("Accumulated credential size would be above allowed limit (%s+%s > %s), skipping '%s'.",
                            FORMAT_BYTES(c->size_sum), FORMAT_BYTES(size), FORMAT_BYTES(CREDENTIALS_TOTAL_SIZE_MAX), name);
        } else
                return true;

        return false;
}
118
finalize_credentials_dir(const char * dir,const char * envvar)119 static int finalize_credentials_dir(const char *dir, const char *envvar) {
120 int r;
121
122 assert(dir);
123 assert(envvar);
124
125 /* Try to make the credentials directory read-only now */
126
127 r = make_mount_point(dir);
128 if (r < 0)
129 log_warning_errno(r, "Failed to make '%s' a mount point, ignoring: %m", dir);
130 else
131 (void) mount_nofollow_verbose(LOG_WARNING, NULL, dir, NULL, MS_BIND|MS_NODEV|MS_NOEXEC|MS_NOSUID|MS_RDONLY|MS_REMOUNT, NULL);
132
133 if (setenv(envvar, dir, /* overwrite= */ true) < 0)
134 return log_error_errno(errno, "Failed to set $%s environment variable: %m", envvar);
135
136 return 0;
137 }
138
import_credentials_boot(void)139 static int import_credentials_boot(void) {
140 _cleanup_(import_credentials_context_free) ImportCredentialContext context = {
141 .target_dir_fd = -1,
142 };
143 int r;
144
145 /* systemd-stub will wrap sidecar *.cred files from the UEFI kernel image directory into initrd
146 * cpios, so that they unpack into /.extra/. We'll pick them up from there and copy them into /run/
147 * so that we can access them during the entire runtime (note that the initrd file system is erased
148 * during the initrd → host transition). Note that these credentials originate from an untrusted
149 * source (i.e. the ESP typically) and thus need to be authenticated later. We thus put them in a
150 * directory separate from the usual credentials which are from a trusted source. */
151
152 if (!in_initrd())
153 return 0;
154
155 FOREACH_STRING(p,
156 "/.extra/credentials/", /* specific to this boot menu */
157 "/.extra/global_credentials/") { /* boot partition wide */
158
159 _cleanup_free_ DirectoryEntries *de = NULL;
160 _cleanup_close_ int source_dir_fd = -1;
161
162 source_dir_fd = open(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOFOLLOW);
163 if (source_dir_fd < 0) {
164 if (errno == ENOENT) {
165 log_debug("No credentials passed via %s.", p);
166 continue;
167 }
168
169 log_warning_errno(errno, "Failed to open '%s', ignoring: %m", p);
170 continue;
171 }
172
173 r = readdir_all(source_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
174 if (r < 0) {
175 log_warning_errno(r, "Failed to read '%s' contents, ignoring: %m", p);
176 continue;
177 }
178
179 for (size_t i = 0; i < de->n_entries; i++) {
180 const struct dirent *d = de->entries[i];
181 _cleanup_close_ int cfd = -1, nfd = -1;
182 _cleanup_free_ char *n = NULL;
183 const char *e;
184 struct stat st;
185
186 e = endswith(d->d_name, ".cred");
187 if (!e)
188 continue;
189
190 /* drop .cred suffix (which we want in the ESP sidecar dir, but not for our internal
191 * processing) */
192 n = strndup(d->d_name, e - d->d_name);
193 if (!n)
194 return log_oom();
195
196 if (!credential_name_valid(n)) {
197 log_warning("Credential '%s' has invalid name, ignoring.", d->d_name);
198 continue;
199 }
200
201 cfd = openat(source_dir_fd, d->d_name, O_RDONLY|O_CLOEXEC);
202 if (cfd < 0) {
203 log_warning_errno(errno, "Failed to open %s, ignoring: %m", d->d_name);
204 continue;
205 }
206
207 if (fstat(cfd, &st) < 0) {
208 log_warning_errno(errno, "Failed to stat %s, ignoring: %m", d->d_name);
209 continue;
210 }
211
212 r = stat_verify_regular(&st);
213 if (r < 0) {
214 log_warning_errno(r, "Credential file %s is not a regular file, ignoring: %m", d->d_name);
215 continue;
216 }
217
218 if (!credential_size_ok(&context, n, st.st_size))
219 continue;
220
221 r = acquire_encrypted_credential_directory(&context);
222 if (r < 0)
223 return r;
224
225 nfd = open_credential_file_for_write(context.target_dir_fd, ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, n);
226 if (nfd == -EEXIST)
227 continue;
228 if (nfd < 0)
229 return r;
230
231 r = copy_bytes(cfd, nfd, st.st_size, 0);
232 if (r < 0) {
233 (void) unlinkat(context.target_dir_fd, n, 0);
234 return log_error_errno(r, "Failed to create credential '%s': %m", n);
235 }
236
237 context.size_sum += st.st_size;
238 context.n_credentials++;
239
240 log_debug("Successfully copied boot credential '%s'.", n);
241 }
242 }
243
244 if (context.n_credentials > 0) {
245 log_debug("Imported %u credentials from boot loader.", context.n_credentials);
246
247 r = finalize_credentials_dir(ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY, "ENCRYPTED_CREDENTIALS_DIRECTORY");
248 if (r < 0)
249 return r;
250 }
251
252 return 0;
253 }
254
/* Returns an fd referring to SYSTEM_CREDENTIALS_DIRECTORY, preparing the directory on first use. The fd
 * is cached in c->target_dir_fd (and remains owned by the context); later calls return the cached
 * value right away.
 *
 * Preparation: if the directory does not exist yet it is created. If it is not a mount point (yet), a
 * ramfs mount is attempted on top of it; failure of that mount is only logged by the mount helper and
 * otherwise ignored. On failure to check, create or open the directory an error is logged and a
 * negative errno-style value is returned. */
static int acquire_credential_directory(ImportCredentialContext *c) {
        int r;

        assert(c);

        if (c->target_dir_fd >= 0)
                return c->target_dir_fd;

        r = path_is_mount_point(SYSTEM_CREDENTIALS_DIRECTORY, NULL, 0);
        if (r < 0 && r != -ENOENT)
                return log_error_errno(r, "Failed to determine if " SYSTEM_CREDENTIALS_DIRECTORY " is a mount point: %m");

        if (r == -ENOENT) {
                r = mkdir_safe_label(SYSTEM_CREDENTIALS_DIRECTORY, 0700, 0, 0, MKDIR_WARN_MODE);
                if (r < 0)
                        return log_error_errno(r, "Failed to create " SYSTEM_CREDENTIALS_DIRECTORY " mount point: %m");

                r = 0; /* Now it exists and is not a mount point */
        }

        if (r == 0)
                /* If not a mountpoint yet, try to mount a ramfs there (so that this stuff isn't swapped
                 * out), but if that doesn't work, let's just use the regular tmpfs it already is. */
                (void) mount_nofollow_verbose(LOG_WARNING, "ramfs", SYSTEM_CREDENTIALS_DIRECTORY, "ramfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, "mode=0700");

        c->target_dir_fd = open(SYSTEM_CREDENTIALS_DIRECTORY, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
        if (c->target_dir_fd < 0)
                return log_error_errno(errno, "Failed to open " SYSTEM_CREDENTIALS_DIRECTORY ": %m");

        return c->target_dir_fd;
}
285
proc_cmdline_callback(const char * key,const char * value,void * data)286 static int proc_cmdline_callback(const char *key, const char *value, void *data) {
287 ImportCredentialContext *c = ASSERT_PTR(data);
288 _cleanup_free_ char *n = NULL;
289 _cleanup_close_ int nfd = -1;
290 const char *colon;
291 size_t l;
292 int r;
293
294 assert(key);
295
296 if (!proc_cmdline_key_streq(key, "systemd.set_credential"))
297 return 0;
298
299 colon = value ? strchr(value, ':') : NULL;
300 if (!colon) {
301 log_warning("Credential assignment through kernel command line lacks ':' character, ignoring: %s", value);
302 return 0;
303 }
304
305 n = strndup(value, colon - value);
306 if (!n)
307 return log_oom();
308
309 if (!credential_name_valid(n)) {
310 log_warning("Credential name '%s' is invalid, ignoring.", n);
311 return 0;
312 }
313
314 colon++;
315 l = strlen(colon);
316
317 if (!credential_size_ok(c, n, l))
318 return 0;
319
320 r = acquire_credential_directory(c);
321 if (r < 0)
322 return r;
323
324 nfd = open_credential_file_for_write(c->target_dir_fd, SYSTEM_CREDENTIALS_DIRECTORY, n);
325 if (nfd == -EEXIST)
326 return 0;
327 if (nfd < 0)
328 return r;
329
330 r = loop_write(nfd, colon, l, /* do_poll= */ false);
331 if (r < 0) {
332 (void) unlinkat(c->target_dir_fd, n, 0);
333 return log_error_errno(r, "Failed to write credential: %m");
334 }
335
336 c->size_sum += l;
337 c->n_credentials++;
338
339 log_debug("Successfully processed kernel command line credential '%s'.", n);
340
341 return 0;
342 }
343
/* Runs proc_cmdline_callback() over the kernel command line, accounting imported credentials in 'c'.
 * Returns 0 on success; if parsing fails the error is logged and returned as negative errno-style
 * value. */
static int import_credentials_proc_cmdline(ImportCredentialContext *c) {
        int r;

        assert(c);

        r = proc_cmdline_parse(proc_cmdline_callback, c, 0);

        return r < 0 ? log_error_errno(r, "Failed to parse /proc/cmdline: %m") : 0;
}
355
356 #define QEMU_FWCFG_PATH "/sys/firmware/qemu_fw_cfg/by_name/opt/io.systemd.credentials"
357
import_credentials_qemu(ImportCredentialContext * c)358 static int import_credentials_qemu(ImportCredentialContext *c) {
359 _cleanup_free_ DirectoryEntries *de = NULL;
360 _cleanup_close_ int source_dir_fd = -1;
361 int r;
362
363 assert(c);
364
365 source_dir_fd = open(QEMU_FWCFG_PATH, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
366 if (source_dir_fd < 0) {
367 if (errno == ENOENT) {
368 log_debug("No credentials passed via fw_cfg.");
369 return 0;
370 }
371
372 log_warning_errno(errno, "Failed to open '" QEMU_FWCFG_PATH "', ignoring: %m");
373 return 0;
374 }
375
376 r = readdir_all(source_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
377 if (r < 0) {
378 log_warning_errno(r, "Failed to read '" QEMU_FWCFG_PATH "' contents, ignoring: %m");
379 return 0;
380 }
381
382 for (size_t i = 0; i < de->n_entries; i++) {
383 const struct dirent *d = de->entries[i];
384 _cleanup_close_ int vfd = -1, rfd = -1, nfd = -1;
385 _cleanup_free_ char *szs = NULL;
386 uint64_t sz;
387
388 if (!credential_name_valid(d->d_name)) {
389 log_warning("Credential '%s' has invalid name, ignoring.", d->d_name);
390 continue;
391 }
392
393 vfd = openat(source_dir_fd, d->d_name, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
394 if (vfd < 0) {
395 log_warning_errno(errno, "Failed to open '" QEMU_FWCFG_PATH "'/%s/, ignoring: %m", d->d_name);
396 continue;
397 }
398
399 r = read_virtual_file_at(vfd, "size", LINE_MAX, &szs, NULL);
400 if (r < 0) {
401 log_warning_errno(r, "Failed to read '" QEMU_FWCFG_PATH "'/%s/size, ignoring: %m", d->d_name);
402 continue;
403 }
404
405 r = safe_atou64(strstrip(szs), &sz);
406 if (r < 0) {
407 log_warning_errno(r, "Failed to parse size of credential '%s', ignoring: %s", d->d_name, szs);
408 continue;
409 }
410
411 if (!credential_size_ok(c, d->d_name, sz))
412 continue;
413
414 /* Ideally we'd just symlink the data here. Alas the kernel driver exports the raw file as
415 * having size zero, and we'd rather not have applications support such credential
416 * files. Let's hence copy the files to make them regular. */
417
418 rfd = openat(vfd, "raw", O_RDONLY|O_CLOEXEC);
419 if (rfd < 0) {
420 log_warning_errno(r, "Failed to open '" QEMU_FWCFG_PATH "'/%s/raw, ignoring: %m", d->d_name);
421 continue;
422 }
423
424 r = acquire_credential_directory(c);
425 if (r < 0)
426 return r;
427
428 nfd = open_credential_file_for_write(c->target_dir_fd, SYSTEM_CREDENTIALS_DIRECTORY, d->d_name);
429 if (nfd == -EEXIST)
430 continue;
431 if (nfd < 0)
432 return r;
433
434 r = copy_bytes(rfd, nfd, sz, 0);
435 if (r < 0) {
436 (void) unlinkat(c->target_dir_fd, d->d_name, 0);
437 return log_error_errno(r, "Failed to create credential '%s': %m", d->d_name);
438 }
439
440 c->size_sum += sz;
441 c->n_credentials++;
442
443 log_debug("Successfully copied qemu fw_cfg credential '%s'.", d->d_name);
444 }
445
446 return 0;
447 }
448
import_credentials_trusted(void)449 static int import_credentials_trusted(void) {
450 _cleanup_(import_credentials_context_free) ImportCredentialContext c = {
451 .target_dir_fd = -1,
452 };
453 int q, r;
454
455 r = import_credentials_qemu(&c);
456 q = import_credentials_proc_cmdline(&c);
457
458 if (c.n_credentials > 0) {
459 int z;
460
461 log_debug("Imported %u credentials from kernel command line/fw_cfg.", c.n_credentials);
462
463 z = finalize_credentials_dir(SYSTEM_CREDENTIALS_DIRECTORY, "CREDENTIALS_DIRECTORY");
464 if (z < 0)
465 return z;
466 }
467
468 return r < 0 ? r : q;
469 }
470
/* Makes a credentials directory received via environment variable 'envvar' (pointing to 'path')
 * reachable under our regular location 'where', by creating a symlink there.
 *
 * Refuses with EINVAL if 'path' is not a valid absolute path. Does nothing if 'path' and 'where' are
 * already equal. Returns 0 on success, negative errno-style value (after logging) on failure. */
static int symlink_credential_dir(const char *envvar, const char *path, const char *where) {
        int r;

        assert(envvar);
        assert(path);
        assert(where);

        if (!(path_is_valid(path) && path_is_absolute(path)))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "String specified via $%s is not a valid absolute path, refusing: %s", envvar, path);

        /* If the env var already points to where we intend to create the symlink, then most likely we
         * already imported some creds earlier, and thus set the env var, and hence don't need to do
         * anything. */
        if (!path_equal(path, where)) {
                r = symlink_idempotent(path, where, /* make_relative= */ true);
                if (r < 0)
                        return log_error_errno(r, "Failed to link $%s to %s: %m", envvar, where);
        }

        return 0;
}
493
import_credentials(void)494 int import_credentials(void) {
495 const char *received_creds_dir = NULL, *received_encrypted_creds_dir = NULL;
496 bool envvar_set = false;
497 int r, q;
498
499 r = get_credentials_dir(&received_creds_dir);
500 if (r < 0 && r != -ENXIO) /* ENXIO → env var not set yet */
501 log_warning_errno(r, "Failed to determine credentials directory, ignoring: %m");
502
503 envvar_set = r >= 0;
504
505 r = get_encrypted_credentials_dir(&received_encrypted_creds_dir);
506 if (r < 0 && r != -ENXIO) /* ENXIO → env var not set yet */
507 log_warning_errno(r, "Failed to determine encrypted credentials directory, ignoring: %m");
508
509 envvar_set = envvar_set || r >= 0;
510
511 if (envvar_set) {
512 /* Maybe an earlier stage initrd already set this up? If so, don't try to import anything again. */
513 log_debug("Not importing credentials, $CREDENTIALS_DIRECTORY or $ENCRYPTED_CREDENTIALS_DIRECTORY already set.");
514
515 /* But, let's make sure the creds are available from our regular paths. */
516 if (received_creds_dir)
517 r = symlink_credential_dir("CREDENTIALS_DIRECTORY", received_creds_dir, SYSTEM_CREDENTIALS_DIRECTORY);
518 else
519 r = 0;
520
521 if (received_encrypted_creds_dir) {
522 q = symlink_credential_dir("ENCRYPTED_CREDENTIALS_DIRECTORY", received_encrypted_creds_dir, ENCRYPTED_SYSTEM_CREDENTIALS_DIRECTORY);
523 if (r >= 0)
524 r = q;
525 }
526
527 } else {
528 _cleanup_free_ char *v = NULL;
529
530 r = proc_cmdline_get_key("systemd.import_credentials", PROC_CMDLINE_STRIP_RD_PREFIX, &v);
531 if (r < 0)
532 log_debug_errno(r, "Failed to check if 'systemd.import_credentials=' kernel command line option is set, ignoring: %m");
533 else if (r > 0) {
534 r = parse_boolean(v);
535 if (r < 0)
536 log_debug_errno(r, "Failed to parse 'systemd.import_credentials=' parameter, ignoring: %m");
537 else if (r == 0) {
538 log_notice("systemd.import_credentials=no is set, skipping importing of credentials.");
539 return 0;
540 }
541 }
542
543 r = import_credentials_boot();
544
545 q = import_credentials_trusted();
546 if (r >= 0)
547 r = q;
548 }
549
550 return r;
551 }
552