1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <linux/loop.h>
4 #include <poll.h>
5 #include <sys/file.h>
6 #include <sys/ioctl.h>
7 #include <sys/mount.h>
8 #include <sys/xattr.h>
9
10 #if HAVE_VALGRIND_MEMCHECK_H
11 #include <valgrind/memcheck.h>
12 #endif
13
14 #include "sd-daemon.h"
15 #include "sd-device.h"
16 #include "sd-event.h"
17
18 #include "blkid-util.h"
19 #include "blockdev-util.h"
20 #include "btrfs-util.h"
21 #include "chattr-util.h"
22 #include "devnum-util.h"
23 #include "dm-util.h"
24 #include "env-util.h"
25 #include "errno-util.h"
26 #include "fd-util.h"
27 #include "fdisk-util.h"
28 #include "fileio.h"
29 #include "filesystems.h"
30 #include "fs-util.h"
31 #include "fsck-util.h"
32 #include "gpt.h"
33 #include "home-util.h"
34 #include "homework-luks.h"
35 #include "homework-mount.h"
36 #include "id128-util.h"
37 #include "io-util.h"
38 #include "keyring-util.h"
39 #include "memory-util.h"
40 #include "missing_magic.h"
41 #include "mkdir.h"
42 #include "mkfs-util.h"
43 #include "mount-util.h"
44 #include "openssl-util.h"
45 #include "parse-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "random-util.h"
49 #include "resize-fs.h"
50 #include "strv.h"
51 #include "sync-util.h"
52 #include "tmpfile-util.h"
53 #include "udev-util.h"
54 #include "user-util.h"
55
/* Truncates a size to a multiple of 4K (4096 bytes). Newer hardware generally prefers 4K sectors, hence
 * partitions are aligned to that granularity. At worst this wastes a bit less than 4K per partition. */
#define DISK_SIZE_ROUND_DOWN(x) ((x) & ~(UINT64_C(4096) - 1))
60
/* Rounds a size up to the next multiple of 4K (4096 bytes). If doing so would exceed the range of
 * uint64_t the result saturates to UINT64_MAX instead of wrapping around. The argument is evaluated
 * exactly once. */
#define DISK_SIZE_ROUND_UP(x)                                                   \
        ({                                                                      \
                const uint64_t _dsru_value = (x);                               \
                const uint64_t _dsru_mask = UINT64_C(4095);                     \
                uint64_t _dsru_result;                                          \
                if (_dsru_value > UINT64_MAX - _dsru_mask)                      \
                        _dsru_result = UINT64_MAX;                              \
                else                                                            \
                        _dsru_result = (_dsru_value + _dsru_mask) & ~_dsru_mask; \
                _dsru_result;                                                   \
        })
67
/* The difference between the size of the image on disk and the size of the file system inside it, i.e.
 * the space consumed by the GPT and LUKS2 envelope: 18 MiB. (As measured on cryptsetup 2.4.1) */
#define GPT_LUKS2_OVERHEAD (UINT64_C(18) * 1024 * 1024)
71
72 static int resize_image_loop(UserRecord *h, HomeSetup *setup, uint64_t old_image_size, uint64_t new_image_size, uint64_t *ret_image_size);
73
/* Sets (b == true) or removes (b == false) the 'user.home-dirty' extended attribute on the regular file
 * referenced by fd. The attribute is used to detect home directories that were not properly unmounted.
 *
 * Returns > 0 if the attribute was actually added/removed, 0 if the file already was in the requested
 * state, and a negative errno-style value on failure. */
int run_mark_dirty(int fd, bool b) {
        static const char xattr_name[] = "user.home-dirty";
        const char marker = '1';
        bool changed;
        int r;

        assert(fd >= 0);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        if (!b) {
                /* Flush everything out first, so that the image is really in a clean state before we
                 * declare it clean. */
                r = fsync_full(fd);
                if (r < 0)
                        return log_debug_errno(r, "Failed to synchronize image before marking it clean: %m");

                changed = fremovexattr(fd, xattr_name) >= 0;
                if (!changed && errno != ENODATA) /* ENODATA: flag was not set in the first place */
                        return log_debug_errno(errno, "Could not mark home directory as clean: %m");
        } else {
                changed = fsetxattr(fd, xattr_name, &marker, 1, XATTR_CREATE) >= 0;
                if (!changed && errno != EEXIST) /* EEXIST: flag was set already */
                        return log_debug_errno(errno, "Could not mark home directory as dirty: %m");
        }

        /* Make sure the changed flag hits the disk */
        r = fsync_full(fd);
        if (r < 0)
                return log_debug_errno(r, "Failed to synchronize dirty flag to disk: %m");

        return changed;
}
108
run_mark_dirty_by_path(const char * path,bool b)109 int run_mark_dirty_by_path(const char *path, bool b) {
110 _cleanup_close_ int fd = -1;
111
112 assert(path);
113
114 fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
115 if (fd < 0)
116 return log_debug_errno(errno, "Failed to open %s to mark dirty or clean: %m", path);
117
118 return run_mark_dirty(fd, b);
119 }
120
/* Uses libblkid to probe the superblock of the device/file referenced by fd. On success returns 0 and
 * stores a newly allocated copy of the file system type in *ret_fstype and the parsed file system UUID in
 * *ret_uuid. Returns -ENOPKG if nothing was found, if the result is ambiguous, or if type or UUID are
 * missing; other negative errno-style values on failure. */
static int probe_file_system_by_fd(
                int fd,
                char **ret_fstype,
                sd_id128_t *ret_uuid) {

        _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
        _cleanup_free_ char *fstype_copy = NULL;
        const char *type_value = NULL, *uuid_value = NULL;
        sd_id128_t parsed;
        int r;

        assert(fd >= 0);
        assert(ret_fstype);
        assert(ret_uuid);

        probe = blkid_new_probe();
        if (!probe)
                return -ENOMEM;

        /* libblkid does not reliably set errno, hence reset it first and fall back to a generic error */
        errno = 0;
        if (blkid_probe_set_device(probe, fd, 0, 0) != 0)
                return errno > 0 ? -errno : -ENOMEM;

        (void) blkid_probe_enable_superblocks(probe, 1);
        (void) blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID);

        errno = 0;
        switch (blkid_do_safeprobe(probe)) {

        case 0:
                break;

        case -2: /* ambiguous result */
        case 1:  /* nothing found */
                return -ENOPKG;

        default:
                return errno > 0 ? -errno : -EIO;
        }

        (void) blkid_probe_lookup_value(probe, "TYPE", &type_value, NULL);
        if (!type_value)
                return -ENOPKG;

        (void) blkid_probe_lookup_value(probe, "UUID", &uuid_value, NULL);
        if (!uuid_value)
                return -ENOPKG;

        r = sd_id128_from_string(uuid_value, &parsed);
        if (r < 0)
                return r;

        fstype_copy = strdup(type_value);
        if (!fstype_copy)
                return -ENOMEM;

        *ret_uuid = parsed;
        *ret_fstype = TAKE_PTR(fstype_copy);

        return 0;
}
176
/* Path-based wrapper around probe_file_system_by_fd(): opens the node read-only (and non-blocking),
 * probes it, and closes it again. */
static int probe_file_system_by_path(const char *path, char **ret_fstype, sd_id128_t *ret_uuid) {
        _cleanup_close_ int node_fd = -1;

        node_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (node_fd >= 0)
                return probe_file_system_by_fd(node_fd, ret_fstype, ret_uuid);

        return -errno;
}
186
block_get_size_by_fd(int fd,uint64_t * ret)187 static int block_get_size_by_fd(int fd, uint64_t *ret) {
188 struct stat st;
189
190 assert(fd >= 0);
191 assert(ret);
192
193 if (fstat(fd, &st) < 0)
194 return -errno;
195
196 if (!S_ISBLK(st.st_mode))
197 return -ENOTBLK;
198
199 return RET_NERRNO(ioctl(fd, BLKGETSIZE64, ret));
200 }
201
block_get_size_by_path(const char * path,uint64_t * ret)202 static int block_get_size_by_path(const char *path, uint64_t *ret) {
203 _cleanup_close_ int fd = -1;
204
205 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
206 if (fd < 0)
207 return -errno;
208
209 return block_get_size_by_fd(fd, ret);
210 }
211
run_fsck(const char * node,const char * fstype)212 static int run_fsck(const char *node, const char *fstype) {
213 int r, exit_status;
214 pid_t fsck_pid;
215
216 assert(node);
217 assert(fstype);
218
219 r = fsck_exists(fstype);
220 if (r < 0)
221 return log_error_errno(r, "Failed to check if fsck for file system %s exists: %m", fstype);
222 if (r == 0) {
223 log_warning("No fsck for file system %s installed, ignoring.", fstype);
224 return 0;
225 }
226
227 r = safe_fork("(fsck)",
228 FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
229 &fsck_pid);
230 if (r < 0)
231 return r;
232 if (r == 0) {
233 /* Child */
234 execl("/sbin/fsck", "/sbin/fsck", "-aTl", node, NULL);
235 log_open();
236 log_error_errno(errno, "Failed to execute fsck: %m");
237 _exit(FSCK_OPERATIONAL_ERROR);
238 }
239
240 exit_status = wait_for_terminate_and_check("fsck", fsck_pid, WAIT_LOG_ABNORMAL);
241 if (exit_status < 0)
242 return exit_status;
243 if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
244 log_warning("fsck failed with exit status %i.", exit_status);
245
246 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
247 return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
248
249 log_warning("Ignoring fsck error.");
250 }
251
252 log_info("File system check completed.");
253
254 return 1;
255 }
256
/* Generates the cleanup helper referenced as _cleanup_(keyring_unlinkp) in luks_setup() below.
 * NOTE(review): presumably it invokes keyring_unlink() on every key_serial_t other than the -1 "unset"
 * marker when the variable goes out of scope — confirm against the macro definition. */
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(key_serial_t, keyring_unlink, -1);
258
/* Stores the specified password in the kernel's session keyring, but only if the user record enables
 * automatic shrinking and growing.
 *
 * Rationale: with auto-shrink-on-logout we need the key that unlocked the LUKS volume again when
 * resizing automatically, and we cannot ask the user at that point. Hence we park it in the "session"
 * keyring, assuming systemd-homed runs with its own private per-session keyring (the default service
 * behaviour, as KeyringMode=private is the default), so that it survives between systemd-homework
 * invocations. If auto-resizing is off we skip the upload, to be frugal with sensitive data.
 *
 * Returns 1 if the password was uploaded, 0 if the upload was skipped, negative errno-style value on
 * failure. If ret_key_serial is non-NULL it receives the key serial, or -1 if the upload was skipped. */
static int upload_to_keyring(
                UserRecord *h,
                const char *password,
                key_serial_t *ret_key_serial) {

        _cleanup_free_ char *description = NULL;
        key_serial_t uploaded = -1;
        int result = 0;

        assert(h);
        assert(password);

        if (user_record_auto_resize_mode(h) == AUTO_RESIZE_SHRINK_AND_GROW) {
                description = strjoin("homework-user-", h->user_name);
                if (!description)
                        return -ENOMEM;

                uploaded = add_key("user", description, password, strlen(password), KEY_SPEC_SESSION_KEYRING);
                if (uploaded == -1)
                        return -errno;

                result = 1;
        }

        if (ret_key_serial)
                *ret_key_serial = uploaded;

        return result;
}
298
/* Tries each password in the 'passwords' string list against any key slot of the LUKS device, until one
 * unlocks the volume key.
 *
 * On success returns 0, with the volume key written to the 'volume_key' buffer and its size stored in
 * *volume_key_size (which on input carries the buffer size). If ret_key_serial is non-NULL the working
 * password is additionally offered to upload_to_keyring(); failure to do so is logged and ignored, with
 * *ret_key_serial set to -1. Returns -ENOKEY if no password worked (or the list is empty). */
static int luks_try_passwords(
                UserRecord *h,
                struct crypt_device *cd,
                char **passwords,
                void *volume_key,
                size_t *volume_key_size,
                key_serial_t *ret_key_serial) {

        int r;

        assert(h);
        assert(cd);

        STRV_FOREACH(candidate, passwords) {
                size_t key_size = *volume_key_size;

                r = sym_crypt_volume_key_get(
                                cd,
                                CRYPT_ANY_SLOT,
                                volume_key,
                                &key_size,
                                *candidate,
                                strlen(*candidate));
                if (r < 0) {
                        log_debug_errno(r, "Password %zu didn't work for unlocking LUKS superblock: %m", (size_t) (candidate - passwords));
                        continue;
                }

                /* This one worked. If the caller is interested in a keyring serial, try to upload the
                 * password, but do not consider failure to do so fatal. */
                if (ret_key_serial) {
                        r = upload_to_keyring(h, *candidate, ret_key_serial);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to upload LUKS password to kernel keyring, ignoring: %m");
                                *ret_key_serial = -1;
                        }
                }

                *volume_key_size = key_size;
                return 0;
        }

        return -ENOKEY;
}
342
/* Loads the LUKS2 superblock from 'node', validates it against the expectations passed in, unlocks the
 * volume key with one of the available passwords and activates the volume as device mapper device
 * 'dm_name'.
 *
 * Validation: 'uuid' (unless null), 'cipher' and 'cipher_mode' (unless NULL) and 'volume_key_size'
 * (unless UINT64_MAX) must match what the superblock declares, otherwise -EMEDIUMTYPE is returned.
 *
 * Passwords are taken from the three lists in 'cache' (if non-NULL) first, then from 'passwords'.
 * Returns -ENOKEY if none of them works.
 *
 * On success returns 0 and hands the libcryptsetup context to *ret. The remaining ret_xyz parameters
 * are optional (may be NULL) and receive the UUID found, the volume key (ownership is passed to the
 * caller), its size, and the keyring serial of the uploaded password. */
static int luks_setup(
                UserRecord *h,
                const char *node,
                const char *dm_name,
                sd_id128_t uuid,
                const char *cipher,
                const char *cipher_mode,
                uint64_t volume_key_size,
                char **passwords,
                const PasswordCache *cache,
                bool discard,
                struct crypt_device **ret,
                sd_id128_t *ret_found_uuid,
                void **ret_volume_key,
                size_t *ret_volume_key_size,
                key_serial_t *ret_key_serial) {

        _cleanup_(keyring_unlinkp) key_serial_t uploaded_serial = -1;
        _cleanup_(sym_crypt_freep) struct crypt_device *device = NULL;
        _cleanup_(erase_and_freep) void *key = NULL;
        sd_id128_t superblock_uuid;
        bool must_match_uuid;
        char **candidates;
        size_t key_size;
        int r;

        assert(h);
        assert(node);
        assert(dm_name);
        assert(ret);

        r = sym_crypt_init(&device, node);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");

        cryptsetup_enable_logging(device);

        r = sym_crypt_load(device, CRYPT_LUKS2, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to load LUKS superblock: %m");

        r = sym_crypt_get_volume_key_size(device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
        key_size = (size_t) r;

        /* The UUID only needs to be parsed if we shall compare it or report it back */
        must_match_uuid = !sd_id128_is_null(uuid);
        if (must_match_uuid || ret_found_uuid) {
                const char *text;

                text = sym_crypt_get_uuid(device);
                if (!text)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");

                r = sd_id128_from_string(text, &superblock_uuid);
                if (r < 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");

                if (must_match_uuid && !sd_id128_equal(uuid, superblock_uuid))
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has wrong UUID.");
        }

        if (cipher && !streq_ptr(cipher, sym_crypt_get_cipher(device)))
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher.");

        if (cipher_mode && !streq_ptr(cipher_mode, sym_crypt_get_cipher_mode(device)))
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher mode.");

        if (volume_key_size != UINT64_MAX && key_size != volume_key_size)
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong volume key size.");

        key = malloc(key_size);
        if (!key)
                return log_oom();

        /* Go through all password lists in order, and stop at the first one that either works or fails
         * for a reason other than "no key matched". */
        r = -ENOKEY;
        FOREACH_POINTER(candidates,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        passwords) {
                r = luks_try_passwords(h, device, candidates, key, &key_size, ret_key_serial ? &uploaded_serial : NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(r, "No valid password for LUKS superblock.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        r = sym_crypt_activate_by_volume_key(
                        device,
                        dm_name,
                        key, key_size,
                        discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        log_info("Setting up LUKS device /dev/mapper/%s completed.", dm_name);

        /* Success: pass ownership of everything the caller asked for */
        *ret = TAKE_PTR(device);

        if (ret_found_uuid)
                *ret_found_uuid = superblock_uuid;
        if (ret_volume_key)
                *ret_volume_key = TAKE_PTR(key);
        if (ret_volume_key_size)
                *ret_volume_key_size = key_size;
        if (ret_key_serial)
                *ret_key_serial = TAKE_KEY_SERIAL(uploaded_serial);

        return 0;
}
456
/* Fills in setup->dm_name ("home-" + user name) and setup->dm_node ("/dev/mapper/" + dm_name), unless
 * they are set already. Returns 0 on success, or the result of log_oom() on allocation failure. */
static int make_dm_names(UserRecord *h, HomeSetup *setup) {
        assert(h);
        assert(h->user_name);
        assert(setup);

        if (!setup->dm_name) {
                char *name;

                name = strjoin("home-", h->user_name);
                if (!name)
                        return log_oom();

                setup->dm_name = name;
        }

        if (!setup->dm_node) {
                char *node;

                node = path_join("/dev/mapper/", setup->dm_name);
                if (!node)
                        return log_oom();

                setup->dm_node = node;
        }

        return 0;
}
476
/* Acquires a libcryptsetup context for the already active device mapper device of the user and stores
 * it in setup->crypt_device (which must be unset on entry).
 *
 * Returns 1 if the context was acquired. If 'graceful' is true and the device is absent (or the
 * library reports -EINVAL), returns 0 without touching setup->crypt_device. Negative errno-style value
 * on failure. */
static int acquire_open_luks_device(
                UserRecord *h,
                HomeSetup *setup,
                bool graceful) {

        _cleanup_(sym_crypt_freep) struct crypt_device *device = NULL;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        r = sym_crypt_init_by_name(&device, setup->dm_name);
        if (graceful && (r == -EINVAL || ERRNO_IS_DEVICE_ABSENT(r)))
                return 0;
        if (r < 0)
                return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);

        cryptsetup_enable_logging(device);

        setup->crypt_device = TAKE_PTR(device);
        return 1;
}
508
/* Opens a LUKS device that is already set up (i.e. active in device mapper). Re-validates the password
 * while doing so, which also provides us with the volume key, which we want.
 *
 * The libcryptsetup context is stored in setup->crypt_device (which must be unset on entry). Passwords
 * are taken from the three lists in 'cache' (if non-NULL) first, then from h->password. Returns -ENOKEY
 * if none of them works.
 *
 * On success returns 0. The ret_xyz parameters are optional (may be NULL) and receive the UUID of the
 * LUKS superblock, the volume key (ownership is passed to the caller) and its size. */
static int luks_open(
                UserRecord *h,
                HomeSetup *setup,
                const PasswordCache *cache,
                sd_id128_t *ret_found_uuid,
                void **ret_volume_key,
                size_t *ret_volume_key_size) {

        _cleanup_(erase_and_freep) void *vk = NULL;
        sd_id128_t p;
        char **list;
        size_t vks;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        r = sym_crypt_load(setup->crypt_device, CRYPT_LUKS2, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to load LUKS superblock: %m");

        r = sym_crypt_get_volume_key_size(setup->crypt_device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
        vks = (size_t) r;

        /* Only bother with parsing the UUID if the caller wants to know it */
        if (ret_found_uuid) {
                const char *s;

                s = sym_crypt_get_uuid(setup->crypt_device);
                if (!s)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");

                r = sd_id128_from_string(s, &p);
                if (r < 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
        }

        vk = malloc(vks);
        if (!vk)
                return log_oom();

        /* Go through all password lists in order, and stop at the first one that either works or fails
         * for a reason other than "no key matched". */
        r = -ENOKEY;
        FOREACH_POINTER(list,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        h->password) {
                r = luks_try_passwords(h, setup->crypt_device, list, vk, &vks, NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(r, "No valid password for LUKS superblock.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        log_info("Discovered used LUKS device /dev/mapper/%s, and validated password.", setup->dm_name);

        /* This is needed so that crypt_resize() can operate correctly for pre-existing LUKS devices. We need
         * to tell libcryptsetup the volume key explicitly, so that it is in the kernel keyring. */
        r = sym_crypt_activate_by_volume_key(setup->crypt_device, NULL, vk, vks, CRYPT_ACTIVATE_KEYRING_KEY);
        if (r < 0)
                return log_error_errno(r, "Failed to upload volume key again: %m");

        log_info("Successfully re-activated LUKS device.");

        if (ret_found_uuid)
                *ret_found_uuid = p;
        if (ret_volume_key)
                *ret_volume_key = TAKE_PTR(vk);
        if (ret_volume_key_size)
                *ret_volume_key_size = vks;

        return 0;
}
593
/* Probes the file system on the specified device node and checks that it is one we support and, if
 * 'uuid' is non-null, that it carries that UUID.
 *
 * On success returns 0 and stores the (newly allocated) file system type in *ret_fstype and, if
 * ret_found_uuid is non-NULL, the UUID found there. Returns -EPROTONOSUPPORT for unsupported file
 * systems and -EMEDIUMTYPE on UUID mismatch. */
static int fs_validate(
                const char *dm_node,
                sd_id128_t uuid,
                char **ret_fstype,
                sd_id128_t *ret_found_uuid) {

        _cleanup_free_ char *found_type = NULL;
        sd_id128_t found_uuid;
        bool uuid_mismatch;
        int r;

        assert(dm_node);
        assert(ret_fstype);

        r = probe_file_system_by_path(dm_node, &found_type, &found_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed to probe file system: %m");

        /* We restrict the set of file systems we are willing to deal with, as protection against little
         * tested kernel file systems. Moreover, the resize ioctls are only supported for these. */
        if (!supported_fstype(found_type))
                return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Image contains unsupported file system: %s", strna(found_type));

        uuid_mismatch = !sd_id128_is_null(uuid) && !sd_id128_equal(uuid, found_uuid);
        if (uuid_mismatch)
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "File system has wrong UUID.");

        log_info("Probing file system completed (found %s).", found_type);

        if (ret_found_uuid)
                *ret_found_uuid = found_uuid;

        *ret_fstype = TAKE_PTR(found_type);

        return 0;
}
629
/* Probes the image referenced by fd with libblkid and locates the LUKS volume inside it.
 *
 * Two layouts are accepted:
 *
 *   1. The image is directly a LUKS volume ("crypto_LUKS" superblock): *ret_offset is set to 0,
 *      *ret_size to UINT64_MAX (meaning: the full disk) and *ret_partition_uuid to the null UUID.
 *
 *   2. The image carries a GPT partition table containing exactly one partition of type GPT_USER_HOME
 *      whose name equals 'label' and — if 'partition_uuid' is non-null and the partition has a UUID —
 *      whose UUID equals 'partition_uuid'. Offset and size of that partition are returned in bytes
 *      (libblkid's values are multiplied by 512), along with the partition UUID found.
 *
 * Returns 0 on success, -ENOPKG if nothing suitable (or more than one suitable partition) is found,
 * -EINVAL if the partition geometry is out of range, other negative errno-style values on failure. */
static int luks_validate(
                int fd,
                const char *label,
                sd_id128_t partition_uuid,
                sd_id128_t *ret_partition_uuid,
                uint64_t *ret_offset,
                uint64_t *ret_size) {

        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        sd_id128_t found_partition_uuid = SD_ID128_NULL;
        const char *fstype = NULL, *pttype = NULL;
        blkid_loff_t offset = 0, size = 0;
        blkid_partlist pl;
        bool found = false;
        int r, n;

        assert(fd >= 0);
        assert(label);
        assert(ret_partition_uuid); /* written unconditionally on both success paths below */
        assert(ret_offset);
        assert(ret_size);

        b = blkid_new_probe();
        if (!b)
                return -ENOMEM;

        /* libblkid does not reliably set errno, hence reset it first and fall back to a generic error */
        errno = 0;
        r = blkid_probe_set_device(b, fd, 0, 0);
        if (r != 0)
                return errno > 0 ? -errno : -ENOMEM;

        (void) blkid_probe_enable_superblocks(b, 1);
        (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
        (void) blkid_probe_enable_partitions(b, 1);
        (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */
                return -ENOPKG;
        if (r != 0)
                return errno > 0 ? -errno : -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
        if (streq_ptr(fstype, "crypto_LUKS")) {
                /* Directly a LUKS image */
                *ret_offset = 0;
                *ret_size = UINT64_MAX; /* full disk */
                *ret_partition_uuid = SD_ID128_NULL;
                return 0;
        } else if (fstype)
                return -ENOPKG; /* Some other superblock, not for us */

        (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
        if (!streq_ptr(pttype, "gpt"))
                return -ENOPKG;

        errno = 0;
        pl = blkid_probe_get_partitions(b);
        if (!pl)
                return errno > 0 ? -errno : -ENOMEM;

        errno = 0;
        n = blkid_partlist_numof_partitions(pl);
        if (n < 0)
                return errno > 0 ? -errno : -EIO;

        for (int i = 0; i < n; i++) {
                blkid_partition pp;
                sd_id128_t id = SD_ID128_NULL;
                const char *sid;

                errno = 0;
                pp = blkid_partlist_get_partition(pl, i);
                if (!pp)
                        return errno > 0 ? -errno : -EIO;

                if (id128_equal_string(blkid_partition_get_type_string(pp), GPT_USER_HOME) <= 0)
                        continue;

                if (!streq_ptr(blkid_partition_get_name(pp), label))
                        continue;

                sid = blkid_partition_get_uuid(pp);
                if (sid) {
                        r = sd_id128_from_string(sid, &id);
                        if (r < 0)
                                log_debug_errno(r, "Couldn't parse partition UUID %s, weird: %m", sid);

                        if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid))
                                continue;
                }

                /* Insist that there's exactly one matching partition, refuse ambiguity */
                if (found)
                        return -ENOPKG;

                offset = blkid_partition_get_start(pp);
                size = blkid_partition_get_size(pp);
                found_partition_uuid = id;

                found = true;
        }

        if (!found)
                return -ENOPKG;

        if (offset < 0)
                return -EINVAL;
        if ((uint64_t) offset > UINT64_MAX / 512U)
                return -EINVAL;
        if (size <= 0)
                return -EINVAL;
        if ((uint64_t) size > UINT64_MAX / 512U)
                return -EINVAL;

        /* blkid_loff_t is signed, while the range checks above are done against the unsigned 64-bit
         * range. Hence multiply in uint64_t: a signed multiplication could overflow (which is undefined
         * behaviour) for values the checks above let through. */
        *ret_offset = (uint64_t) offset * 512U;
        *ret_size = (uint64_t) size * 512U;
        *ret_partition_uuid = found_partition_uuid;

        return 0;
}
750
crypt_device_to_evp_cipher(struct crypt_device * cd,const EVP_CIPHER ** ret)751 static int crypt_device_to_evp_cipher(struct crypt_device *cd, const EVP_CIPHER **ret) {
752 _cleanup_free_ char *cipher_name = NULL;
753 const char *cipher, *cipher_mode, *e;
754 size_t key_size, key_bits;
755 const EVP_CIPHER *cc;
756 int r;
757
758 assert(cd);
759
760 /* Let's find the right OpenSSL EVP_CIPHER object that matches the encryption settings of the LUKS
761 * device */
762
763 cipher = sym_crypt_get_cipher(cd);
764 if (!cipher)
765 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher from LUKS device.");
766
767 cipher_mode = sym_crypt_get_cipher_mode(cd);
768 if (!cipher_mode)
769 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher mode from LUKS device.");
770
771 e = strchr(cipher_mode, '-');
772 if (e)
773 cipher_mode = strndupa_safe(cipher_mode, e - cipher_mode);
774
775 r = sym_crypt_get_volume_key_size(cd);
776 if (r <= 0)
777 return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Cannot get volume key size from LUKS device.");
778
779 key_size = r;
780 key_bits = key_size * 8;
781 if (streq(cipher_mode, "xts"))
782 key_bits /= 2;
783
784 if (asprintf(&cipher_name, "%s-%zu-%s", cipher, key_bits, cipher_mode) < 0)
785 return log_oom();
786
787 cc = EVP_get_cipherbyname(cipher_name);
788 if (!cc)
789 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Selected cipher mode '%s' not supported, can't encrypt JSON record.", cipher_name);
790
791 /* Verify that our key length calculations match what OpenSSL thinks */
792 r = EVP_CIPHER_key_length(cc);
793 if (r < 0 || (uint64_t) r != key_size)
794 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key size of selected cipher doesn't meet our expectations.");
795
796 *ret = cc;
797 return 0;
798 }
799
/* Looks for the encrypted user record stored as a "systemd-homed" token in the LUKS2 header, decrypts it
 * with the volume key, parses it, and validates it against the host's user record 'h'.
 *
 * Tokens are enumerated from index 0 on, stopping at the first inactive slot. The first token of type
 * "systemd-homed" decides: any failure processing it is returned right away.
 *
 * On success returns 0 and stores the embedded user record in *ret_luks_home_record. Returns -EBADMSG
 * if no suitable token exists, -EREMCHG if the embedded record is not compatible with 'h', and other
 * negative errno-style values on failure. */
static int luks_validate_home_record(
                struct crypt_device *cd,
                UserRecord *h,
                const void *volume_key,
                PasswordCache *cache,
                UserRecord **ret_luks_home_record) {

        int r;

        assert(cd);
        assert(h);
        assert(ret_luks_home_record); /* written unconditionally on success */

        for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
                _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *rr = NULL;
                _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
                _cleanup_(user_record_unrefp) UserRecord *lhr = NULL;
                _cleanup_free_ void *encrypted = NULL, *iv = NULL;
                size_t decrypted_size, encrypted_size, iv_size;
                int decrypted_size_out1, decrypted_size_out2;
                /* This buffer receives the plaintext user record, which we treat as sensitive (see
                 * JSON_PARSE_SENSITIVE below), hence erase it before releasing it. */
                _cleanup_(erase_and_freep) char *decrypted = NULL;
                const char *text, *type;
                crypt_token_info state;
                JsonVariant *jr, *jiv;
                unsigned line, column;
                const EVP_CIPHER *cc;

                state = sym_crypt_token_status(cd, token, &type);
                if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, give up */
                        break;
                if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
                        continue;
                if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);

                if (!streq(type, "systemd-homed"))
                        continue;

                r = sym_crypt_token_json_get(cd, token, &text);
                if (r < 0)
                        return log_error_errno(r, "Failed to read LUKS token %i: %m", token);

                r = json_parse(text, JSON_PARSE_SENSITIVE, &v, &line, &column);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse LUKS token JSON data %u:%u: %m", line, column);

                jr = json_variant_by_key(v, "record");
                if (!jr)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'record' field.");
                jiv = json_variant_by_key(v, "iv");
                if (!jiv)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'iv' field.");

                r = json_variant_unbase64(jr, &encrypted, &encrypted_size);
                if (r < 0)
                        return log_error_errno(r, "Failed to base64 decode record: %m");

                r = json_variant_unbase64(jiv, &iv, &iv_size);
                if (r < 0)
                        return log_error_errno(r, "Failed to base64 decode IV: %m");

                r = crypt_device_to_evp_cipher(cd, &cc);
                if (r < 0)
                        return r;
                if (iv_size > INT_MAX || EVP_CIPHER_iv_length(cc) != (int) iv_size)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "IV size doesn't match.");

                /* OpenSSL takes the input length as 'int', refuse anything that would not fit instead
                 * of silently truncating it. */
                if (encrypted_size > INT_MAX)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Encrypted record is too large.");

                context = EVP_CIPHER_CTX_new();
                if (!context)
                        return log_oom();

                if (EVP_DecryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");

                /* Leave some extra room beyond the ciphertext size, for cipher padding and for the NUL
                 * byte we append below. */
                decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
                decrypted = new(char, decrypted_size);
                if (!decrypted)
                        return log_oom();

                if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt JSON record.");

                assert((size_t) decrypted_size_out1 <= decrypted_size);

                if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted + decrypted_size_out1, &decrypted_size_out2) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of JSON record.");

                /* Strictly smaller, since one more byte is needed for the trailing NUL */
                assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
                decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;

                if (memchr(decrypted, 0, decrypted_size))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inner NUL byte in JSON record, refusing.");

                decrypted[decrypted_size] = 0;

                r = json_parse(decrypted, JSON_PARSE_SENSITIVE, &rr, NULL, NULL);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse decrypted JSON record, refusing.");

                lhr = user_record_new();
                if (!lhr)
                        return log_oom();

                r = user_record_load(lhr, rr, USER_RECORD_LOAD_EMBEDDED|USER_RECORD_PERMISSIVE);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse user record: %m");

                if (!user_record_compatible(h, lhr))
                        return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "LUKS home record not compatible with host record, refusing.");

                r = user_record_authenticate(lhr, h, cache, /* strict_verify= */ true);
                if (r < 0)
                        return r;
                assert(r > 0); /* Insist that a password was verified */

                *ret_luks_home_record = TAKE_PTR(lhr);
                return 0;
        }

        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Couldn't find home record in LUKS2 header, refusing.");
}
920
/* Formats the JSON text of the LUKS2 token that carries the user record 'hr': the record is serialized
 * to JSON and encrypted with the volume key, using the cipher matching the LUKS device's own encryption
 * settings and a freshly generated random IV. The resulting token object has type "systemd-homed", an
 * empty "keyslots" array, and the Base64 encoded "record" and "iv" fields.
 *
 * On success returns 0 and stores the newly allocated token text in *ret. Negative errno-style value
 * on failure. */
static int format_luks_token_text(
                struct crypt_device *cd,
                UserRecord *hr,
                const void *volume_key,
                char **ret) {

        int r, encrypted_size_out1 = 0, encrypted_size_out2 = 0, iv_size, key_size;
        _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
        _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
        _cleanup_free_ void *iv = NULL, *encrypted = NULL;
        size_t text_length, encrypted_size;
        /* This is the plaintext of the user record, hence erase it before releasing it */
        _cleanup_(erase_and_freep) char *text = NULL;
        const EVP_CIPHER *cc;

        assert(cd);
        assert(hr);
        assert(volume_key);
        assert(ret);

        r = crypt_device_to_evp_cipher(cd, &cc);
        if (r < 0)
                return r;

        key_size = EVP_CIPHER_key_length(cc);
        iv_size = EVP_CIPHER_iv_length(cc);

        /* Not every cipher uses an IV; if this one doesn't, 'iv' simply remains NULL */
        if (iv_size > 0) {
                iv = malloc(iv_size);
                if (!iv)
                        return log_oom();

                r = genuine_random_bytes(iv, iv_size, RANDOM_BLOCK);
                if (r < 0)
                        return log_error_errno(r, "Failed to generate IV: %m");
        }

        context = EVP_CIPHER_CTX_new();
        if (!context)
                return log_oom();

        if (EVP_EncryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");

        r = json_variant_format(hr->json, 0, &text);
        if (r < 0)
                return log_error_errno(r, "Failed to format user record for LUKS: %m");

        text_length = strlen(text);

        /* OpenSSL takes the input length as 'int', refuse anything that would not fit instead of
         * silently truncating it. */
        if (text_length > INT_MAX)
                return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "User record is too large to encrypt.");

        /* Leave some extra room beyond the plaintext size, for cipher padding */
        encrypted_size = text_length + 2*key_size - 1;

        encrypted = malloc(encrypted_size);
        if (!encrypted)
                return log_oom();

        if (EVP_EncryptUpdate(context, encrypted, &encrypted_size_out1, (uint8_t*) text, text_length) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt JSON record.");

        assert((size_t) encrypted_size_out1 <= encrypted_size);

        if (EVP_EncryptFinal_ex(context, (uint8_t*) encrypted + encrypted_size_out1, &encrypted_size_out2) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of JSON record. ");

        assert((size_t) encrypted_size_out1 + (size_t) encrypted_size_out2 <= encrypted_size);

        r = json_build(&v,
                       JSON_BUILD_OBJECT(
                                       JSON_BUILD_PAIR("type", JSON_BUILD_CONST_STRING("systemd-homed")),
                                       JSON_BUILD_PAIR("keyslots", JSON_BUILD_EMPTY_ARRAY),
                                       JSON_BUILD_PAIR("record", JSON_BUILD_BASE64(encrypted, encrypted_size_out1 + encrypted_size_out2)),
                                       JSON_BUILD_PAIR("iv", JSON_BUILD_BASE64(iv, iv_size))));
        if (r < 0)
                return log_error_errno(r, "Failed to prepare LUKS JSON token object: %m");

        r = json_variant_format(v, 0, ret);
        if (r < 0)
                return log_error_errno(r, "Failed to format encrypted user record for LUKS: %m");

        return 0;
}
1000
/* Writes a copy of user record 'h' (cloned with USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE) into
 * the "systemd-homed" token of the LUKS2 header of setup->crypt_device, formatted by
 * format_luks_token_text() using setup->volume_key.
 *
 * If 'old_home' is non-NULL and equal to the record that would be written, the header is left untouched.
 *
 * Returns 0 if nothing was written (no crypt device set up, or record unchanged), 1 if the token was
 * written, and a negative errno-style value on failure (-EBADMSG if no "systemd-homed" token exists). */
int home_store_header_identity_luks(
                UserRecord *h,
                HomeSetup *setup,
                UserRecord *old_home) {

        _cleanup_(user_record_unrefp) UserRecord *header_home = NULL;
        _cleanup_free_ char *text = NULL;
        int n_tokens, r;

        assert(h);
        assert(setup);

        if (!setup->crypt_device)
                return 0;

        assert(setup->volume_key);

        /* Let's store the user's identity record in the LUKS2 "token" header data fields, in an encrypted
         * fashion. Why that? If we'd rely on the record being embedded in the payload file system itself we
         * would have to mount the file system before we can validate the JSON record, its signatures and
         * whether it matches what we are looking for. However, kernel file system implementations are
         * generally not ready to be used on untrusted media. Hence let's store the record independently of
         * the file system, so that we can validate it first, and only then mount the file system. To keep
         * things simple we use the same encryption settings for this record as for the file system itself. */

        r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &header_home);
        if (r < 0)
                return log_error_errno(r, "Failed to determine new header record: %m");

        if (old_home && user_record_equal(old_home, header_home)) {
                log_debug("Not updating header home record.");
                return 0;
        }

        r = format_luks_token_text(setup->crypt_device, header_home, setup->volume_key, &text);
        if (r < 0)
                return r;

        /* The upper bound does not change while we iterate, hence query it only once */
        n_tokens = sym_crypt_token_max(CRYPT_LUKS2);

        for (int token = 0; token < n_tokens; token++) {
                crypt_token_info state;
                const char *type;

                state = sym_crypt_token_status(setup->crypt_device, token, &type);
                if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, we are done */
                        break;
                if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
                        continue; /* Not ours */
                if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);

                if (!streq(type, "systemd-homed"))
                        continue;

                r = sym_crypt_token_json_set(setup->crypt_device, token, text);
                if (r < 0)
                        return log_error_errno(r, "Failed to set JSON token for slot %i: %m", token);

                /* Now, let's free the text so that for all further matching tokens we call
                 * crypt_token_json_set() with a NULL text in order to invalidate the tokens. */
                text = mfree(text);
        }

        /* 'text' is only reset once a matching token was written; if it is still set, none was found */
        if (text)
                return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Didn't find any record token to update.");

        log_info("Wrote LUKS header user record.");

        return 1;
}
1069
run_fitrim(int root_fd)1070 int run_fitrim(int root_fd) {
1071 struct fstrim_range range = {
1072 .len = UINT64_MAX,
1073 };
1074
1075 /* If discarding is on, discard everything right after mounting, so that the discard setting takes
1076 * effect on activation. (Also, optionally, trim on logout) */
1077
1078 assert(root_fd >= 0);
1079
1080 if (ioctl(root_fd, FITRIM, &range) < 0) {
1081 if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EBADF) {
1082 log_debug_errno(errno, "File system does not support FITRIM, not trimming.");
1083 return 0;
1084 }
1085
1086 return log_warning_errno(errno, "Failed to invoke FITRIM, ignoring: %m");
1087 }
1088
1089 log_info("Discarded unused %s.", FORMAT_BYTES(range.len));
1090 return 1;
1091 }
1092
run_fallocate(int backing_fd,const struct stat * st)1093 int run_fallocate(int backing_fd, const struct stat *st) {
1094 struct stat stbuf;
1095
1096 assert(backing_fd >= 0);
1097
1098 /* If discarding is off, let's allocate the whole image before mounting, so that the setting takes
1099 * effect on activation */
1100
1101 if (!st) {
1102 if (fstat(backing_fd, &stbuf) < 0)
1103 return log_error_errno(errno, "Failed to fstat(): %m");
1104
1105 st = &stbuf;
1106 }
1107
1108 if (!S_ISREG(st->st_mode))
1109 return 0;
1110
1111 if (st->st_blocks >= DIV_ROUND_UP(st->st_size, 512)) {
1112 log_info("Backing file is fully allocated already.");
1113 return 0;
1114 }
1115
1116 if (fallocate(backing_fd, FALLOC_FL_KEEP_SIZE, 0, st->st_size) < 0) {
1117
1118 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
1119 log_debug_errno(errno, "fallocate() not supported on file system, ignoring.");
1120 return 0;
1121 }
1122
1123 if (ERRNO_IS_DISK_SPACE(errno)) {
1124 log_debug_errno(errno, "Not enough disk space to fully allocate home.");
1125 return -ENOSPC; /* make recognizable */
1126 }
1127
1128 return log_error_errno(errno, "Failed to allocate backing file blocks: %m");
1129 }
1130
1131 log_info("Allocated additional %s.",
1132 FORMAT_BYTES((DIV_ROUND_UP(st->st_size, 512) - st->st_blocks) * 512));
1133 return 1;
1134 }
1135
run_fallocate_by_path(const char * backing_path)1136 int run_fallocate_by_path(const char *backing_path) {
1137 _cleanup_close_ int backing_fd = -1;
1138
1139 backing_fd = open(backing_path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1140 if (backing_fd < 0)
1141 return log_error_errno(errno, "Failed to open '%s' for fallocate(): %m", backing_path);
1142
1143 return run_fallocate(backing_fd, NULL);
1144 }
1145
/* Optionally takes an exclusive, non-blocking BSD lock on the image file and passes the fd to the parent.
 *
 * 'ip' is the image path and is used for log messages only.
 *
 * Returns 0 on success or when locking is not requested, -EADDRINUSE if the file is already locked
 * (flock() failed with EAGAIN), and another negative errno on other failures. */
static int lock_image_fd(int image_fd, const char *ip) {
        int r;

        assert(image_fd >= 0);
        assert(ip);

        /* Locking is opt-in via $SYSTEMD_LUKS_LOCK: if enabled we acquire an exclusive BSD lock on the
         * image file and send the fd to our parent. homed keeps it open so that no other instance of
         * homed (across the network or such) mounts the same file. */

        r = getenv_bool("SYSTEMD_LUKS_LOCK");
        if (r < 0 && r != -ENXIO)
                return log_error_errno(r, "Failed to parse $SYSTEMD_LUKS_LOCK environment variable: %m");
        if (r <= 0) /* -ENXIO or false: no locking requested */
                return 0;

        if (flock(image_fd, LOCK_EX|LOCK_NB) < 0) {
                if (errno == EAGAIN) {
                        log_error_errno(errno, "Image file '%s' already locked, can't use.", ip);
                        return -EADDRINUSE; /* Make error recognizable */
                }

                return log_error_errno(errno, "Failed to lock image file '%s': %m", ip);
        }

        log_info("Successfully locked image file '%s'.", ip);

        /* Hand the locked fd to our parent, to keep safe while the home dir is active. Failure to do
         * so is logged but not treated as fatal. */
        r = sd_pid_notify_with_fds(0, false, "SYSTEMD_LUKS_LOCK_FD=1", &image_fd, 1);
        if (r < 0)
                log_warning_errno(r, "Failed to send LUKS lock fd to parent, ignoring: %m");

        return 0;
}
1183
/* Opens the image file of a home directory and, for regular files, optionally locks it.
 *
 * The path is 'force_image_path' if non-NULL, otherwise the image path of record 'h'; at least one of the
 * two must be given. If 'ret_stat' is non-NULL it receives the fstat() data of the opened file.
 *
 * Returns the open fd (ownership passes to the caller) or a negative errno-style value: -EISDIR if the
 * path refers to a directory, -EBADFD for other unsupported file types, or whatever lock_image_fd()
 * returns. */
static int open_image_file(
                UserRecord *h,
                const char *force_image_path,
                struct stat *ret_stat) {

        _cleanup_close_ int fd = -1;
        struct stat image_st;
        const char *path;
        int r;

        assert(h || force_image_path);

        path = force_image_path ?: user_record_image_path(h);

        fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (fd < 0)
                return log_error_errno(errno, "Failed to open image file %s: %m", path);

        if (fstat(fd, &image_st) < 0)
                return log_error_errno(errno, "Failed to fstat() image file: %m");

        if (S_ISREG(image_st.st_mode)) {
                /* Only regular files are locked: locking block devices might interfere with udev's
                 * workings, and such locks aren't network propagated anyway, hence not what we are
                 * after here. */
                r = lock_image_fd(fd, path);
                if (r < 0)
                        return r;
        } else if (!S_ISBLK(image_st.st_mode))
                return log_error_errno(
                                S_ISDIR(image_st.st_mode) ? SYNTHETIC_ERRNO(EISDIR) : SYNTHETIC_ERRNO(EBADFD),
                                "Image file %s is not a regular file or block device: %m", path);

        if (ret_stat)
                *ret_stat = image_st;

        return TAKE_FD(fd);
}
1223
/* Prepares the LUKS home directory of user 'h' for use and records the resulting objects in 'setup'.
 *
 * If HOME_SETUP_ALREADY_ACTIVATED is set in 'flags', the existing LUKS device, its backing block device
 * and the home directory are reopened, and partition offset/size are read back from the kernel.
 * Otherwise the image is validated, marked dirty, optionally fallocate()d, attached to a loopback
 * device, unlocked, fsck'ed and mounted, and setup->undo_dm/undo_mount are armed.
 *
 * 'force_image_path' overrides the image path from the user record if non-NULL. If 'ret_luks_home' is
 * non-NULL it receives the record produced by luks_validate_home_record().
 *
 * On success the discovered UUIDs, partition offset/size and (if acquired) the volume key are stored in
 * 'setup'. Returns 0 on success, a negative errno-style value on failure. */
int home_setup_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                const char *force_image_path,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_luks_home) {

        sd_id128_t found_partition_uuid = SD_ID128_NULL, found_luks_uuid = SD_ID128_NULL, found_fs_uuid = SD_ID128_NULL;
        _cleanup_(user_record_unrefp) UserRecord *luks_home = NULL;
        _cleanup_(erase_and_freep) void *volume_key = NULL;
        size_t volume_key_size = 0;
        uint64_t offset, size;
        struct stat st;
        int r;

        assert(h);
        assert(setup);
        assert(user_record_storage(h) == USER_LUKS);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        /* Reuse the image fd if it has already been opened by an earlier step */
        if (setup->image_fd < 0) {
                setup->image_fd = open_image_file(h, force_image_path, &st);
                if (setup->image_fd < 0)
                        return setup->image_fd;
        } else if (fstat(setup->image_fd, &st) < 0)
                return log_error_errno(errno, "Failed to stat image: %m");

        if (FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED)) {
                struct loop_info64 info;
                const char *n;

                if (!setup->crypt_device) {
                        r = luks_open(h,
                                      setup,
                                      cache,
                                      &found_luks_uuid,
                                      &volume_key,
                                      &volume_key_size);
                        if (r < 0)
                                return r;
                }

                if (ret_luks_home) {
                        r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
                        if (r < 0)
                                return r;
                }

                n = sym_crypt_get_device_name(setup->crypt_device);
                if (!n)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);

                if (!setup->loop) {
                        r = loop_device_open(n, O_RDWR, &setup->loop);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open loopback device %s: %m", n);
                }

                if (ioctl(setup->loop->fd, LOOP_GET_STATUS64, &info) < 0) {
                        _cleanup_free_ char *sysfs = NULL;

                        /* ENOTTY/EINVAL are taken to mean the backing device is not a loopback device;
                         * in that case fall back to generic block device queries. */
                        if (!IN_SET(errno, ENOTTY, EINVAL))
                                return log_error_errno(errno, "Failed to get block device metrics of %s: %m", n);

                        /* Note: report errno here, not 'r', which holds the (non-negative) result of an
                         * earlier call and would turn this failure into an apparent success. */
                        if (ioctl(setup->loop->fd, BLKGETSIZE64, &size) < 0)
                                return log_error_errno(errno, "Failed to read block device size of %s: %m", n);

                        if (fstat(setup->loop->fd, &st) < 0)
                                return log_error_errno(errno, "Failed to stat block device %s: %m", n);
                        assert(S_ISBLK(st.st_mode));

                        if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
                                return log_oom();

                        if (access(sysfs, F_OK) < 0) {
                                if (errno != ENOENT)
                                        return log_error_errno(errno, "Failed to determine whether %s exists: %m", sysfs);

                                /* No "partition" attribute: whole block device, hence no offset */
                                offset = 0;
                        } else {
                                _cleanup_free_ char *buffer = NULL;

                                /* Release the first path before reusing the variable, asprintf() would
                                 * otherwise overwrite the pointer and leak the old string. */
                                sysfs = mfree(sysfs);

                                if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
                                        return log_oom();

                                r = read_one_line_file(sysfs, &buffer);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to read partition start offset: %m");

                                r = safe_atou64(buffer, &offset);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to parse partition start offset: %m");

                                /* The value is multiplied by 512 below, make sure that cannot overflow */
                                if (offset > UINT64_MAX / 512U)
                                        return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Offset too large for 64 bit range, refusing.");

                                offset *= 512U;
                        }
                } else {
#if HAVE_VALGRIND_MEMCHECK_H
                        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                        offset = info.lo_offset;
                        size = info.lo_sizelimit;
                }

                found_partition_uuid = found_fs_uuid = SD_ID128_NULL;

                log_info("Discovered used loopback device %s.", setup->loop->node);

                if (setup->root_fd < 0) {
                        setup->root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                        if (setup->root_fd < 0)
                                return log_error_errno(errno, "Failed to open home directory: %m");
                }
        } else {
                _cleanup_free_ char *fstype = NULL, *subdir = NULL;
                const char *ip;

                /* When we aren't reopening the home directory we are allocating it fresh, hence the relevant
                 * objects can't be allocated yet. */
                assert(setup->root_fd < 0);
                assert(!setup->crypt_device);
                assert(!setup->loop);

                ip = force_image_path ?: user_record_image_path(h);

                subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
                if (!subdir)
                        return log_oom();

                r = luks_validate(setup->image_fd, user_record_user_name_and_realm(h), h->partition_uuid, &found_partition_uuid, &offset, &size);
                if (r < 0)
                        return log_error_errno(r, "Failed to validate disk label: %m");

                /* Everything before this point left the image untouched. We are now starting to make
                 * changes, hence mark the image dirty */
                if (run_mark_dirty(setup->image_fd, true) > 0)
                        setup->do_mark_clean = true;

                if (!user_record_luks_discard(h)) {
                        r = run_fallocate(setup->image_fd, &st);
                        if (r < 0)
                                return r;
                }

                r = loop_device_make(setup->image_fd, O_RDWR, offset, size, 0, &setup->loop);
                if (r == -ENOENT) {
                        log_error_errno(r, "Loopback block device support is not available on this system.");
                        return -ENOLINK; /* make recognizable */
                }
                if (r < 0)
                        return log_error_errno(r, "Failed to allocate loopback context: %m");

                log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);

                r = luks_setup(h,
                               setup->loop->node ?: ip,
                               setup->dm_name,
                               h->luks_uuid,
                               h->luks_cipher,
                               h->luks_cipher_mode,
                               h->luks_volume_key_size,
                               h->password,
                               cache,
                               user_record_luks_discard(h) || user_record_luks_offline_discard(h),
                               &setup->crypt_device,
                               &found_luks_uuid,
                               &volume_key,
                               &volume_key_size,
                               &setup->key_serial);
                if (r < 0)
                        return r;

                setup->undo_dm = true;

                if (ret_luks_home) {
                        r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
                        if (r < 0)
                                return r;
                }

                r = fs_validate(setup->dm_node, h->file_system_uuid, &fstype, &found_fs_uuid);
                if (r < 0)
                        return r;

                r = run_fsck(setup->dm_node, fstype);
                if (r < 0)
                        return r;

                r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
                if (r < 0)
                        return r;

                setup->undo_mount = true;

                setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                if (setup->root_fd < 0)
                        return log_error_errno(errno, "Failed to open home directory: %m");

                if (user_record_luks_discard(h))
                        (void) run_fitrim(setup->root_fd);

                /* Exactly one of offline trim and offline fallocate is enabled */
                setup->do_offline_fallocate = !(setup->do_offline_fitrim = user_record_luks_offline_discard(h));
        }

        /* Only overwrite the UUIDs already stored in 'setup' with ones we actually discovered */
        if (!sd_id128_is_null(found_partition_uuid))
                setup->found_partition_uuid = found_partition_uuid;
        if (!sd_id128_is_null(found_luks_uuid))
                setup->found_luks_uuid = found_luks_uuid;
        if (!sd_id128_is_null(found_fs_uuid))
                setup->found_fs_uuid = found_fs_uuid;

        setup->partition_offset = offset;
        setup->partition_size = size;

        if (volume_key) {
                /* Replace (and securely erase) any previously stored key */
                erase_and_free(setup->volume_key);
                setup->volume_key = TAKE_PTR(volume_key);
                setup->volume_key_size = volume_key_size;
        }

        if (ret_luks_home)
                *ret_luks_home = TAKE_PTR(luks_home);

        return 0;
}
1461
/* Logs a one-line overview of the image size, the file system size, and the payload and free sizes
 * computed from the statfs() data in 'sfs' (block counts multiplied by f_frsize). */
static void print_size_summary(uint64_t host_size, uint64_t encrypted_size, const struct statfs *sfs) {
        uint64_t fragment_size, payload_bytes, free_bytes;

        assert(sfs);

        fragment_size = (uint64_t) sfs->f_frsize;
        payload_bytes = (uint64_t) sfs->f_blocks * fragment_size;
        free_bytes = (uint64_t) sfs->f_bfree * fragment_size;

        log_info("Image size is %s, file system size is %s, file system payload size is %s, file system free is %s.",
                 FORMAT_BYTES(host_size),
                 FORMAT_BYTES(encrypted_size),
                 FORMAT_BYTES(payload_bytes),
                 FORMAT_BYTES(free_bytes));
}
1471
/* Runs a grow-only resize of an already set up home directory, if the user record asks for it.
 *
 * Nothing is done (and 0 is returned) unless the auto-resize mode of 'h' is AUTO_RESIZE_GROW or
 * AUTO_RESIZE_SHRINK_AND_GROW and the mounted file system passes fs_can_online_shrink_and_grow().
 * Otherwise returns the result of home_resize_luks(), or a negative errno if fstatfs() fails. */
static int home_auto_grow_luks(
                UserRecord *h,
                HomeSetup *setup,
                PasswordCache *cache) {

        const HomeSetupFlags grow_flags =
                HOME_SETUP_ALREADY_ACTIVATED|
                HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
                HOME_SETUP_RESIZE_DONT_SHRINK|
                HOME_SETUP_RESIZE_DONT_UNDO;
        struct statfs fs_info;

        assert(h);
        assert(setup);

        if (!IN_SET(user_record_auto_resize_mode(h), AUTO_RESIZE_GROW, AUTO_RESIZE_SHRINK_AND_GROW))
                return 0;

        assert(setup->root_fd >= 0);

        if (fstatfs(setup->root_fd, &fs_info) < 0)
                return log_error_errno(errno, "Failed to statfs home directory: %m");

        if (fs_can_online_shrink_and_grow(fs_info.f_type)) {
                log_debug("Initiating auto-grow...");

                return home_resize_luks(h, grow_flags, setup, cache, NULL);
        }

        log_debug("Not auto-grow file system, since selected file system cannot do both online shrink and grow.");
        return 0;
}
1507
/* Activates the LUKS home directory of user 'h'.
 *
 * Refuses with -EEXIST if home_get_state_luks() reports the device mapper device as already existing.
 * Otherwise sets the home up from scratch via home_setup_luks(), optionally auto-grows it, refreshes the
 * records, moves the mount to the final home directory and then relinquishes the loopback and DM devices
 * and disarms the undo/offline operations in 'setup'.
 *
 * On success returns 1 and stores the record produced by home_refresh() in *ret_home; on failure returns
 * a negative errno-style value. */
int home_activate_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_home) {

        _cleanup_(user_record_unrefp) UserRecord *refreshed = NULL, *header_record = NULL;
        uint64_t loop_bytes, dm_bytes;
        const char *configured_dir, *target_dir;
        struct statfs fs_info;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);
        assert(ret_home);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        /* Take a private copy of the home directory path, since the string owned by the home record
         * object might change later. */
        assert_se(configured_dir = user_record_home_directory(h));
        target_dir = strdupa_safe(configured_dir);

        r = home_get_state_luks(h, setup);
        if (r < 0)
                return r;
        if (r > 0)
                return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);

        r = home_setup_luks(h, 0, NULL, setup, cache, &header_record);
        if (r < 0)
                return r;

        r = home_auto_grow_luks(h, setup, cache);
        if (r < 0)
                return r;

        /* Collect the sizes for the summary logged at the end */
        r = block_get_size_by_fd(setup->loop->fd, &loop_bytes);
        if (r < 0)
                return log_error_errno(r, "Failed to get loopback block device size: %m");

        r = block_get_size_by_path(setup->dm_node, &dm_bytes);
        if (r < 0)
                return log_error_errno(r, "Failed to get LUKS block device size: %m");

        r = home_refresh(h, flags, setup, header_record, cache, &fs_info, &refreshed);
        if (r < 0)
                return r;

        r = home_extend_embedded_identity(refreshed, h, setup);
        if (r < 0)
                return r;

        setup->root_fd = safe_close(setup->root_fd);

        r = home_move_mount(user_record_user_name_and_realm(h), target_dir);
        if (r < 0)
                return r;

        /* The mount is at its final place now, nothing to undo or trim offline anymore */
        setup->undo_mount = false;
        setup->do_offline_fitrim = false;

        loop_device_relinquish(setup->loop);

        r = sym_crypt_deactivate_by_name(NULL, setup->dm_name, CRYPT_DEACTIVATE_DEFERRED);
        if (r < 0)
                log_warning_errno(r, "Failed to relinquish DM device, ignoring: %m");

        setup->undo_dm = false;
        setup->do_offline_fallocate = false;
        setup->do_mark_clean = false;
        setup->do_drop_caches = false;
        TAKE_KEY_SERIAL(setup->key_serial); /* Leave key in kernel keyring */

        log_info("Activation completed.");

        print_size_summary(loop_bytes, dm_bytes, &fs_info);

        *ret_home = TAKE_PTR(refreshed);
        return 1;
}
1604
/* Detaches the LUKS device of the home directory of user 'h'.
 *
 * If 'setup' has no crypt device yet, one is acquired via acquire_open_luks_device() in graceful mode.
 * After deactivation the function waits (up to 30s) for the block device to disappear, then either
 * fallocate()s the image or skips that (depending on the offline discard setting) and marks the image
 * clean; failures of these trailing steps are ignored.
 *
 * Returns > 0 if this call detached the device, 0 if it was already detached, and a negative
 * errno-style value on failure. */
int home_deactivate_luks(UserRecord *h, HomeSetup *setup) {
        /* Initialized so that no path can return an indeterminate value, regardless of what
         * acquire_open_luks_device() leaves in setup->crypt_device. */
        bool we_detached = false;
        int r;

        assert(h);
        assert(setup);

        /* Note that the DM device and loopback device are set to auto-detach, hence strictly speaking we
         * don't have to detach them explicitly. However, we do that nonetheless (in case of the DM
         * device), to avoid races: by explicitly detaching them we know when the detaching is complete. We
         * don't bother about the loopback device because unlike the DM device it doesn't have a fixed
         * name. */

        if (!setup->crypt_device) {
                r = acquire_open_luks_device(h, setup, /* graceful= */ true);
                if (r < 0)
                        return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
                if (r == 0) {
                        log_debug("LUKS device %s has already been detached.", setup->dm_name);
                        we_detached = false;
                }
        }

        if (setup->crypt_device) {
                log_info("Discovered used LUKS device %s.", setup->dm_node);

                cryptsetup_enable_logging(setup->crypt_device);

                r = sym_crypt_deactivate_by_name(setup->crypt_device, setup->dm_name, 0);
                if (ERRNO_IS_DEVICE_ABSENT(r) || r == -EINVAL) {
                        log_debug_errno(r, "LUKS device %s is already detached.", setup->dm_node);
                        we_detached = false;
                } else if (r < 0)
                        return log_info_errno(r, "LUKS device %s couldn't be deactivated: %m", setup->dm_node);
                else {
                        log_info("LUKS device detaching completed.");
                        we_detached = true;
                }
        }

        (void) wait_for_block_device_gone(setup, USEC_PER_SEC * 30);
        setup->undo_dm = false;

        if (user_record_luks_offline_discard(h))
                log_debug("Not allocating on logout.");
        else
                (void) run_fallocate_by_path(user_record_image_path(h));

        /* Best effort: the result of marking the image clean is deliberately ignored */
        (void) run_mark_dirty_by_path(user_record_image_path(h), false);
        return we_detached;
}
1656
/* Trims the file system behind setup->root_fd if offline discard is enabled for user 'h'.
 *
 * Always returns 0; the result of run_fitrim() is ignored. */
int home_trim_luks(UserRecord *h, HomeSetup *setup) {
        assert(h);
        assert(setup);
        assert(setup->root_fd >= 0);

        if (user_record_luks_offline_discard(h))
                (void) run_fitrim(setup->root_fd);
        else
                log_debug("Not trimming on logout.");

        return 0;
}
1670
/* Fills '*buffer' with the PBKDF parameters configured in user record 'hr'; all fields not set here are
 * zeroed. The time cost is converted from usec to msec, the memory cost is divided by 1024.
 *
 * Returns 'buffer', for convenience. */
static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
        assert(buffer);
        assert(hr);

        /* Start from an all-zero structure, then fill in what the user record provides */
        *buffer = (struct crypt_pbkdf_type) {};

        buffer->hash = user_record_luks_pbkdf_hash_algorithm(hr);
        buffer->type = user_record_luks_pbkdf_type(hr);
        buffer->time_ms = user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC;
        buffer->max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024;
        buffer->parallel_threads = user_record_luks_pbkdf_parallel_threads(hr);

        return buffer;
}
1685
/* Fills '*buffer' with a deliberately cheap PBKDF configuration: PBKDF2 with a single iteration and a
 * 1ms time cost, using the hash algorithm from user record 'hr'. All other fields are zeroed.
 *
 * Returns 'buffer', for convenience. */
static struct crypt_pbkdf_type* build_minimal_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
        assert(buffer);
        assert(hr);

        /* Keys derived via PKCS#11 are generated randomly and are of high quality already, hence a
         * minimal PBKDF is sufficient for them. */
        *buffer = (struct crypt_pbkdf_type) {};

        buffer->hash = user_record_luks_pbkdf_hash_algorithm(hr);
        buffer->type = CRYPT_KDF_PBKDF2;
        buffer->iterations = 1;
        buffer->time_ms = 1;

        return buffer;
}
1701
/* Formats block device 'node' as a LUKS2 volume, enrolls all 'effective_passwords' as keyslots, activates
 * the volume under the DM name 'dm_name' and stores the user record of 'hr' as LUKS token.
 *
 * Passwords that are contained in 'cache' get the minimal PBKDF, all others the PBKDF configured in
 * 'hr'. 'discard' controls whether the volume is activated with CRYPT_ACTIVATE_ALLOW_DISCARDS.
 *
 * On success returns 0 and passes ownership of the libcryptsetup context to *ret; on failure returns a
 * negative errno-style value. */
static int luks_format(
                const char *node,
                const char *dm_name,
                sd_id128_t uuid,
                const char *label,
                const PasswordCache *cache,
                char **effective_passwords,
                bool discard,
                UserRecord *hr,
                struct crypt_device **ret) {

        _cleanup_(user_record_unrefp) UserRecord *embedded_record = NULL;
        _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
        _cleanup_(erase_and_freep) void *key = NULL;
        struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
        _cleanup_free_ char *token_text = NULL;
        size_t key_size;
        int next_slot = 0, r;

        assert(node);
        assert(dm_name);
        assert(hr);
        assert(ret);

        r = sym_crypt_init(&cd, node);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");

        cryptsetup_enable_logging(cd);

        /* Usually volume key generation would be left to libcryptsetup. We can't do that here though: the
         * key cannot be extracted from the library again, but we need it to encrypt the JSON record.
         * Hence we generate it ourselves and keep track of it. */

        key_size = user_record_luks_volume_key_size(hr);
        key = malloc(key_size);
        if (!key)
                return log_oom();

        r = genuine_random_bytes(key, key_size, RANDOM_BLOCK);
        if (r < 0)
                return log_error_errno(r, "Failed to generate volume key: %m");

#if HAVE_CRYPT_SET_METADATA_SIZE
        /* Increase the metadata space to 4M, the largest LUKS2 supports */
        r = sym_crypt_set_metadata_size(cd, 4096U*1024U, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to change LUKS2 metadata size: %m");
#endif

        build_good_pbkdf(&good_pbkdf, hr);
        build_minimal_pbkdf(&minimal_pbkdf, hr);

        struct crypt_params_luks2 format_params = {
                .label = label,
                .subsystem = "systemd-home",
                .sector_size = 512U,
                .pbkdf = &good_pbkdf,
        };

        r = sym_crypt_format(
                        cd,
                        CRYPT_LUKS2,
                        user_record_luks_cipher(hr),
                        user_record_luks_cipher_mode(hr),
                        SD_ID128_TO_UUID_STRING(uuid),
                        key,
                        key_size,
                        &format_params);
        if (r < 0)
                return log_error_errno(r, "Failed to format LUKS image: %m");

        log_info("LUKS formatting completed.");

        /* One keyslot per password, numbered consecutively from zero */
        STRV_FOREACH(pw, effective_passwords) {
                const struct crypt_pbkdf_type *pbkdf;

                if (password_cache_contains(cache, *pw)) { /* is this a fido2 or pkcs11 password? */
                        log_debug("Using minimal PBKDF for slot %i", next_slot);
                        pbkdf = &minimal_pbkdf;
                } else {
                        log_debug("Using good PBKDF for slot %i", next_slot);
                        pbkdf = &good_pbkdf;
                }

                r = sym_crypt_set_pbkdf_type(cd, pbkdf);
                if (r < 0)
                        return log_error_errno(r, "Failed to tweak PBKDF for slot %i: %m", next_slot);

                r = sym_crypt_keyslot_add_by_volume_key(
                                cd,
                                next_slot,
                                key,
                                key_size,
                                *pw,
                                strlen(*pw));
                if (r < 0)
                        return log_error_errno(r, "Failed to set up LUKS password for slot %i: %m", next_slot);

                log_info("Writing password to LUKS keyslot %i completed.", next_slot);
                next_slot++;
        }

        r = sym_crypt_activate_by_volume_key(
                        cd,
                        dm_name,
                        key,
                        key_size,
                        discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
        if (r < 0)
                return log_error_errno(r, "Failed to activate LUKS superblock: %m");

        log_info("LUKS activation by volume key succeeded.");

        /* Store a clone of the record (extracted with USER_RECORD_EXTRACT_EMBEDDED) in a new token */
        r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &embedded_record);
        if (r < 0)
                return log_error_errno(r, "Failed to prepare home record for LUKS: %m");

        r = format_luks_token_text(cd, embedded_record, key, &token_text);
        if (r < 0)
                return r;

        r = sym_crypt_token_json_set(cd, CRYPT_ANY_TOKEN, token_text);
        if (r < 0)
                return log_error_errno(r, "Failed to set LUKS JSON token: %m");

        log_info("Writing user record as LUKS token completed.");

        if (ret)
                *ret = TAKE_PTR(cd);

        return 0;
}
1830
/* Writes a fresh GPT disk label with a single partition of type GPT_USER_HOME_STR to the device/file
 * referenced by 'fd' (accessed through /proc/self/fd/).
 *
 * The partition is named 'label', gets partition UUID 'uuid', and covers the usable area reported by
 * libfdisk, with its start rounded up and its end rounded down to a multiple of 4K.
 *
 * On success returns 0 and stores the partition's offset and size (both in bytes) in *ret_offset and
 * *ret_size, and the disk label UUID in *ret_disk_uuid. Returns a negative errno-style value on
 * failure. */
static int make_partition_table(
                int fd,
                const char *label,
                sd_id128_t uuid,
                uint64_t *ret_offset,
                uint64_t *ret_size,
                sd_id128_t *ret_disk_uuid) {

        _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL;
        _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
        uint64_t offset, size, first_lba, start, last_lba, end;
        sd_id128_t disk_uuid;
        int r;

        assert(fd >= 0);
        assert(label);
        assert(ret_offset);
        assert(ret_size);
        assert(ret_disk_uuid); /* written unconditionally below, just like the other return parameters */

        t = fdisk_new_parttype();
        if (!t)
                return log_oom();

        r = fdisk_parttype_set_typestr(t, GPT_USER_HOME_STR);
        if (r < 0)
                return log_error_errno(r, "Failed to initialize partition type: %m");

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        r = fdisk_create_disklabel(c, "gpt");
        if (r < 0)
                return log_error_errno(r, "Failed to create GPT disk label: %m");

        p = fdisk_new_partition();
        if (!p)
                return log_oom();

        r = fdisk_partition_set_type(p, t);
        if (r < 0)
                return log_error_errno(r, "Failed to set partition type: %m");

        r = fdisk_partition_partno_follow_default(p, 1);
        if (r < 0)
                return log_error_errno(r, "Failed to place partition at first free partition index: %m");

        /* All LBA values below are in units of 512 byte sectors, all start/end values in bytes */
        first_lba = fdisk_get_first_lba(c); /* Boundary where usable space starts */
        assert(first_lba <= UINT64_MAX/512);
        start = DISK_SIZE_ROUND_UP(first_lba * 512); /* Round up to multiple of 4K */

        log_debug("Starting partition at offset %" PRIu64, start);

        if (start == UINT64_MAX)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Overflow while rounding up start LBA.");

        last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
        assert(last_lba < UINT64_MAX/512);
        end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */

        if (end <= start)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Resulting partition size zero or negative.");

        r = fdisk_partition_set_start(p, start / 512);
        if (r < 0)
                return log_error_errno(r, "Failed to place partition at offset %" PRIu64 ": %m", start);

        r = fdisk_partition_set_size(p, (end - start) / 512);
        if (r < 0)
                return log_error_errno(r, "Failed to end partition at offset %" PRIu64 ": %m", end);

        r = fdisk_partition_set_name(p, label);
        if (r < 0)
                return log_error_errno(r, "Failed to set partition name: %m");

        r = fdisk_partition_set_uuid(p, SD_ID128_TO_UUID_STRING(uuid));
        if (r < 0)
                return log_error_errno(r, "Failed to set partition UUID: %m");

        r = fdisk_add_partition(c, p, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to add partition: %m");

        r = fdisk_write_disklabel(c);
        if (r < 0)
                return log_error_errno(r, "Failed to write disk label: %m");

        r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
        if (r < 0)
                return log_error_errno(r, "Failed to determine disk label UUID: %m");

        r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed to parse disk label UUID: %m");

        /* Read the partition back, so that we report what was actually written */
        r = fdisk_get_partition(c, 0, &q);
        if (r < 0)
                return log_error_errno(r, "Failed to read created partition metadata: %m");

        assert(fdisk_partition_has_start(q));
        offset = fdisk_partition_get_start(q);
        if (offset > UINT64_MAX / 512U)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition offset too large.");

        assert(fdisk_partition_has_size(q));
        size = fdisk_partition_get_size(q);
        if (size > UINT64_MAX / 512U)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition size too large.");

        *ret_offset = offset * 512U;
        *ret_size = size * 512U;
        *ret_disk_uuid = disk_uuid;

        return 0;
}
1955
/* Tells whether 'host_size' bytes are enough to hold a file system of type 'fstype'. If
 * minimal_size_by_fs_name() reports UINT64_MAX for the type (i.e. no minimal size is known for it) the size
 * is treated as unsupported. */
static bool supported_fs_size(const char *fstype, uint64_t host_size) {
        uint64_t minimum = minimal_size_by_fs_name(fstype);

        return minimum != UINT64_MAX && host_size >= minimum;
}
1965
wait_for_devlink(const char * path)1966 static int wait_for_devlink(const char *path) {
1967 _cleanup_close_ int inotify_fd = -1;
1968 usec_t until;
1969 int r;
1970
1971 /* let's wait for a device link to show up in /dev, with a timeout. This is good to do since we
1972 * return a /dev/disk/by-uuid/… link to our callers and they likely want to access it right-away,
1973 * hence let's wait until udev has caught up with our changes, and wait for the symlink to be
1974 * created. */
1975
1976 until = usec_add(now(CLOCK_MONOTONIC), 45 * USEC_PER_SEC);
1977
1978 for (;;) {
1979 _cleanup_free_ char *dn = NULL;
1980 usec_t w;
1981
1982 if (laccess(path, F_OK) < 0) {
1983 if (errno != ENOENT)
1984 return log_error_errno(errno, "Failed to determine whether %s exists: %m", path);
1985 } else
1986 return 0; /* Found it */
1987
1988 if (inotify_fd < 0) {
1989 /* We need to wait for the device symlink to show up, let's create an inotify watch for it */
1990 inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1991 if (inotify_fd < 0)
1992 return log_error_errno(errno, "Failed to allocate inotify fd: %m");
1993 }
1994
1995 dn = dirname_malloc(path);
1996 for (;;) {
1997 if (!dn)
1998 return log_oom();
1999
2000 log_info("Watching %s", dn);
2001
2002 if (inotify_add_watch(inotify_fd, dn, IN_CREATE|IN_MOVED_TO|IN_ONLYDIR|IN_DELETE_SELF|IN_MOVE_SELF) < 0) {
2003 if (errno != ENOENT)
2004 return log_error_errno(errno, "Failed to add watch on %s: %m", dn);
2005 } else
2006 break;
2007
2008 if (empty_or_root(dn))
2009 break;
2010
2011 dn = dirname_malloc(dn);
2012 }
2013
2014 w = now(CLOCK_MONOTONIC);
2015 if (w >= until)
2016 return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device link %s still hasn't shown up, giving up.", path);
2017
2018 r = fd_wait_for_event(inotify_fd, POLLIN, usec_sub_unsigned(until, w));
2019 if (r < 0)
2020 return log_error_errno(r, "Failed to watch inotify: %m");
2021
2022 (void) flush_fd(inotify_fd);
2023 }
2024 }
2025
/* Picks the size (in bytes) a freshly created image file shall have and stores it in *ret.
 *
 * The space available on the file system backing 'image_fd' (as reported by fstatfs(), rounded down to 4K)
 * is the reference value. Depending on the user record the result is either the explicitly configured
 * disk size (capped to the available space), the default percentage of the available space, or the
 * configured relative share of it. In all cases the result is raised to the lower boundary, if it is
 * smaller than that: the minimal size of the file system type plus GPT_LUKS2_OVERHEAD, but at least
 * USER_DISK_SIZE_MIN.
 *
 * Returns 0 on success, a negative errno-style error otherwise (in which case *ret is left untouched). */
static int calculate_initial_image_size(UserRecord *h, int image_fd, const char *fstype, uint64_t *ret) {
        uint64_t available, minimum, size;
        struct statfs sfs;

        assert(h);
        assert(image_fd >= 0);
        assert(ret);

        if (fstatfs(image_fd, &sfs) < 0)
                return log_error_errno(errno, "statfs() on image failed: %m");

        available = DISK_SIZE_ROUND_DOWN((uint64_t) sfs.f_bsize * sfs.f_bavail);

        if (h->disk_size != UINT64_MAX)
                /* An absolute size is configured: use it, but never more than what is available */
                size = MIN(DISK_SIZE_ROUND_DOWN(h->disk_size), available);

        else if (h->disk_size_relative == UINT64_MAX) {
                /* Neither an absolute nor a relative size is configured: use the default percentage */

                if (available > UINT64_MAX / USER_DISK_SIZE_DEFAULT_PERCENT)
                        return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Disk size too large.");

                size = DISK_SIZE_ROUND_DOWN(available * USER_DISK_SIZE_DEFAULT_PERCENT / 100);

                log_info("Sizing home to %u%% of available disk space, which is %s.",
                         USER_DISK_SIZE_DEFAULT_PERCENT,
                         FORMAT_BYTES(size));
        } else {
                /* A relative size is configured, expressed as fraction of UINT32_MAX */

                size = DISK_SIZE_ROUND_DOWN((uint64_t) ((double) available * (double) CLAMP(h->disk_size_relative, 0U, UINT32_MAX) / (double) UINT32_MAX));

                log_info("Sizing home to %" PRIu64 ".%01" PRIu64 "%% of available disk space, which is %s.",
                         (h->disk_size_relative * 100) / UINT32_MAX,
                         ((h->disk_size_relative * 1000) / UINT32_MAX) % 10,
                         FORMAT_BYTES(size));
        }

        /* Determine the smallest image size we are willing to create */
        minimum = minimal_size_by_fs_name(fstype);
        if (minimum != UINT64_MAX) {
                assert(GPT_LUKS2_OVERHEAD < UINT64_MAX - minimum);
                minimum += GPT_LUKS2_OVERHEAD;
        }
        if (minimum == UINT64_MAX || minimum < USER_DISK_SIZE_MIN)
                minimum = USER_DISK_SIZE_MIN;

        *ret = MAX(size, minimum);
        return 0;
}
2073
/* Brings the image file referenced by 'fd' to 'size' bytes.
 *
 * If the user record asks for LUKS discard the file is sized with ftruncate(). Otherwise fallocate() is
 * used, and ftruncate() serves as fallback if the backing file system does not support fallocate().
 *
 * Returns 0 if ftruncate() was used, > 0 if fallocate() was used, -ENOSPC if the operation failed for lack
 * of disk space, and another negative errno-style error on any other failure. */
static int home_truncate(
                UserRecord *h,
                int fd,
                uint64_t size) {

        bool use_ftruncate;
        int r;

        assert(h);
        assert(fd >= 0);

        use_ftruncate = user_record_luks_discard(h);

        if (!use_ftruncate) {
                r = fallocate(fd, 0, 0, size);
                if (r < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
                        /* Not every file system implements fallocate() (ZFS, reiserfs, …), degrade
                         * gracefully to plain truncation in that case. */
                        log_notice_errno(errno, "Backing file system does not support fallocate(), falling back to ftruncate(), i.e. implicitly using non-discard mode.");
                        use_ftruncate = true;
                }
        }

        if (use_ftruncate)
                r = ftruncate(fd, size);

        if (r >= 0)
                return !use_ftruncate; /* 0 → truncated, 1 → allocated */

        if (!ERRNO_IS_DISK_SPACE(errno))
                return log_error_errno(errno, "Failed to truncate home image: %m");

        log_debug_errno(errno, "Not enough disk space to allocate home of size %s.", FORMAT_BYTES(size));
        return -ENOSPC; /* make recognizable */
}
2110
/* Creates a new LUKS home for the user record 'h'.
 *
 * The backing storage is either a block device (if the image path is below /dev/) or a regular image file
 * that is created under a temporary name and renamed into place at the very end. On it, in this order: a
 * partition table with a single partition is written, a loopback device is set up on that partition, a
 * LUKS volume is formatted on the loopback device, a file system is created inside the LUKS volume, the
 * file system is mounted and the user's directory is created and populated. Afterwards mount, DM device and
 * loopback device are released again, and everything is synced to disk.
 *
 * 'setup' carries the intermediate state (image fd, loop device, crypt device, undo flags, …); on failure
 * whatever was already stored there is left in place (presumably for the caller to clean up — confirm
 * against the caller).
 *
 * Returns 0 on success, in which case *ret_home is set to a clone of 'h' with the binding of the new home
 * added. Returns a negative errno-style error on failure, among them:
 *
 *   -EPROTONOSUPPORT → file system type not supported, or no mkfs binary for it
 *   -EEXIST          → the device mapper node already exists
 *   -ENOTBLK         → path below /dev/ is not a block device, or is a partition
 *   -EMSGSIZE        → requested disk size larger than the backing block device
 *   -ERANGE          → resulting size too small for the selected file system
 *   -ENOLINK         → loopback block devices are not available */
int home_create_luks(
                UserRecord *h,
                HomeSetup *setup,
                const PasswordCache *cache,
                char **effective_passwords,
                UserRecord **ret_home) {

        _cleanup_free_ char *subdir = NULL, *disk_uuid_path = NULL;
        uint64_t encrypted_size,
                host_size = 0, partition_offset = 0, partition_size = 0; /* Unnecessary initialization to appease gcc */
        _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
        sd_id128_t partition_uuid, fs_uuid, luks_uuid, disk_uuid;
        _cleanup_close_ int mount_fd = -1;
        const char *fstype, *ip;
        struct statfs sfs;
        int r;

        assert(h);
        assert(h->storage < 0 || h->storage == USER_LUKS);
        assert(setup);
        assert(!setup->temporary_image_path);
        assert(setup->image_fd < 0);
        assert(ret_home);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        assert_se(ip = user_record_image_path(h));

        fstype = user_record_file_system_type(h);
        if (!supported_fstype(fstype))
                return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Unsupported file system type: %s", fstype);

        r = mkfs_exists(fstype);
        if (r < 0)
                return log_error_errno(r, "Failed to check if mkfs binary for %s exists: %m", fstype);
        if (r == 0) {
                if (h->file_system_type || streq(fstype, "ext4") || !supported_fstype("ext4"))
                        return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for file system type %s does not exist.", fstype);

                /* If the record does not explicitly declare a file system to use, and the compiled-in
                 * default does not actually exist, then do an automatic fallback onto ext4, as the baseline
                 * fs of Linux. We won't search for a working fs type here beyond ext4, i.e. nothing fancier
                 * than a single, conservative fallback to baseline. This should be useful in minimal
                 * environments where mkfs.btrfs or so are not made available, but mkfs.ext4 as Linux' most
                 * boring, most basic fs is. */
                log_info("Formatting tool for compiled-in default file system %s not available, falling back to ext4 instead.", fstype);
                fstype = "ext4";
        }

        /* Use the UUIDs configured in the record where available, and pick random ones otherwise */
        if (sd_id128_is_null(h->partition_uuid)) {
                r = sd_id128_randomize(&partition_uuid);
                if (r < 0)
                        return log_error_errno(r, "Failed to acquire partition UUID: %m");
        } else
                partition_uuid = h->partition_uuid;

        if (sd_id128_is_null(h->luks_uuid)) {
                r = sd_id128_randomize(&luks_uuid);
                if (r < 0)
                        return log_error_errno(r, "Failed to acquire LUKS UUID: %m");
        } else
                luks_uuid = h->luks_uuid;

        if (sd_id128_is_null(h->file_system_uuid)) {
                r = sd_id128_randomize(&fs_uuid);
                if (r < 0)
                        return log_error_errno(r, "Failed to acquire file system UUID: %m");
        } else
                fs_uuid = h->file_system_uuid;

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        r = access(setup->dm_node, F_OK);
        if (r < 0) {
                if (errno != ENOENT)
                        return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
        } else
                return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);

        if (path_startswith(ip, "/dev/")) {
                _cleanup_free_ char *sysfs = NULL;
                uint64_t block_device_size;
                struct stat st;

                /* Let's place the home directory on a real device, i.e. an USB stick or such */

                setup->image_fd = open_image_file(h, ip, &st);
                if (setup->image_fd < 0)
                        return setup->image_fd;

                if (!S_ISBLK(st.st_mode))
                        return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Device is not a block device, refusing.");

                /* The "partition" attribute in sysfs only exists for partition block devices */
                if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
                        return log_oom();
                if (access(sysfs, F_OK) < 0) {
                        if (errno != ENOENT)
                                return log_error_errno(errno, "Failed to check whether %s exists: %m", sysfs);
                } else
                        return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Operating on partitions is currently not supported, sorry. Please specify a top-level block device.");

                if (flock(setup->image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
                        return log_error_errno(errno, "Failed to lock block device %s: %m", ip);

                if (ioctl(setup->image_fd, BLKGETSIZE64, &block_device_size) < 0)
                        return log_error_errno(errno, "Failed to read block device size: %m");

                if (h->disk_size == UINT64_MAX) {

                        /* If a relative disk size is requested, apply it relative to the block device size */
                        if (h->disk_size_relative < UINT32_MAX)
                                host_size = CLAMP(DISK_SIZE_ROUND_DOWN(block_device_size * h->disk_size_relative / UINT32_MAX),
                                                  USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
                        else
                                host_size = block_device_size; /* Otherwise, take the full device */

                } else if (h->disk_size > block_device_size)
                        return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Selected disk size larger than backing block device, refusing.");
                else
                        host_size = DISK_SIZE_ROUND_DOWN(h->disk_size);

                if (!supported_fs_size(fstype, LESS_BY(host_size, GPT_LUKS2_OVERHEAD)))
                        return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
                                               "Selected file system size too small for %s.", fstype);

                /* After creation we should reference this partition by its UUID instead of the block
                 * device. That's preferable since the user might have specified a device node such as
                 * /dev/sdb to us, which might look very different when replugged. */
                if (asprintf(&disk_uuid_path, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(luks_uuid)) < 0)
                        return log_oom();

                if (user_record_luks_discard(h) || user_record_luks_offline_discard(h)) {
                        /* If we want online or offline discard, discard once before we start using things. */

                        if (ioctl(setup->image_fd, BLKDISCARD, (uint64_t[]) { 0, block_device_size }) < 0)
                                log_full_errno(errno == EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, errno,
                                               "Failed to issue full-device BLKDISCARD on device, ignoring: %m");
                        else
                                log_info("Full device discard completed.");
                }
        } else {
                _cleanup_free_ char *t = NULL;

                /* Let's place the home directory in a regular image file, created under a temporary name */

                r = mkdir_parents(ip, 0755);
                if (r < 0)
                        return log_error_errno(r, "Failed to create parent directory of %s: %m", ip);

                r = tempfn_random(ip, "homework", &t);
                if (r < 0)
                        return log_error_errno(r, "Failed to derive temporary file name for %s: %m", ip);

                setup->image_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
                if (setup->image_fd < 0)
                        return log_error_errno(errno, "Failed to create home image %s: %m", t);

                setup->temporary_image_path = TAKE_PTR(t);

                /* NOTE(review): 't' was just handed over via TAKE_PTR(), hence the path argument passed
                 * here is presumably NULL and chattr_full() is expected to operate on the fd only — confirm. */
                r = chattr_full(t, setup->image_fd, FS_NOCOW_FL|FS_NOCOMP_FL, FS_NOCOW_FL|FS_NOCOMP_FL, NULL, NULL, CHATTR_FALLBACK_BITWISE);
                if (r < 0 && r != -ENOANO) /* ENOANO → some bits didn't work; which we skip logging about because chattr_full() already debug logs about those flags */
                        log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
                                       "Failed to set file attributes on %s, ignoring: %m", setup->temporary_image_path);

                r = calculate_initial_image_size(h, setup->image_fd, fstype, &host_size);
                if (r < 0)
                        return r;

                r = resize_image_loop(h, setup, 0, host_size, &host_size);
                if (r < 0)
                        return r;

                log_info("Allocating image file completed.");
        }

        r = make_partition_table(
                        setup->image_fd,
                        user_record_user_name_and_realm(h),
                        partition_uuid,
                        &partition_offset,
                        &partition_size,
                        &disk_uuid);
        if (r < 0)
                return r;

        log_info("Writing of partition table completed.");

        r = loop_device_make(setup->image_fd, O_RDWR, partition_offset, partition_size, 0, &setup->loop);
        if (r < 0) {
                if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
                                     * or similar and loopback block devices are not available, return a
                                     * recognizable error in this case. */
                        log_error_errno(r, "Loopback block device support is not available on this system.");
                        return -ENOLINK; /* Make recognizable */
                }

                /* There's no temporary image path if we operate on a block device, fall back to the
                 * image path then, instead of passing NULL for %s. */
                return log_error_errno(r, "Failed to set up loopback device for %s: %m", setup->temporary_image_path ?: ip);
        }

        r = loop_device_flock(setup->loop, LOCK_EX); /* make sure udev won't read before we are done */
        if (r < 0)
                return log_error_errno(r, "Failed to take lock on loop device: %m");

        log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);

        r = luks_format(setup->loop->node,
                        setup->dm_name,
                        luks_uuid,
                        user_record_user_name_and_realm(h),
                        cache,
                        effective_passwords,
                        user_record_luks_discard(h) || user_record_luks_offline_discard(h),
                        h,
                        &setup->crypt_device);
        if (r < 0)
                return r;

        setup->undo_dm = true;

        r = block_get_size_by_path(setup->dm_node, &encrypted_size);
        if (r < 0)
                return log_error_errno(r, "Failed to get encrypted block device size: %m");

        log_info("Setting up LUKS device %s completed.", setup->dm_node);

        r = make_filesystem(setup->dm_node, fstype, user_record_user_name_and_realm(h), fs_uuid, user_record_luks_discard(h));
        if (r < 0)
                return r;

        log_info("Formatting file system completed.");

        r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
        if (r < 0)
                return r;

        setup->undo_mount = true;

        subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
        if (!subdir)
                return log_oom();

        /* Prefer using a btrfs subvolume if we can, fall back to directory otherwise */
        r = btrfs_subvol_make_fallback(subdir, 0700);
        if (r < 0)
                return log_error_errno(r, "Failed to create user directory in mounted image file: %m");

        setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
        if (setup->root_fd < 0)
                return log_error_errno(errno, "Failed to open user directory in mounted image file: %m");

        /* Best effort: failures are ignored here, we only act on it if a new mount fd was returned */
        (void) home_shift_uid(setup->root_fd, NULL, UID_NOBODY, h->uid, &mount_fd);

        if (mount_fd >= 0) {
                /* If we have established a new mount, then we can use that as new root fd to our home directory. */
                safe_close(setup->root_fd);

                setup->root_fd = fd_reopen(mount_fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
                if (setup->root_fd < 0)
                        return log_error_errno(setup->root_fd, "Unable to convert mount fd into proper directory fd: %m");

                mount_fd = safe_close(mount_fd);
        }

        r = home_populate(h, setup->root_fd);
        if (r < 0)
                return r;

        r = home_sync_and_statfs(setup->root_fd, &sfs);
        if (r < 0)
                return r;

        r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE, &new_home);
        if (r < 0)
                return log_error_errno(r, "Failed to clone record: %m");

        r = user_record_add_binding(
                        new_home,
                        USER_LUKS,
                        disk_uuid_path ?: ip,
                        partition_uuid,
                        luks_uuid,
                        fs_uuid,
                        sym_crypt_get_cipher(setup->crypt_device),
                        sym_crypt_get_cipher_mode(setup->crypt_device),
                        luks_volume_key_size_convert(setup->crypt_device),
                        fstype,
                        NULL,
                        h->uid,
                        (gid_t) h->uid);
        if (r < 0)
                return log_error_errno(r, "Failed to add binding to record: %m");

        if (user_record_luks_offline_discard(h)) {
                r = run_fitrim(setup->root_fd);
                if (r < 0)
                        return r;
        }

        /* The home is complete now, tear down everything we set up above, in reverse order */
        setup->root_fd = safe_close(setup->root_fd);

        r = home_setup_undo_mount(setup, LOG_ERR);
        if (r < 0)
                return r;

        r = home_setup_undo_dm(setup, LOG_ERR);
        if (r < 0)
                return r;

        setup->loop = loop_device_unref(setup->loop);

        if (!user_record_luks_offline_discard(h)) {
                r = run_fallocate(setup->image_fd, NULL /* refresh stat() data */);
                if (r < 0)
                        return r;
        }

        /* Sync everything to disk before we move things into place under the final name. Note that
         * fsync() reports its error via errno; 'r' still holds the (non-negative) result of an earlier
         * call here and must not be used for the error. */
        if (fsync(setup->image_fd) < 0)
                return log_error_errno(errno, "Failed to synchronize image to disk: %m");

        if (disk_uuid_path)
                /* Reread partition table if this is a block device */
                (void) ioctl(setup->image_fd, BLKRRPART, 0);
        else {
                assert(setup->temporary_image_path);

                if (rename(setup->temporary_image_path, ip) < 0)
                        return log_error_errno(errno, "Failed to rename image file: %m");

                setup->temporary_image_path = mfree(setup->temporary_image_path);

                /* If we operate on a file, sync the containing directory too. */
                r = fsync_directory_of_file(setup->image_fd);
                if (r < 0)
                        return log_error_errno(r, "Failed to synchronize directory of image file to disk: %m");

                log_info("Moved image file into place.");
        }

        /* Let's close the image fd now. If we are operating on a real block device this will release the BSD
         * lock that ensures udev doesn't interfere with what we are doing */
        setup->image_fd = safe_close(setup->image_fd);

        if (disk_uuid_path)
                (void) wait_for_devlink(disk_uuid_path);

        log_info("Creation completed.");

        print_size_summary(host_size, encrypted_size, &sfs);

        log_debug("GPT + LUKS2 overhead is %" PRIu64 " (expected %" PRIu64 ")", host_size - encrypted_size, GPT_LUKS2_OVERHEAD);

        *ret_home = TAKE_PTR(new_home);
        return 0;
}
2468
/* Checks whether the device mapper node of the home directory currently exists.
 *
 * Returns > 0 if the node exists, 0 if it does not exist, and a negative errno-style error if the DM names
 * could not be determined or the check failed for a reason other than ENOENT. */
int home_get_state_luks(UserRecord *h, HomeSetup *setup) {
        int r;

        assert(h);
        assert(setup);

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        if (access(setup->dm_node, F_OK) >= 0)
                return 1; /* The node is there */

        if (errno == ENOENT)
                return 0; /* The node is definitely not there */

        return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
}
2485
/* Non-negative return values of can_resize_fs() */
enum {
        CAN_RESIZE_ONLINE,  /* returned for all accepted requests, except for the one case below */
        CAN_RESIZE_OFFLINE, /* returned when an ext4 file system shall be shrunk */
};
2490
can_resize_fs(int fd,uint64_t old_size,uint64_t new_size)2491 static int can_resize_fs(int fd, uint64_t old_size, uint64_t new_size) {
2492 struct statfs sfs;
2493
2494 assert(fd >= 0);
2495
2496 /* Filter out bogus requests early */
2497 if (old_size == 0 || old_size == UINT64_MAX ||
2498 new_size == 0 || new_size == UINT64_MAX)
2499 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid resize parameters.");
2500
2501 if ((old_size & 511) != 0 || (new_size & 511) != 0)
2502 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Resize parameters not multiple of 512.");
2503
2504 if (fstatfs(fd, &sfs) < 0)
2505 return log_error_errno(errno, "Failed to fstatfs() file system: %m");
2506
2507 if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
2508
2509 if (new_size < BTRFS_MINIMAL_SIZE)
2510 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for btrfs (needs to be 256M at least.");
2511
2512 /* btrfs can grow and shrink online */
2513
2514 } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
2515
2516 if (new_size < XFS_MINIMAL_SIZE)
2517 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for xfs (needs to be 14M at least).");
2518
2519 /* XFS can grow, but not shrink */
2520 if (new_size < old_size)
2521 return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Shrinking this type of file system is not supported.");
2522
2523 } else if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
2524
2525 if (new_size < EXT4_MINIMAL_SIZE)
2526 return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for ext4 (needs to be 1M at least).");
2527
2528 /* ext4 can grow online, and shrink offline */
2529 if (new_size < old_size)
2530 return CAN_RESIZE_OFFLINE;
2531
2532 } else
2533 return log_error_errno(SYNTHETIC_ERRNO(ESOCKTNOSUPPORT), "Resizing this type of file system is not supported.");
2534
2535 return CAN_RESIZE_ONLINE;
2536 }
2537
/* Resizes the ext4 file system on setup->dm_node to 'new_size' bytes while it is not mounted.
 *
 * The sequence is: close the root fd and undo the mount (if either is established), run "e2fsck -fp" on
 * the device, run "resize2fs" on it, and finally mount the file system again and reopen
 * HOME_RUNTIME_WORK_DIR as root fd, if we had unmounted/closed them at the beginning. 'discard', 'flags'
 * and 'extra_mount_options' are only used for mounting the file system again.
 *
 * Returns 0 on success, a negative errno-style error on failure. Note that on failure the file system is
 * not mounted again. */
static int ext4_offline_resize_fs(
                HomeSetup *setup,
                uint64_t new_size,
                bool discard,
                unsigned long flags,
                const char *extra_mount_options) {

        _cleanup_free_ char *sectors_arg = NULL;
        bool reopen_root = false, remount = false;
        pid_t resize_pid, fsck_pid;
        int r, fsck_status;

        assert(setup);
        assert(setup->dm_node);

        /* Step 1: get the file system unmounted, and remember what we have to re-establish later */
        if (setup->root_fd >= 0) {
                setup->root_fd = safe_close(setup->root_fd);
                reopen_root = true;
        }

        if (setup->undo_mount) {
                r = home_setup_undo_mount(setup, LOG_ERR);
                if (r < 0)
                        return r;

                remount = true;
        }

        log_info("Temporary unmounting of file system completed.");

        /* Step 2: force a file system check, since resize2fs insists on that */
        r = safe_fork("(e2fsck)",
                      FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
                      &fsck_pid);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Child: only reached past execlp() if executing the binary failed */
                execlp("e2fsck" ,"e2fsck", "-fp", setup->dm_node, NULL);
                log_open();
                log_error_errno(errno, "Failed to execute e2fsck: %m");
                _exit(EXIT_FAILURE);
        }

        fsck_status = wait_for_terminate_and_check("e2fsck", fsck_pid, WAIT_LOG_ABNORMAL);
        if (fsck_status < 0)
                return fsck_status;
        if ((fsck_status & ~FSCK_ERROR_CORRECTED) != 0) {
                /* Anything besides "errors corrected" is worth a warning, but only uncorrected errors
                 * and a requested reboot are fatal. */
                log_warning("e2fsck failed with exit status %i.", fsck_status);

                if ((fsck_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");

                log_warning("Ignoring fsck error.");
        }

        log_info("Forced file system check completed.");

        /* Step 3: the actual resize. resize2fs doesn't take byte sizes, hence pass the new size in 512
         * byte sectors (that's what the "s" suffix selects). */
        if (asprintf(&sectors_arg, "%" PRIu64 "s", new_size / 512) < 0)
                return log_oom();

        r = safe_fork("(e2resize)",
                      FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
                      &resize_pid);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Child: only reached past execlp() if executing the binary failed */
                execlp("resize2fs" ,"resize2fs", setup->dm_node, sectors_arg, NULL);
                log_open();
                log_error_errno(errno, "Failed to execute resize2fs: %m");
                _exit(EXIT_FAILURE);
        }

        log_info("Offline file system resize completed.");

        /* Step 4: bring back what we took down in step 1 */
        if (remount) {
                r = home_mount_node(setup->dm_node, "ext4", discard, flags, extra_mount_options);
                if (r < 0)
                        return r;

                setup->undo_mount = true;
        }

        if (reopen_root) {
                setup->root_fd = open(HOME_RUNTIME_WORK_DIR, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                if (setup->root_fd < 0)
                        return log_error_errno(errno, "Failed to reopen file system: %m");
        }

        log_info("File system mounted again.");

        return 0;
}
2636
/* Locates the partition we are about to resize in the GPT partition table of the device/image referenced
 * by 'fd'.
 *
 * The partition is identified by its byte offset and current byte size (both multiples of 512). All other
 * used partitions must end at or before the start of our partition, i.e. ours must be the last one,
 * otherwise the request is refused.
 *
 * Returns:
 *
 *   0  → 'partition_offset' is 0, i.e. there's no partition table to deal with. *ret_disk_uuid is set to
 *        SD_ID128_NULL, *ret_table and *ret_partition are set to NULL.
 *   1  → partition found. *ret_disk_uuid is the disk label UUID, *ret_table the partition table (ownership
 *        is passed to the caller) and *ret_partition the matching entry, as returned by
 *        fdisk_table_get_partition() for that table (presumably a reference owned by the table, i.e. only
 *        valid as long as the table is — confirm against libfdisk).
 *   <0 → negative errno-style error, among them -ENOMEDIUM (no GPT), -ENOTUNIQ (more than one match),
 *        -ENOPKG (no match), -EINVAL (partition without size, or not the last partition). */
static int prepare_resize_partition(
                int fd,
                uint64_t partition_offset,
                uint64_t old_partition_size,
                sd_id128_t *ret_disk_uuid,
                struct fdisk_table **ret_table,
                struct fdisk_partition **ret_partition) {

        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
        _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
        struct fdisk_partition *found = NULL;
        sd_id128_t disk_uuid;
        size_t n_partitions;
        int r;

        assert(fd >= 0);
        assert(ret_disk_uuid);
        assert(ret_table);
        assert(ret_partition);

        assert((partition_offset & 511) == 0);
        assert((old_partition_size & 511) == 0);
        assert(UINT64_MAX - old_partition_size >= partition_offset);

        if (partition_offset == 0) {
                /* If the offset is at the beginning we assume no partition table, let's exit early. */
                log_debug("Not rewriting partition table, operating on naked device.");
                *ret_disk_uuid = SD_ID128_NULL;
                *ret_table = NULL;
                *ret_partition = NULL; /* keep table and partition consistently unset, apply_resize_partition() relies on that */
                return 0;
        }

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        /* Hand our fd to libfdisk by path, via /proc/self/fd/ */
        if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Disk has no GPT partition table.");

        r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire disk UUID: %m");

        r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed to parse disk UUID: %m");

        r = fdisk_get_partitions(c, &t);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire partition table: %m");

        n_partitions = fdisk_table_get_nents(t);
        for (size_t i = 0; i < n_partitions; i++) {
                struct fdisk_partition *p;

                p = fdisk_table_get_partition(t, i);
                if (!p)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");

                if (fdisk_partition_is_used(p) <= 0)
                        continue;
                if (fdisk_partition_has_start(p) <= 0 || fdisk_partition_has_size(p) <= 0 || fdisk_partition_has_end(p) <= 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found partition without a size.");

                /* libfdisk reports start and size in sectors, our parameters are in bytes */
                if (fdisk_partition_get_start(p) == partition_offset / 512U &&
                    fdisk_partition_get_size(p) == old_partition_size / 512U) {

                        if (found)
                                return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Partition found twice, refusing.");

                        found = p;
                } else if (fdisk_partition_get_end(p) > partition_offset / 512U)
                        /* Some other partition reaches beyond the start of ours, hence ours is not the last one */
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't extend, not last partition in image.");
        }

        if (!found)
                return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to find matching partition to resize.");

        *ret_disk_uuid = disk_uuid;
        *ret_table = TAKE_PTR(t);
        *ret_partition = found;

        return 1;
}
2728
ask_cb(struct fdisk_context * c,struct fdisk_ask * ask,void * userdata)2729 static int ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *userdata) {
2730 char *result;
2731
2732 assert(c);
2733
2734 switch (fdisk_ask_get_type(ask)) {
2735
2736 case FDISK_ASKTYPE_STRING:
2737 result = new(char, 37);
2738 if (!result)
2739 return log_oom();
2740
2741 fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) userdata, result));
2742 break;
2743
2744 default:
2745 log_debug("Unexpected question from libfdisk, ignoring.");
2746 }
2747
2748 return 0;
2749 }
2750
/* Writes a new GPT partition table to 'fd', in which partition 'p' of table 't' has the size
 * 'new_partition_size' (in bytes).
 *
 * For that the first 1024 bytes of the device are zeroed out, a fresh GPT disk label is created, the
 * (patched) table 't' is applied to it, and the disk label ID is set to 'disk_uuids' (which is passed to
 * libfdisk via ask_cb()).
 *
 * 't' and 'p' must either both be NULL or both be set. Returns 0 if they are NULL (in which case nothing
 * is done), 1 if the partition table was written, and a negative errno-style error on failure.
 *
 * NOTE(review): 'new_partition_size' is a size_t while the other byte sizes in this file are uint64_t —
 * presumably this truncates large sizes where size_t is 32 bit wide; verify. */
static int apply_resize_partition(
                int fd,
                sd_id128_t disk_uuids,
                struct fdisk_table *t,
                struct fdisk_partition *p,
                size_t new_partition_size) {

        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_free_ void *zeroes = NULL;
        _cleanup_free_ char *proc_path = NULL;
        ssize_t written;
        int r;

        assert(fd >= 0);
        assert(!t == !p);

        if (!t) /* no partition table to apply, exit early */
                return 0;

        assert(p);

        /* Patch the final size into the partition entry before the table is written out */
        r = fdisk_partition_size_explicit(p, 1);
        if (r < 0)
                return log_error_errno(r, "Failed to enable explicit partition size: %m");

        r = fdisk_partition_set_size(p, new_partition_size / 512U);
        if (r < 0)
                return log_error_errno(r, "Failed to change partition size: %m");

        /* libfdisk appears to get confused by the existing PMBR, hence overwrite the first two 512 byte
         * sectors with zeroes explicitly. */
        zeroes = malloc0(1024U);
        if (!zeroes)
                return log_oom();

        written = pwrite(fd, zeroes, 1024U, 0);
        if (written < 0)
                return log_error_errno(errno, "Failed to wipe partition table: %m");
        if (written != 1024)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table.");

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        /* Hand our fd to libfdisk by path, via /proc/self/fd/ */
        if (asprintf(&proc_path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, proc_path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        r = fdisk_create_disklabel(c, "gpt");
        if (r < 0)
                return log_error_errno(r, "Failed to create GPT disk label: %m");

        r = fdisk_apply_table(c, t);
        if (r < 0)
                return log_error_errno(r, "Failed to apply partition table: %m");

        /* fdisk_set_disklabel_id() takes no ID parameter, the callback installed here supplies it */
        r = fdisk_set_ask(c, ask_cb, &disk_uuids);
        if (r < 0)
                return log_error_errno(r, "Failed to set libfdisk query function: %m");

        r = fdisk_set_disklabel_id(c);
        if (r < 0)
                return log_error_errno(r, "Failed to change disklabel ID: %m");

        r = fdisk_write_disklabel(c);
        if (r < 0)
                return log_error_errno(r, "Failed to write disk label: %m");

        return 1;
}
2825
/* Always keep at least 16M free, so that we can safely log in and update the user record while doing so.
 * get_smallest_fs_size() adds this margin (in bytes) on top of the space currently in use. */
#define HOME_MIN_FREE (16U*1024U*1024U)
2828
get_smallest_fs_size(int fd,uint64_t * ret)2829 static int get_smallest_fs_size(int fd, uint64_t *ret) {
2830 uint64_t minsz, needed;
2831 struct statfs sfs;
2832
2833 assert(fd >= 0);
2834 assert(ret);
2835
2836 /* Determines the minimal disk size we might be able to shrink the file system referenced by the fd to. */
2837
2838 if (syncfs(fd) < 0) /* let's sync before we query the size, so that the values returned are accurate */
2839 return log_error_errno(errno, "Failed to synchronize home file system: %m");
2840
2841 if (fstatfs(fd, &sfs) < 0)
2842 return log_error_errno(errno, "Failed to statfs() home file system: %m");
2843
2844 /* Let's determine the minimal file system size of the used fstype */
2845 minsz = minimal_size_by_fs_magic(sfs.f_type);
2846 if (minsz == UINT64_MAX)
2847 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Don't know minimum file system size of file system type '%s' of home directory.", fs_type_to_string(sfs.f_type));
2848
2849 if (minsz < USER_DISK_SIZE_MIN)
2850 minsz = USER_DISK_SIZE_MIN;
2851
2852 if (sfs.f_bfree > sfs.f_blocks)
2853 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Detected amount of free blocks is greater than the total amount of file system blocks. Refusing.");
2854
2855 /* Calculate how much disk space is currently in use. */
2856 needed = sfs.f_blocks - sfs.f_bfree;
2857 if (needed > UINT64_MAX / sfs.f_bsize)
2858 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File system size out of range.");
2859
2860 needed *= sfs.f_bsize;
2861
2862 /* Add some safety margin of free space we'll always keep */
2863 if (needed > UINT64_MAX - HOME_MIN_FREE) /* Check for overflow */
2864 needed = UINT64_MAX;
2865 else
2866 needed += HOME_MIN_FREE;
2867
2868 *ret = DISK_SIZE_ROUND_UP(MAX(needed, minsz));
2869 return 0;
2870 }
2871
get_largest_image_size(int fd,const struct stat * st,uint64_t * ret)2872 static int get_largest_image_size(int fd, const struct stat *st, uint64_t *ret) {
2873 uint64_t used, avail, sum;
2874 struct statfs sfs;
2875 int r;
2876
2877 assert(fd >= 0);
2878 assert(st);
2879 assert(ret);
2880
2881 /* Determines the maximum file size we might be able to grow the image file referenced by the fd to. */
2882
2883 r = stat_verify_regular(st);
2884 if (r < 0)
2885 return log_error_errno(r, "Image file is not a regular file, refusing: %m");
2886
2887 if (syncfs(fd) < 0)
2888 return log_error_errno(errno, "Failed to synchronize file system backing image file: %m");
2889
2890 if (fstatfs(fd, &sfs) < 0)
2891 return log_error_errno(errno, "Failed to statfs() image file: %m");
2892
2893 used = (uint64_t) st->st_blocks * 512;
2894 avail = (uint64_t) sfs.f_bsize * sfs.f_bavail;
2895
2896 if (avail > UINT64_MAX - used)
2897 sum = UINT64_MAX;
2898 else
2899 sum = avail + used;
2900
2901 *ret = DISK_SIZE_ROUND_DOWN(MIN(sum, USER_DISK_SIZE_MAX));
2902 return 0;
2903 }
2904
/* Resizes the file system of the home area from 'old_fs_size' to (ideally) 'new_fs_size'. When growing,
 * a single attempt is made. When shrinking an online-resizable file system, a bisection between the
 * requested and the old size is done to find the smallest size that still works. The size finally
 * reached is stored in *ret_fs_size (if non-NULL). Returns 0 on success (which includes "no change
 * possible"), a negative errno-style value on failure. */
static int resize_fs_loop(
                UserRecord *h,
                HomeSetup *setup,
                int resize_type,
                uint64_t old_fs_size,
                uint64_t new_fs_size,
                uint64_t *ret_fs_size) {

        uint64_t reached = old_fs_size, lo = new_fs_size, hi = old_fs_size, attempt = new_fs_size;
        const bool growing = new_fs_size > old_fs_size;
        unsigned rounds = 0;
        int r;

        assert(h);
        assert(setup);
        assert(setup->root_fd >= 0);

        for (;;) {
                bool succeeded;

                rounds++;

                if (resize_type != CAN_RESIZE_ONLINE) {
                        /* Offline resizing. A failure here is fatal, we do not retry via the bisection
                         * logic, given that this involves shelling out to multiple programs. */
                        r = ext4_offline_resize_fs(setup, attempt, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
                        if (r < 0)
                                return r;

                        succeeded = true;
                        reached = attempt;
                } else {
                        r = resize_fs(setup->root_fd, attempt, NULL);
                        if (r >= 0) {
                                log_debug("Successfully resized from %s to %s.", FORMAT_BYTES(reached), FORMAT_BYTES(attempt));
                                reached = attempt;
                                succeeded = true;
                        } else if (ERRNO_IS_DISK_SPACE(r) && !growing) {
                                /* Out of space while shrinking: not fatal, try a larger size next */
                                log_debug_errno(r, "Shrinking from %s to %s didn't work, not enough space for contained data.", FORMAT_BYTES(reached), FORMAT_BYTES(attempt));
                                succeeded = false;
                        } else
                                return log_error_errno(r, "Failed to resize file system: %m");
                }

                /* Growing needs exactly one iteration */
                if (growing)
                        break;

                /* Shrinking: narrow down the window we bisect in. Sizes that worked are an upper
                 * boundary, sizes that didn't are a lower boundary. */
                if (succeeded)
                        hi = MIN(hi, attempt);
                else
                        lo = MAX(lo, attempt);

                if (lo >= hi) {
                        log_debug("Image can't be shrunk further (range to try is empty).");
                        break;
                }

                /* Next candidate: the 4K-aligned middle of the remaining window */
                attempt = DISK_SIZE_ROUND_DOWN(lo + (hi - lo) / 2);
                if (attempt <= lo || attempt >= hi) {
                        log_debug("Image can't be shrunk further (remaining range to try too small).");
                        break;
                }
        }

        log_debug("Bisection loop completed after %u iterations.", rounds);

        if (ret_fs_size)
                *ret_fs_size = reached;

        return 0;
}
2991
/* Resizes the image file from 'old_image_size' to (ideally) 'new_image_size'. When shrinking, or when
 * home_truncate() reports plain truncation (return value 0), a single attempt is made. When growing via
 * allocation and running out of disk space, a bisection between the old and the requested size is done
 * to find the largest size that still works. The size finally reached is stored in *ret_image_size (if
 * non-NULL). Returns 0 on success, a negative errno-style value on failure. */
static int resize_image_loop(
                UserRecord *h,
                HomeSetup *setup,
                uint64_t old_image_size,
                uint64_t new_image_size,
                uint64_t *ret_image_size) {

        uint64_t reached = old_image_size, lo = old_image_size, hi = new_image_size, attempt = new_image_size;
        const bool shrinking = new_image_size < old_image_size;
        unsigned rounds = 0;
        int r;

        assert(h);
        assert(setup);
        assert(setup->image_fd >= 0);

        for (;;) {
                bool allocated;

                rounds++;

                r = home_truncate(h, setup->image_fd, attempt);
                if (r == 0) {
                        /* Plain truncation rather than allocation: bisecting is pointless then, leave
                         * right away. */
                        log_debug("Resizing from %s to %s via truncation worked successfully.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(attempt));
                        reached = attempt;
                        break;
                }

                if (r > 0) {
                        log_debug("Resizing from %s to %s via allocation worked successfully.", FORMAT_BYTES(reached), FORMAT_BYTES(attempt));
                        reached = attempt;
                        allocated = true;
                } else {
                        /* Only running out of disk space while growing is something we retry on */
                        if (!ERRNO_IS_DISK_SPACE(r) || shrinking)
                                return r;

                        log_debug_errno(r, "Growing from %s to %s didn't work, not enough space on backing disk.", FORMAT_BYTES(reached), FORMAT_BYTES(attempt));
                        allocated = false;
                }

                /* Shrinking needs exactly one iteration */
                if (shrinking)
                        break;

                /* Growing: narrow down the window we bisect in. Sizes that worked are a lower
                 * boundary, sizes that didn't are an upper boundary. */
                if (allocated)
                        lo = MAX(lo, attempt);
                else
                        hi = MIN(hi, attempt);

                if (lo >= hi) {
                        log_debug("Image can't be grown further (range to try is empty).");
                        break;
                }

                /* Next candidate: the 4K-aligned middle of the remaining window */
                attempt = DISK_SIZE_ROUND_DOWN(lo + (hi - lo) / 2);
                if (attempt <= lo || attempt >= hi) {
                        log_debug("Image can't be grown further (remaining range to try too small).");
                        break;
                }
        }

        log_debug("Bisection loop completed after %u iterations.", rounds);

        if (ret_image_size)
                *ret_image_size = reached;

        return 0;
}
3062
/* Resizes a LUKS home area: the backing image file (only if it is a regular file), the partition inside
 * it, the LUKS volume and the file system within.
 *
 * The target size is taken from h->disk_size, unless HOME_SETUP_RESIZE_MINIMIZE is set in 'flags', in
 * which case we shrink as far as possible. The requested sizes are adjusted so that at least the LUKS
 * header and the smallest possible file system size (see get_smallest_fs_size()) fit in.
 *
 * Order of operations when growing: image file → loopback device → partition table → LUKS volume → file
 * system. When shrinking: file system → LUKS volume → loopback device → image file → partition table.
 *
 * Returns 0 on success and also whenever the operation is skipped because nothing needs to (or can) be
 * changed; returns a negative errno-style value on failure. Only when the resize is carried out to the
 * end and 'ret_home' is non-NULL, the reference held in 'new_home' is passed to the caller via
 * *ret_home; on the early "skipping" exits *ret_home is not written. */
int home_resize_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_home) {

        uint64_t old_image_size, new_image_size, old_fs_size, new_fs_size, crypto_offset, crypto_offset_bytes,
                new_partition_size, smallest_fs_size, resized_fs_size;
        _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
        _cleanup_(fdisk_unref_tablep) struct fdisk_table *table = NULL;
        struct fdisk_partition *partition = NULL; /* no cleanup handler attached; presumably borrowed from 'table' — TODO confirm */
        _cleanup_close_ int opened_image_fd = -1;
        _cleanup_free_ char *whole_disk = NULL;
        int r, resize_type, image_fd = -1;
        sd_id128_t disk_uuid;
        const char *ip, *ipo;
        struct statfs sfs;
        struct stat st;
        enum {
                INTENTION_DONT_KNOW = 0, /* These happen to match the return codes of CMP() */
                INTENTION_SHRINK = -1,
                INTENTION_GROW = 1,
        } intention = INTENTION_DONT_KNOW;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        assert_se(ipo = user_record_image_path(h));
        ip = strdupa_safe(ipo); /* copy out since original might change later in home record object */

        /* Open the image if the caller did not do so already; either way 'st' describes the image afterwards */
        if (setup->image_fd < 0) {
                setup->image_fd = open_image_file(h, NULL, &st);
                if (setup->image_fd < 0)
                        return setup->image_fd;
        } else {
                if (fstat(setup->image_fd, &st) < 0)
                        return log_error_errno(errno, "Failed to stat image file %s: %m", ip);
        }

        /* 'image_fd' is the fd the partition table is edited through: by default the image itself, but
         * replaced by the whole-disk device below if the image turns out to be a partition device. */
        image_fd = setup->image_fd;

        if (S_ISBLK(st.st_mode)) {
                dev_t parent;

                r = block_get_whole_disk(st.st_rdev, &parent);
                if (r < 0)
                        return log_error_errno(r, "Failed to acquire whole block device for %s: %m", ip);
                if (r > 0) {
                        /* If we shall resize a file system on a partition device, then let's figure out the
                         * whole disk device and operate on that instead, since we need to rewrite the
                         * partition table to resize the partition. */

                        log_info("Operating on partition device %s, using parent device.", ip);

                        r = device_path_make_major_minor(st.st_mode, parent, &whole_disk);
                        if (r < 0)
                                return log_error_errno(r, "Failed to derive whole disk path for %s: %m", ip);

                        opened_image_fd = open(whole_disk, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                        if (opened_image_fd < 0)
                                return log_error_errno(errno, "Failed to open whole block device %s: %m", whole_disk);

                        image_fd = opened_image_fd;

                        /* From here on 'st' describes the whole-disk device, not the partition */
                        if (fstat(image_fd, &st) < 0)
                                return log_error_errno(errno, "Failed to stat whole block device %s: %m", whole_disk);
                        if (!S_ISBLK(st.st_mode))
                                return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Whole block device %s is not actually a block device, refusing.", whole_disk);
                } else
                        log_info("Operating on whole block device %s.", ip);

                if (ioctl(image_fd, BLKGETSIZE64, &old_image_size) < 0)
                        return log_error_errno(errno, "Failed to determine size of original block device: %m");

                if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
                        return log_error_errno(errno, "Failed to lock block device %s: %m", ip);

                new_image_size = old_image_size; /* we can't resize physical block devices */
        } else {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return log_error_errno(r, "Image %s is not a block device nor regular file: %m", ip);

                old_image_size = st.st_size;

                /* Note an asymmetry here: when we operate on loopback files the specified disk size we get we
                 * apply onto the loopback file as a whole. When we operate on block devices we instead apply
                 * to the partition itself only. */

                if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
                        /* Request "as small as possible"; the value is raised again further down */
                        new_image_size = 0;
                        intention = INTENTION_SHRINK;
                } else {
                        uint64_t new_image_size_rounded;

                        new_image_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);

                        if (old_image_size >= new_image_size_rounded && old_image_size <= h->disk_size) {
                                /* If exact match, or a match after we rounded down, don't do a thing */
                                log_info("Image size already matching, skipping operation.");
                                return 0;
                        }

                        new_image_size = new_image_size_rounded;
                        intention = CMP(new_image_size, old_image_size); /* < 0: shrink, > 0: grow (equality was handled above) */
                }
        }

        /* Set up the home area; the header identity is only requested if identities shall be synced */
        r = home_setup_luks(
                        h,
                        flags,
                        whole_disk,
                        setup,
                        cache,
                        FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES) ? NULL : &header_home);
        if (r < 0)
                return r;

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                r = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
                if (r < 0)
                        return r;
        }

        r = home_maybe_shift_uid(h, flags, setup);
        if (r < 0)
                return r;

        log_info("offset = %" PRIu64 ", size = %" PRIu64 ", image = %" PRIu64, setup->partition_offset, setup->partition_size, old_image_size);

        /* Sanity check (with overflow protection): the existing partition must lie within the backing storage */
        if ((UINT64_MAX - setup->partition_offset) < setup->partition_size ||
            setup->partition_offset + setup->partition_size > old_image_size)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Old partition doesn't fit in backing storage, refusing.");

        if (S_ISREG(st.st_mode)) {
                uint64_t partition_table_extra, largest_size;

                /* Everything in the image file that is not the partition itself. We keep this amount
                 * constant when deriving the new partition size from the new image size. */
                partition_table_extra = old_image_size - setup->partition_size;

                r = get_largest_image_size(setup->image_fd, &st, &largest_size);
                if (r < 0)
                        return r;
                if (new_image_size > largest_size)
                        new_image_size = largest_size;

                if (new_image_size < partition_table_extra)
                        new_image_size = partition_table_extra;

                new_partition_size = DISK_SIZE_ROUND_DOWN(new_image_size - partition_table_extra);
        } else {
                assert(S_ISBLK(st.st_mode));

                /* On block devices the requested disk size applies to the partition, not the whole device */
                if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
                        new_partition_size = 0;
                        intention = INTENTION_SHRINK;
                } else {
                        uint64_t new_partition_size_rounded;

                        new_partition_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);

                        if (setup->partition_size >= new_partition_size_rounded &&
                            setup->partition_size <= h->disk_size) {
                                log_info("Partition size already matching, skipping operation.");
                                return 0;
                        }

                        new_partition_size = new_partition_size_rounded;
                        intention = CMP(new_partition_size, setup->partition_size);
                }
        }

        if ((UINT64_MAX - setup->partition_offset) < new_partition_size ||
            setup->partition_offset + new_partition_size > new_image_size)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New partition doesn't fit into backing storage, refusing.");

        /* The data offset is converted to bytes by multiplying with 512 below, hence check the range first */
        crypto_offset = sym_crypt_get_data_offset(setup->crypt_device);
        if (crypto_offset > UINT64_MAX/512U)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS2 data offset out of range, refusing.");
        crypto_offset_bytes = (uint64_t) crypto_offset * 512U;
        if (setup->partition_size <= crypto_offset_bytes)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weird, old crypto payload offset doesn't actually fit in partition size?");

        /* Make sure at least the LUKS header fit in */
        if (new_partition_size <= crypto_offset_bytes) {
                uint64_t add;

                add = DISK_SIZE_ROUND_UP(crypto_offset_bytes) - new_partition_size;
                new_partition_size += add;
                if (S_ISREG(st.st_mode)) /* block devices keep their size, only image files grow along */
                        new_image_size += add;
        }

        /* The file system occupies the part of the partition following the LUKS data offset */
        old_fs_size = setup->partition_size - crypto_offset_bytes;
        new_fs_size = DISK_SIZE_ROUND_DOWN(new_partition_size - crypto_offset_bytes);

        r = get_smallest_fs_size(setup->root_fd, &smallest_fs_size);
        if (r < 0)
                return r;

        /* Never go below the smallest file system size; enlarge partition (and image file) accordingly */
        if (new_fs_size < smallest_fs_size) {
                uint64_t add;

                add = DISK_SIZE_ROUND_UP(smallest_fs_size) - new_fs_size;
                new_fs_size += add;
                new_partition_size += add;
                if (S_ISREG(st.st_mode))
                        new_image_size += add;
        }

        if (new_fs_size == old_fs_size) {
                log_info("New file system size identical to old file system size, skipping operation.");
                return 0;
        }

        if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_GROW) && new_fs_size > old_fs_size) {
                log_info("New file system size would be larger than old, but shrinking requested, skipping operation.");
                return 0;
        }

        if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SHRINK) && new_fs_size < old_fs_size) {
                log_info("New file system size would be smaller than old, but growing requested, skipping operation.");
                return 0;
        }

        /* After all the adjustments above the direction might have flipped compared to what was originally
         * requested. If so, do nothing. */
        if (CMP(new_fs_size, old_fs_size) != intention) {
                if (intention < 0)
                        log_info("Shrink operation would enlarge file system, skipping operation.");
                else {
                        assert(intention > 0);
                        log_info("Grow operation would shrink file system, skipping operation.");
                }
                return 0;
        }

        /* Before we start doing anything, let's figure out if we actually can */
        resize_type = can_resize_fs(setup->root_fd, old_fs_size, new_fs_size);
        if (resize_type < 0)
                return resize_type;
        if (resize_type == CAN_RESIZE_OFFLINE && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
                return log_error_errno(SYNTHETIC_ERRNO(ETXTBSY), "File systems of this type can only be resized offline, but is currently online.");

        log_info("Ready to resize image size %s → %s, partition size %s → %s, file system size %s → %s.",
                 FORMAT_BYTES(old_image_size),
                 FORMAT_BYTES(new_image_size),
                 FORMAT_BYTES(setup->partition_size),
                 FORMAT_BYTES(new_partition_size),
                 FORMAT_BYTES(old_fs_size),
                 FORMAT_BYTES(new_fs_size));

        /* Collect what is needed to rewrite the partition table later; the actual write happens in
         * apply_resize_partition() further down. */
        r = prepare_resize_partition(
                        image_fd,
                        setup->partition_offset,
                        setup->partition_size,
                        &disk_uuid,
                        &table,
                        &partition);
        if (r < 0)
                return r;

        if (new_fs_size > old_fs_size) { /* → Grow */

                /* When growing, all outer layers are enlarged first, the file system itself comes last
                 * (after this if/else block). */

                if (S_ISREG(st.st_mode)) {
                        uint64_t resized_image_size;

                        /* Grow file size */
                        r = resize_image_loop(h, setup, old_image_size, new_image_size, &resized_image_size);
                        if (r < 0)
                                return r;

                        if (resized_image_size == old_image_size) {
                                log_info("Couldn't change image size.");
                                return 0;
                        }

                        assert(resized_image_size > old_image_size);

                        log_info("Growing of image file from %s to %s completed.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(resized_image_size));

                        if (resized_image_size < new_image_size) {
                                uint64_t sub;

                                /* If the growing we managed to do is smaller than what we wanted we need to
                                 * adjust the partition/file system sizes we are going for, too */
                                sub = new_image_size - resized_image_size;
                                assert(new_partition_size >= sub);
                                new_partition_size -= sub;
                                assert(new_fs_size >= sub);
                                new_fs_size -= sub;
                        }

                        new_image_size = resized_image_size;
                } else {
                        assert(S_ISBLK(st.st_mode));
                        assert(new_image_size == old_image_size);
                }

                /* Make sure loopback device sees the new bigger size */
                r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
                if (r == -ENOTTY)
                        log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
                else if (r < 0)
                        return log_error_errno(r, "Failed to refresh loopback device size: %m");
                else
                        log_info("Refreshing loop device size completed.");

                r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
                if (r < 0)
                        return r;
                if (r > 0)
                        log_info("Growing of partition completed.");

                /* Failure to re-read the partition table is only logged at debug level and otherwise ignored */
                if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
                        log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");

                /* Tell LUKS about the new bigger size too */
                r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512U);
                if (r < 0)
                        return log_error_errno(r, "Failed to grow LUKS device: %m");

                log_info("LUKS device growing completed.");
        } else {
                /* → Shrink */

                /* When shrinking, the embedded identity is stored before the file system is made smaller */
                if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                        r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
                        if (r < 0)
                                return r;
                }

                if (S_ISREG(st.st_mode)) {
                        if (user_record_luks_discard(h))
                                /* Before we shrink, let's trim the file system, so that we need less space on disk during the shrinking */
                                (void) run_fitrim(setup->root_fd);
                        else {
                                /* If discard is off, let's ensure all backing blocks are allocated, so that our resize operation doesn't fail half-way */
                                r = run_fallocate(image_fd, &st);
                                if (r < 0)
                                        return r;
                        }
                }
        }

        /* Now try to resize the file system. The requested size might not always be possible, in which case
         * we'll try to get as close as we can get. The result is returned in 'resized_fs_size' */
        r = resize_fs_loop(h, setup, resize_type, old_fs_size, new_fs_size, &resized_fs_size);
        if (r < 0)
                return r;

        if (resized_fs_size == old_fs_size) {
                log_info("Couldn't change file system size.");
                return 0;
        }

        log_info("File system resizing from %s to %s completed.", FORMAT_BYTES(old_fs_size), FORMAT_BYTES(resized_fs_size));

        if (resized_fs_size > new_fs_size) {
                uint64_t add;

                /* If the shrinking we managed to do is larger than what we wanted we need to adjust the partition/image sizes. */
                add = resized_fs_size - new_fs_size;
                new_partition_size += add;
                if (S_ISREG(st.st_mode))
                        new_image_size += add;
        }

        new_fs_size = resized_fs_size;

        /* Immediately sync afterwards */
        r = home_sync_and_statfs(setup->root_fd, NULL);
        if (r < 0)
                return r;

        if (new_fs_size < old_fs_size) { /* → Shrink */

                /* The file system is smaller now, hence shrink the layers around it, from the inside out */

                /* Shrink the LUKS device now, matching the new file system size */
                r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512);
                if (r < 0)
                        return log_error_errno(r, "Failed to shrink LUKS device: %m");

                log_info("LUKS device shrinking completed.");

                /* Refresh the loop devices size */
                r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
                if (r == -ENOTTY)
                        log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
                else if (r < 0)
                        return log_error_errno(r, "Failed to refresh loopback device size: %m");
                else
                        log_info("Refreshing loop device size completed.");

                if (S_ISREG(st.st_mode)) {
                        /* Shrink the image file */
                        if (ftruncate(image_fd, new_image_size) < 0)
                                return log_error_errno(errno, "Failed to shrink image file %s: %m", ip);

                        log_info("Shrinking of image file completed.");
                } else {
                        assert(S_ISBLK(st.st_mode));
                        assert(new_image_size == old_image_size);
                }

                r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
                if (r < 0)
                        return r;
                if (r > 0)
                        log_info("Shrinking of partition completed.");

                if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
                        log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");

        } else { /* → Grow */
                /* When growing, the embedded identity is stored only after the file system was enlarged */
                if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                        r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
                        if (r < 0)
                                return r;
                }
        }

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                r = home_store_header_identity_luks(new_home, setup, header_home);
                if (r < 0)
                        return r;

                r = home_extend_embedded_identity(new_home, h, setup);
                if (r < 0)
                        return r;
        }

        /* Best-effort trim, a failure is deliberately ignored */
        if (user_record_luks_discard(h))
                (void) run_fitrim(setup->root_fd);

        /* Sync once more and collect the statfs() data shown in the summary below */
        r = home_sync_and_statfs(setup->root_fd, &sfs);
        if (r < 0)
                return r;

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_UNDO)) {
                r = home_setup_done(setup);
                if (r < 0)
                        return r;
        }

        log_info("Resizing completed.");

        print_size_summary(new_image_size, new_fs_size, &sfs);

        if (ret_home)
                *ret_home = TAKE_PTR(new_home);

        return 0;
}
3519
/* Replaces the LUKS key slots of the home area with the passwords listed in 'effective_passwords'.
 *
 * First the volume key is unlocked with any of the passwords found in 'cache' (keyring, PKCS#11, FIDO2)
 * or in h->password. Then every key slot index below the maximum for this LUKS type is destroyed, and for
 * each index that has a matching entry in 'effective_passwords' a fresh key slot is set up from the
 * volume key. Passwords that are contained in 'cache' get the minimal PBKDF settings, all others the
 * good ones.
 *
 * Returns 1 on success, a negative errno-style value on failure (-ENOKEY if none of the supplied
 * passwords unlocks the volume). */
int home_passwd_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                const PasswordCache *cache,      /* the passwords acquired via PKCS#11/FIDO2 security tokens */
                char **effective_passwords       /* new passwords */) {

        size_t volume_key_size, max_key_slots, n_effective;
        _cleanup_(erase_and_freep) void *volume_key = NULL;
        struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
        const char *type;
        char **list;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        type = sym_crypt_get_type(setup->crypt_device);
        if (!type)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine crypto device type.");

        r = sym_crypt_keyslot_max(type);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine number of key slots.");
        max_key_slots = r;

        r = sym_crypt_get_volume_key_size(setup->crypt_device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine volume key size.");
        volume_key_size = (size_t) r;

        volume_key = malloc(volume_key_size);
        if (!volume_key)
                return log_oom();

        /* Try the password lists one after the other, until one of them yields something other than
         * -ENOKEY (i.e. either success or a hard failure). */
        r = -ENOKEY;
        FOREACH_POINTER(list,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        h->password) {

                r = luks_try_passwords(h, setup->crypt_device, list, volume_key, &volume_key_size, NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to unlock LUKS superblock with supplied passwords.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        n_effective = strv_length(effective_passwords);

        build_good_pbkdf(&good_pbkdf, h);
        build_minimal_pbkdf(&minimal_pbkdf, h);

        for (size_t i = 0; i < max_key_slots; i++) {
                r = sym_crypt_keyslot_destroy(setup->crypt_device, i);
                if (r < 0 && !IN_SET(r, -ENOENT, -EINVAL)) /* Returns EINVAL or ENOENT if there's no key in this slot already */
                        return log_error_errno(r, "Failed to destroy LUKS password: %m");

                /* No new password for this slot? Then it simply remains destroyed/empty. */
                if (i >= n_effective) {
                        if (r >= 0)
                                log_info("Destroyed LUKS key slot %zu.", i);
                        continue;
                }

                if (password_cache_contains(cache, effective_passwords[i])) { /* Is this a FIDO2 or PKCS#11 password? */
                        log_debug("Using minimal PBKDF for slot %zu", i);
                        r = sym_crypt_set_pbkdf_type(setup->crypt_device, &minimal_pbkdf);
                } else {
                        log_debug("Using good PBKDF for slot %zu", i);
                        r = sym_crypt_set_pbkdf_type(setup->crypt_device, &good_pbkdf);
                }
                if (r < 0)
                        return log_error_errno(r, "Failed to tweak PBKDF for slot %zu: %m", i);

                r = sym_crypt_keyslot_add_by_volume_key(
                                setup->crypt_device,
                                i,
                                volume_key,
                                volume_key_size,
                                effective_passwords[i],
                                strlen(effective_passwords[i]));
                if (r < 0)
                        return log_error_errno(r, "Failed to set up LUKS password: %m");

                log_info("Updated LUKS key slot %zu.", i);

                /* If we changed the password, then make sure to update the copy in the keyring, so that
                 * auto-rebalance continues to work. We only do this if we operate on an active home dir.
                 * This is best-effort: the result is deliberately not checked. */
                if (i == 0 && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
                        (void) upload_to_keyring(h, effective_passwords[i], NULL);
        }

        return 1;
}
3622
/* Locks the home area: looks up the LUKS device that backs it, syncs the file system of the home
 * directory and then suspends the LUKS device. Returns 0 on success, a negative errno-style value on
 * failure. */
int home_lock_luks(UserRecord *h, HomeSetup *setup) {
        const char *home_dir;
        int r;

        assert(h);
        assert(setup);
        assert(setup->root_fd < 0);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        log_info("Discovered used LUKS device %s.", setup->dm_node);

        home_dir = user_record_home_directory(h);
        assert_se(home_dir);

        /* Snake oil, but let's better be safe than sorry */
        r = syncfs_path(AT_FDCWD, home_dir);
        if (r < 0)
                return log_error_errno(r, "Failed to synchronize file system %s: %m", home_dir);

        log_info("File system synchronized.");

        /* We do not issue FIFREEZE ourselves here, as it appears libcryptsetup/device-mapper already
         * takes care of that on its own. */

        r = sym_crypt_suspend(setup->crypt_device, setup->dm_name);
        if (r < 0)
                return log_error_errno(r, "Failed to suspend cryptsetup device: %s: %m", setup->dm_node);

        log_info("LUKS device suspended.");
        return 0;
}
3654
luks_try_resume(struct crypt_device * cd,const char * dm_name,char ** password)3655 static int luks_try_resume(
3656 struct crypt_device *cd,
3657 const char *dm_name,
3658 char **password) {
3659
3660 int r;
3661
3662 assert(cd);
3663 assert(dm_name);
3664
3665 STRV_FOREACH(pp, password) {
3666 r = sym_crypt_resume_by_passphrase(
3667 cd,
3668 dm_name,
3669 CRYPT_ANY_SLOT,
3670 *pp,
3671 strlen(*pp));
3672 if (r >= 0) {
3673 log_info("Resumed LUKS device %s.", dm_name);
3674 return 0;
3675 }
3676
3677 log_debug_errno(r, "Password %zu didn't work for resuming device: %m", (size_t) (pp - password));
3678 }
3679
3680 return -ENOKEY;
3681 }
3682
/* Unlocks the home area: looks up the LUKS device that backs it and resumes it, trying the PKCS#11 and
 * FIDO2 passwords from 'cache' (if one is passed) first, then the passwords of the user record.
 * Returns 0 on success, -ENOKEY if no password worked, another negative errno-style value on other
 * failures. */
int home_unlock_luks(UserRecord *h, HomeSetup *setup, const PasswordCache *cache) {
        char **pkcs11 = NULL, **fido2 = NULL, **candidates;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        log_info("Discovered used LUKS device %s.", setup->dm_node);

        if (cache) {
                pkcs11 = cache->pkcs11_passwords;
                fido2 = cache->fido2_passwords;
        }

        /* Walk through the password lists until one yields something other than -ENOKEY */
        r = -ENOKEY;
        FOREACH_POINTER(candidates, pkcs11, fido2, h->password) {
                r = luks_try_resume(setup->crypt_device, setup->dm_name, candidates);
                if (r != -ENOKEY)
                        break;
        }

        if (r == -ENOKEY)
                return log_error_errno(r, "No valid password for LUKS superblock.");
        if (r < 0)
                return log_error_errno(r, "Failed to resume LUKS superblock: %m");

        log_info("LUKS device resumed.");
        return 0;
}
3714
/* Checks whether the device-mapper node of the home area (setup->dm_node) has disappeared.
 *
 * Returns true (> 0) if no node path is set, if the node does not exist (stat() fails with ENOENT), or
 * if no device object can be allocated for it because the device is unknown (-ENODEV). Returns false
 * (0) if a device object could be allocated for the node. Returns a negative errno-style value on any
 * other failure. */
static int device_is_gone(HomeSetup *setup) {
        _cleanup_(sd_device_unrefp) sd_device *d = NULL;
        struct stat st;
        int r;

        assert(setup);

        if (!setup->dm_node)
                return true;

        if (stat(setup->dm_node, &st) < 0) {
                if (errno != ENOENT)
                        return log_error_errno(errno, "Failed to stat block device node %s: %m", setup->dm_node);

                return true;
        }

        r = sd_device_new_from_stat_rdev(&d, &st);
        if (r < 0) {
                /* The error code is carried in the return value, not in errno, hence log and propagate
                 * 'r' here. */
                if (r != -ENODEV)
                        return log_error_errno(r, "Failed to allocate device object from block device node %s: %m", setup->dm_node);

                return true;
        }

        return false;
}
3742
/* Device monitor callback used while waiting for a block device to vanish. 'userdata' is the HomeSetup
 * whose device node is being watched. On a removal event it re-checks the node and, if it is gone, asks
 * the event loop the monitor is attached to to exit with code 0.
 *
 * Returns 0 normally, or the negative error reported by device_is_gone(). */
static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
        HomeSetup *setup = userdata;
        int gone;

        assert(setup);

        /* Anything but a removal event is of no interest to us. */
        if (!device_for_action(device, SD_DEVICE_REMOVE))
                return 0;

        /* The device object handed to us is not inspected any further; we only look at whether our own
         * device node still exists. */
        gone = device_is_gone(setup);
        if (gone < 0)
                return gone;
        if (gone == 0)
                return 0; /* Still there, keep waiting. */

        /* Yay! we are done! */
        (void) sd_event_exit(sd_device_monitor_get_event(monitor), 0);
        return 0;
}
3763
/* Blocks until the device node setup->dm_node has disappeared, or until 'timeout_usec' has elapsed
 * (USEC_INFINITY disables the timeout).
 *
 * Returns 0 if there is nothing to wait for (no dm_name set) or the device is gone, -ETIMEDOUT if the
 * event loop finished but the device is still around, and another negative errno-style error if setting
 * up or running the wait machinery failed. */
int wait_for_block_device_gone(HomeSetup *setup, usec_t timeout_usec) {
        _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
        int r;

        assert(setup);

        /* So here's the thing: we enable "deferred deactivation" on our dm-crypt volumes. This means they
         * are automatically torn down once not used anymore (i.e. once unmounted). Which is great. It also
         * means that when we deactivate a home directory and try to tear down the volume that backs it, it
         * possibly is already torn down or in the process of being torn down, since we race against the
         * automatic tearing down. Which is fine, we handle errors from that. However, we lose the ability to
         * naturally wait for the tear down operation to complete: if we are not the ones who tear down the
         * device we are also not the ones who naturally block on that operation. Hence let's add some code
         * to actively wait for the device to go away, via sd-device. We'll call this whenever tearing down a
         * LUKS device, to ensure the device is really really gone before we proceed. Net effect: "homectl
         * deactivate foo && homectl activate foo" will work reliably, i.e. deactivation immediately followed
         * by activation will work. Also, by the time deactivation completes we can guarantee that all data
         * is sync'ed down to the lowest block layer as all higher levels are fully and entirely
         * destructed. */

        if (!setup->dm_name)
                return 0;

        assert(setup->dm_node);
        log_debug("Waiting until %s disappears.", setup->dm_node);

        r = sd_event_new(&event);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate event loop: %m");

        r = sd_device_monitor_new(&m);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate device monitor: %m");

        r = sd_device_monitor_filter_add_match_subsystem_devtype(m, "block", "disk");
        if (r < 0)
                return log_error_errno(r, "Failed to configure device monitor match: %m");

        r = sd_device_monitor_attach_event(m, event);
        if (r < 0)
                return log_error_errno(r, "Failed to attach device monitor to event loop: %m");

        r = sd_device_monitor_start(m, device_monitor_handler, setup);
        if (r < 0)
                return log_error_errno(r, "Failed to start device monitor: %m");

        /* The monitor is already running at this point, hence a removal that happens after this check is
         * still delivered to device_monitor_handler(). */
        r = device_is_gone(setup);
        if (r < 0)
                return r;
        if (r > 0) {
                log_debug("%s has already disappeared before entering wait loop.", setup->dm_node);
                return 0; /* gone already */
        }

        if (timeout_usec != USEC_INFINITY) {
                /* No handler is passed for the timer; per the sd_event_add_time() documentation the default
                 * handler then makes the event loop exit once the timer elapses. */
                r = sd_event_add_time_relative(event, NULL, CLOCK_MONOTONIC, timeout_usec, 0, NULL, NULL);
                if (r < 0)
                        return log_error_errno(r, "Failed to add timer event: %m");
        }

        r = sd_event_loop(event);
        if (r < 0)
                return log_error_errno(r, "Failed to run event loop: %m");

        /* The loop ended, but not necessarily because the device went away. Check again, and report a
         * real error if the device is still there: 'r' is zero in that case and must not be used as the
         * error code, as that would make us return success. */
        r = device_is_gone(setup);
        if (r < 0)
                return r;
        if (r == 0)
                return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device %s still around.", setup->dm_node);

        log_debug("Successfully waited until device %s disappeared.", setup->dm_node);
        return 0;
}
3838
/* Shrinks the LUKS home directory of 'h' to its minimal size, if that is enabled for the user.
 *
 * Nothing is done unless the record's auto-resize mode is AUTO_RESIZE_SHRINK_AND_GROW and the file system
 * found at setup->root_fd is reported by fs_can_online_shrink_and_grow() as supporting both operations.
 *
 * Returns 0 if shrinking was skipped, 1 if home_resize_luks() was invoked and succeeded, and a negative
 * errno-style error otherwise. */
int home_auto_shrink_luks(UserRecord *h, HomeSetup *setup, PasswordCache *cache) {
        struct statfs fs_info;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);
        assert(setup->root_fd >= 0);

        if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW)
                return 0;

        r = fstatfs(setup->root_fd, &fs_info);
        if (r < 0)
                return log_error_errno(errno, "Failed to statfs home directory: %m");

        if (!fs_can_online_shrink_and_grow(fs_info.f_type)) {
                log_debug("Not auto-shrinking file system, since selected file system cannot do both online shrink and grow.");
                return 0;
        }

        /* The home directory is already activated; request a minimizing resize that never grows, and skip
         * identity syncing and undoing. */
        r = home_resize_luks(
                        h,
                        HOME_SETUP_ALREADY_ACTIVATED|
                        HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
                        HOME_SETUP_RESIZE_MINIMIZE|
                        HOME_SETUP_RESIZE_DONT_GROW|
                        HOME_SETUP_RESIZE_DONT_UNDO,
                        setup,
                        cache,
                        NULL);

        return r < 0 ? r : 1;
}
3874