1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <linux/loop.h>
4 #include <poll.h>
5 #include <sys/file.h>
6 #include <sys/ioctl.h>
7 #include <sys/mount.h>
8 #include <sys/xattr.h>
9 
10 #if HAVE_VALGRIND_MEMCHECK_H
11 #include <valgrind/memcheck.h>
12 #endif
13 
14 #include "sd-daemon.h"
15 #include "sd-device.h"
16 #include "sd-event.h"
17 
18 #include "blkid-util.h"
19 #include "blockdev-util.h"
20 #include "btrfs-util.h"
21 #include "chattr-util.h"
22 #include "devnum-util.h"
23 #include "dm-util.h"
24 #include "env-util.h"
25 #include "errno-util.h"
26 #include "fd-util.h"
27 #include "fdisk-util.h"
28 #include "fileio.h"
29 #include "filesystems.h"
30 #include "fs-util.h"
31 #include "fsck-util.h"
32 #include "gpt.h"
33 #include "home-util.h"
34 #include "homework-luks.h"
35 #include "homework-mount.h"
36 #include "id128-util.h"
37 #include "io-util.h"
38 #include "keyring-util.h"
39 #include "memory-util.h"
40 #include "missing_magic.h"
41 #include "mkdir.h"
42 #include "mkfs-util.h"
43 #include "mount-util.h"
44 #include "openssl-util.h"
45 #include "parse-util.h"
46 #include "path-util.h"
47 #include "process-util.h"
48 #include "random-util.h"
49 #include "resize-fs.h"
50 #include "strv.h"
51 #include "sync-util.h"
52 #include "tmpfile-util.h"
53 #include "udev-util.h"
54 #include "user-util.h"
55 
/* Truncates a size to the previous multiple of 4K. Newer hardware generally prefers 4K sectors, hence we
 * align our partitions to that as well. In the worst case this wastes 3.5K per partition, which is an
 * acceptable price. */
#define DISK_SIZE_ROUND_DOWN(x) ((x) & ~(UINT64_C(4096) - 1U))
60 
/* Rounds a size up to the next multiple of 4K. If adding the padding would overflow 64 bits, UINT64_MAX is
 * returned instead. The argument is evaluated exactly once. */
#define DISK_SIZE_ROUND_UP(x)                                           \
        ({                                                              \
                uint64_t _x = (x);                                      \
                _x <= UINT64_MAX - UINT64_C(4095)                       \
                        ? (_x + UINT64_C(4095)) & ~UINT64_C(4095)       \
                        : UINT64_MAX;                                   \
        })
67 
/* The number of bytes the on-disk image exceeds the size of the file system inside it, i.e. what we pay for
 * the GPT and LUKS2 envelope: 18 MiB. (As measured on cryptsetup 2.4.1) */
#define GPT_LUKS2_OVERHEAD (UINT64_C(18) * UINT64_C(1024) * UINT64_C(1024))
71 
72 static int resize_image_loop(UserRecord *h, HomeSetup *setup, uint64_t old_image_size, uint64_t new_image_size, uint64_t *ret_image_size);
73 
/* Sets (b == true) or removes (b == false) the 'user.home-dirty' xattr on the regular file referenced by fd.
 * The flag is used to detect home directories that were not properly unmounted.
 *
 * Returns 1 if the xattr was actually added or removed, 0 if the file already was in the requested state,
 * and a negative errno-style value on failure. */
int run_mark_dirty(int fd, bool b) {
        const char marker = '1';
        int rc, r;

        assert(fd >= 0);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        if (!b) {
                /* Make sure everything written so far hit the disk before we declare the image clean */
                r = fsync_full(fd);
                if (r < 0)
                        return log_debug_errno(r, "Failed to synchronize image before marking it clean: %m");

                rc = fremovexattr(fd, "user.home-dirty");
                if (rc < 0 && errno != ENODATA) /* ENODATA: flag wasn't set in the first place */
                        return log_debug_errno(errno, "Could not mark home directory as clean: %m");
        } else {
                rc = fsetxattr(fd, "user.home-dirty", &marker, 1, XATTR_CREATE);
                if (rc < 0 && errno != EEXIST) /* EEXIST: flag is set already */
                        return log_debug_errno(errno, "Could not mark home directory as dirty: %m");
        }

        /* Persist the new flag state */
        r = fsync_full(fd);
        if (r < 0)
                return log_debug_errno(r, "Failed to synchronize dirty flag to disk: %m");

        return rc < 0 ? 0 : 1;
}
108 
run_mark_dirty_by_path(const char * path,bool b)109 int run_mark_dirty_by_path(const char *path, bool b) {
110         _cleanup_close_ int fd = -1;
111 
112         assert(path);
113 
114         fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY);
115         if (fd < 0)
116                 return log_debug_errno(errno, "Failed to open %s to mark dirty or clean: %m", path);
117 
118         return run_mark_dirty(fd, b);
119 }
120 
/* Probes the file system superblock behind the specified fd via libblkid.
 *
 * On success returns 0, stores a newly allocated copy of the file system type in *ret_fstype (to be freed by
 * the caller) and the parsed file system UUID in *ret_uuid. Returns -ENOPKG if nothing was detected, the
 * result was ambiguous, or the TYPE or UUID property is missing. Other failures are returned as negative
 * errno-style values. */
static int probe_file_system_by_fd(
                int fd,
                char **ret_fstype,
                sd_id128_t *ret_uuid) {

        _cleanup_(blkid_free_probep) blkid_probe probe = NULL;
        _cleanup_free_ char *type_copy = NULL;
        const char *type = NULL, *uuid_string = NULL;
        sd_id128_t parsed;
        int r;

        assert(fd >= 0);
        assert(ret_fstype);
        assert(ret_uuid);

        probe = blkid_new_probe();
        if (!probe)
                return -ENOMEM;

        /* libblkid does not reliably set errno, hence reset it first and fall back to a fixed error */
        errno = 0;
        r = blkid_probe_set_device(probe, fd, 0, 0);
        if (r != 0)
                return errno > 0 ? -errno : -ENOMEM;

        (void) blkid_probe_enable_superblocks(probe, 1);
        (void) blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_UUID);

        errno = 0;
        r = blkid_do_safeprobe(probe);
        if (r == -2 || r == 1) /* nothing found or ambiguous result */
                return -ENOPKG;
        if (r != 0)
                return errno > 0 ? -errno : -EIO;

        (void) blkid_probe_lookup_value(probe, "TYPE", &type, NULL);
        if (!type)
                return -ENOPKG;

        (void) blkid_probe_lookup_value(probe, "UUID", &uuid_string, NULL);
        if (!uuid_string)
                return -ENOPKG;

        r = sd_id128_from_string(uuid_string, &parsed);
        if (r < 0)
                return r;

        /* The strings returned by libblkid belong to the probe object, hence copy what we hand out */
        type_copy = strdup(type);
        if (!type_copy)
                return -ENOMEM;

        *ret_uuid = parsed;
        *ret_fstype = TAKE_PTR(type_copy);

        return 0;
}
176 
/* Path-based wrapper around probe_file_system_by_fd(): opens the node read-only (and non-blocking) and probes
 * it. Returns -errno if opening fails, otherwise the result of probe_file_system_by_fd(). */
static int probe_file_system_by_path(const char *path, char **ret_fstype, sd_id128_t *ret_uuid) {
        _cleanup_close_ int node_fd = -1;

        node_fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (node_fd >= 0)
                return probe_file_system_by_fd(node_fd, ret_fstype, ret_uuid);

        return -errno;
}
186 
block_get_size_by_fd(int fd,uint64_t * ret)187 static int block_get_size_by_fd(int fd, uint64_t *ret) {
188         struct stat st;
189 
190         assert(fd >= 0);
191         assert(ret);
192 
193         if (fstat(fd, &st) < 0)
194                 return -errno;
195 
196         if (!S_ISBLK(st.st_mode))
197                 return -ENOTBLK;
198 
199         return RET_NERRNO(ioctl(fd, BLKGETSIZE64, ret));
200 }
201 
block_get_size_by_path(const char * path,uint64_t * ret)202 static int block_get_size_by_path(const char *path, uint64_t *ret) {
203         _cleanup_close_ int fd = -1;
204 
205         fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
206         if (fd < 0)
207                 return -errno;
208 
209         return block_get_size_by_fd(fd, ret);
210 }
211 
run_fsck(const char * node,const char * fstype)212 static int run_fsck(const char *node, const char *fstype) {
213         int r, exit_status;
214         pid_t fsck_pid;
215 
216         assert(node);
217         assert(fstype);
218 
219         r = fsck_exists(fstype);
220         if (r < 0)
221                 return log_error_errno(r, "Failed to check if fsck for file system %s exists: %m", fstype);
222         if (r == 0) {
223                 log_warning("No fsck for file system %s installed, ignoring.", fstype);
224                 return 0;
225         }
226 
227         r = safe_fork("(fsck)",
228                       FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
229                       &fsck_pid);
230         if (r < 0)
231                 return r;
232         if (r == 0) {
233                 /* Child */
234                 execl("/sbin/fsck", "/sbin/fsck", "-aTl", node, NULL);
235                 log_open();
236                 log_error_errno(errno, "Failed to execute fsck: %m");
237                 _exit(FSCK_OPERATIONAL_ERROR);
238         }
239 
240         exit_status = wait_for_terminate_and_check("fsck", fsck_pid, WAIT_LOG_ABNORMAL);
241         if (exit_status < 0)
242                 return exit_status;
243         if ((exit_status & ~FSCK_ERROR_CORRECTED) != 0) {
244                 log_warning("fsck failed with exit status %i.", exit_status);
245 
246                 if ((exit_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
247                         return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");
248 
249                 log_warning("Ignoring fsck error.");
250         }
251 
252         log_info("File system check completed.");
253 
254         return 1;
255 }
256 
/* Generates the keyring_unlinkp() helper that luks_setup() attaches to its key serial through
 * _cleanup_(keyring_unlinkp). NOTE(review): presumably this calls keyring_unlink() on scope exit unless the
 * serial equals -1 — confirm against the macro definition. */
DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(key_serial_t, keyring_unlink, -1);
258 
/* Uploads the password that unlocked the LUKS volume into the kernel's session keyring, under the name
 * "homework-user-<username>", but only if the user record requests the shrink-and-grow auto-resize mode.
 *
 * Returns 1 if the key was uploaded, 0 if uploading was not necessary, and a negative errno-style value on
 * failure. If ret_key_serial is non-NULL it receives the key serial on success (-1 if nothing was
 * uploaded); on failure it is left untouched. */
static int upload_to_keyring(
                UserRecord *h,
                const char *password,
                key_serial_t *ret_key_serial) {

        _cleanup_free_ char *description = NULL;
        key_serial_t serial = -1;
        int uploaded = 0;

        assert(h);
        assert(password);

        /* If auto-shrink-on-logout is turned on, we need to keep the key we used to unlock the LUKS volume
         * around, since we'll need it when automatically resizing (we can't ask the user again then). We
         * do this by uploading it into the kernel keyring, specifically the "session" one. This is done
         * under the assumption that systemd-homed gets its own private per-session keyring (i.e. default
         * service behaviour, given that KeyringMode=private is the default). The key survives between our
         * systemd-homework invocations that way.
         *
         * If auto-shrink-on-logout is disabled we skip all this, to be frugal with sensitive data. */

        if (user_record_auto_resize_mode(h) == AUTO_RESIZE_SHRINK_AND_GROW) {
                description = strjoin("homework-user-", h->user_name);
                if (!description)
                        return -ENOMEM;

                serial = add_key("user", description, password, strlen(password), KEY_SPEC_SESSION_KEYRING);
                if (serial == -1)
                        return -errno;

                uploaded = 1;
        }

        if (ret_key_serial)
                *ret_key_serial = serial;

        return uploaded;
}
298 
/* Tries each password of the specified list against the LUKS superblock of cd, in order.
 *
 * volume_key is the buffer that receives the volume key and *volume_key_size its size on input; it is
 * updated to the size reported by libcryptsetup once a password works. If ret_key_serial is non-NULL the
 * working password is additionally passed to upload_to_keyring(); a failure to do so is only logged, and
 * *ret_key_serial is set to -1 in that case.
 *
 * Returns 0 as soon as one password unlocks the volume key, -ENOKEY if none does. */
static int luks_try_passwords(
                UserRecord *h,
                struct crypt_device *cd,
                char **passwords,
                void *volume_key,
                size_t *volume_key_size,
                key_serial_t *ret_key_serial) {

        int r;

        assert(h);
        assert(cd);

        STRV_FOREACH(candidate, passwords) {
                /* Work on a copy, so that the caller's size is only updated on success */
                size_t key_size = *volume_key_size;

                r = sym_crypt_volume_key_get(
                                cd,
                                CRYPT_ANY_SLOT,
                                volume_key,
                                &key_size,
                                *candidate,
                                strlen(*candidate));
                if (r < 0) {
                        log_debug_errno(r, "Password %zu didn't work for unlocking LUKS superblock: %m", (size_t) (candidate - passwords));
                        continue;
                }

                if (ret_key_serial) {
                        /* The caller is interested in a key serial, hence try to upload the password
                         * that worked and return its serial. */
                        r = upload_to_keyring(h, *candidate, ret_key_serial);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to upload LUKS password to kernel keyring, ignoring: %m");
                                *ret_key_serial = -1;
                        }
                }

                *volume_key_size = key_size;
                return 0;
        }

        return -ENOKEY;
}
342 
/* Validates the LUKS2 superblock on the specified node against the expected parameters, unlocks it with one
 * of the supplied passwords and activates it as device mapper device dm_name.
 *
 * uuid, cipher, cipher_mode and volume_key_size are only checked if set (i.e. non-null UUID, non-NULL
 * strings, size other than UINT64_MAX); a mismatch results in -EMEDIUMTYPE. The password lists from cache
 * (if any) are tried first, then the explicitly passed passwords; -ENOKEY is returned if none works.
 *
 * On success returns 0 and passes ownership of the libcryptsetup context to *ret. The remaining ret_*
 * parameters are optional: the UUID found in the superblock, the volume key (to be erased and freed by the
 * caller) with its size, and the serial of the password uploaded to the kernel keyring (-1 if none). */
static int luks_setup(
                UserRecord *h,
                const char *node,
                const char *dm_name,
                sd_id128_t uuid,
                const char *cipher,
                const char *cipher_mode,
                uint64_t volume_key_size,
                char **passwords,
                const PasswordCache *cache,
                bool discard,
                struct crypt_device **ret,
                sd_id128_t *ret_found_uuid,
                void **ret_volume_key,
                size_t *ret_volume_key_size,
                key_serial_t *ret_key_serial) {

        _cleanup_(keyring_unlinkp) key_serial_t uploaded_serial = -1;
        _cleanup_(sym_crypt_freep) struct crypt_device *device = NULL;
        _cleanup_(erase_and_freep) void *key = NULL;
        sd_id128_t superblock_uuid;
        size_t key_size;
        char **candidates;
        int r;

        assert(h);
        assert(node);
        assert(dm_name);
        assert(ret);

        r = sym_crypt_init(&device, node);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");

        cryptsetup_enable_logging(device);

        r = sym_crypt_load(device, CRYPT_LUKS2, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to load LUKS superblock: %m");

        r = sym_crypt_get_volume_key_size(device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
        key_size = (size_t) r;

        /* We only need to read the UUID if we shall compare it or report it back */
        if (ret_found_uuid || !sd_id128_is_null(uuid)) {
                const char *uuid_string;

                uuid_string = sym_crypt_get_uuid(device);
                if (!uuid_string)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");

                r = sd_id128_from_string(uuid_string, &superblock_uuid);
                if (r < 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");

                /* If the caller expects a specific UUID, insist on it */
                if (!(sd_id128_is_null(uuid) || sd_id128_equal(uuid, superblock_uuid)))
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has wrong UUID.");
        }

        if (cipher && !streq_ptr(cipher, sym_crypt_get_cipher(device)))
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher.");

        if (cipher_mode && !streq_ptr(cipher_mode, sym_crypt_get_cipher_mode(device)))
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong cipher mode.");

        if (volume_key_size != UINT64_MAX && key_size != volume_key_size)
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock declares wrong volume key size.");

        key = malloc(key_size);
        if (!key)
                return log_oom();

        /* Go through all password sources, stop at the first list that yields anything but "no key" */
        r = -ENOKEY;
        FOREACH_POINTER(candidates,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        passwords) {
                r = luks_try_passwords(h, device, candidates, key, &key_size, ret_key_serial ? &uploaded_serial : NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(r, "No valid password for LUKS superblock.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        r = sym_crypt_activate_by_volume_key(
                        device,
                        dm_name,
                        key, key_size,
                        discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        log_info("Setting up LUKS device /dev/mapper/%s completed.", dm_name);

        /* Everything worked, hand our resources over to the caller */
        *ret = TAKE_PTR(device);

        if (ret_found_uuid) /* Report the UUID we actually found, if the caller wants to know */
                *ret_found_uuid = superblock_uuid;
        if (ret_volume_key)
                *ret_volume_key = TAKE_PTR(key);
        if (ret_volume_key_size)
                *ret_volume_key_size = key_size;
        if (ret_key_serial)
                *ret_key_serial = TAKE_KEY_SERIAL(uploaded_serial);

        return 0;
}
456 
/* Fills in setup->dm_name ("home-<username>") and setup->dm_node ("/dev/mapper/<dm_name>"), unless they are
 * already set. Returns 0 on success, or the result of log_oom() if allocation fails. */
static int make_dm_names(UserRecord *h, HomeSetup *setup) {
        assert(h);
        assert(h->user_name);
        assert(setup);

        if (!setup->dm_name) {
                char *name;

                name = strjoin("home-", h->user_name);
                if (!name)
                        return log_oom();

                setup->dm_name = name;
        }

        /* The node path is derived from the name, hence this must come second */
        if (!setup->dm_node) {
                char *node;

                node = path_join("/dev/mapper/", setup->dm_name);
                if (!node)
                        return log_oom();

                setup->dm_node = node;
        }

        return 0;
}
476 
/* Attaches a libcryptsetup context to the already active device mapper device of the user and stores it in
 * setup->crypt_device (which must be unset on entry).
 *
 * Returns 1 if the context was acquired. If graceful is true and the device is absent (or libcryptsetup
 * reports -EINVAL for it) 0 is returned and setup->crypt_device remains unset. All other failures are
 * returned as negative errno-style values. */
static int acquire_open_luks_device(
                UserRecord *h,
                HomeSetup *setup,
                bool graceful) {

        _cleanup_(sym_crypt_freep) struct crypt_device *device = NULL;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        r = sym_crypt_init_by_name(&device, setup->dm_name);
        if (graceful && (ERRNO_IS_DEVICE_ABSENT(r) || r == -EINVAL))
                return 0; /* Not set up, and the caller is fine with that */
        if (r < 0)
                return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);

        cryptsetup_enable_logging(device);

        setup->crypt_device = TAKE_PTR(device);
        return 1;
}
508 
/* Opens a LUKS device that is already set up, and re-validates the password while doing so (which also
 * provides us with the volume key, which we want).
 *
 * The password lists from cache (if any) are tried first, then h->password; -ENOKEY is returned if none
 * works. On success returns 0 and leaves the libcryptsetup context in setup->crypt_device. The ret_*
 * parameters are optional: the UUID found in the superblock, and the volume key (to be erased and freed by
 * the caller) with its size. */
static int luks_open(
                UserRecord *h,
                HomeSetup *setup,
                const PasswordCache *cache,
                sd_id128_t *ret_found_uuid,
                void **ret_volume_key,
                size_t *ret_volume_key_size) {

        _cleanup_(erase_and_freep) void *vk = NULL;
        sd_id128_t p;
        char **list;
        size_t vks;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        r = sym_crypt_load(setup->crypt_device, CRYPT_LUKS2, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to load LUKS superblock: %m");

        r = sym_crypt_get_volume_key_size(setup->crypt_device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine LUKS volume key size");
        vks = (size_t) r;

        /* Only bother with the UUID if the caller asked for it */
        if (ret_found_uuid) {
                const char *s;

                s = sym_crypt_get_uuid(setup->crypt_device);
                if (!s)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has no UUID.");

                r = sd_id128_from_string(s, &p);
                if (r < 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "LUKS superblock has invalid UUID.");
        }

        vk = malloc(vks);
        if (!vk)
                return log_oom();

        /* Go through all password sources, stop at the first list that yields anything but "no key" */
        r = -ENOKEY;
        FOREACH_POINTER(list,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        h->password) {
                r = luks_try_passwords(h, setup->crypt_device, list, vk, &vks, NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(r, "No valid password for LUKS superblock.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        log_info("Discovered used LUKS device /dev/mapper/%s, and validated password.", setup->dm_name);

        /* This is needed so that crypt_resize() can operate correctly for pre-existing LUKS devices. We need
         * to tell libcryptsetup the volume key explicitly, so that it is in the kernel keyring. */
        r = sym_crypt_activate_by_volume_key(setup->crypt_device, NULL, vk, vks, CRYPT_ACTIVATE_KEYRING_KEY);
        if (r < 0)
                return log_error_errno(r, "Failed to upload volume key again: %m");

        log_info("Successfully re-activated LUKS device.");

        if (ret_found_uuid)
                *ret_found_uuid = p;
        if (ret_volume_key)
                *ret_volume_key = TAKE_PTR(vk);
        if (ret_volume_key_size)
                *ret_volume_key_size = vks;

        return 0;
}
593 
/* Probes the file system on the specified device node and verifies that we are willing to use it.
 *
 * Returns 0 on success and stores the detected file system type in *ret_fstype (to be freed by the caller)
 * and, if ret_found_uuid is non-NULL, the detected file system UUID there. Fails with -EPROTONOSUPPORT if
 * the file system type is not among the supported ones, and with -EMEDIUMTYPE if uuid is non-null and does
 * not match the detected UUID. */
static int fs_validate(
                const char *dm_node,
                sd_id128_t uuid,
                char **ret_fstype,
                sd_id128_t *ret_found_uuid) {

        _cleanup_free_ char *detected_type = NULL;
        sd_id128_t detected_uuid;
        int r;

        assert(dm_node);
        assert(ret_fstype);

        r = probe_file_system_by_path(dm_node, &detected_type, &detected_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed to probe file system: %m");

        /* Restrict the set of file systems we accept a bit, as protection against little tested kernel
         * file systems. Moreover, we only support the resize ioctls for these file systems. */
        if (!supported_fstype(detected_type))
                return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Image contains unsupported file system: %s", strna(detected_type));

        /* If the caller expects a specific UUID, insist on it */
        if (!(sd_id128_is_null(uuid) || sd_id128_equal(uuid, detected_uuid)))
                return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), "File system has wrong UUID.");

        log_info("Probing file system completed (found %s).", detected_type);

        if (ret_found_uuid) /* Report the UUID we actually found, if the caller wants to know */
                *ret_found_uuid = detected_uuid;

        *ret_fstype = TAKE_PTR(detected_type);

        return 0;
}
629 
/* Probes the image referenced by fd and determines where the LUKS payload is located in it.
 *
 * Two layouts are accepted: the image carries a "crypto_LUKS" superblock directly, in which case offset 0
 * and size UINT64_MAX ("full disk") are returned along with a null partition UUID; or the image carries a
 * GPT partition table with exactly one partition of type GPT_USER_HOME whose name equals label (and whose
 * UUID equals partition_uuid, if that is non-null and the partition has a UUID), in which case the
 * partition's start and size are returned, multiplied by 512 to convert them to bytes.
 *
 * Returns 0 on success. -ENOPKG is returned if nothing suitable is found or the result is ambiguous,
 * -EINVAL if the partition geometry is out of range, other negative errno-style values on probing
 * failures. ret_partition_uuid is optional and may be NULL. */
static int luks_validate(
                int fd,
                const char *label,
                sd_id128_t partition_uuid,
                sd_id128_t *ret_partition_uuid,
                uint64_t *ret_offset,
                uint64_t *ret_size) {

        _cleanup_(blkid_free_probep) blkid_probe b = NULL;
        sd_id128_t found_partition_uuid = SD_ID128_NULL;
        const char *fstype = NULL, *pttype = NULL;
        blkid_loff_t offset = 0, size = 0;
        blkid_partlist pl;
        bool found = false;
        int r, n;

        assert(fd >= 0);
        assert(label);
        assert(ret_offset);
        assert(ret_size);

        b = blkid_new_probe();
        if (!b)
                return -ENOMEM;

        /* libblkid does not reliably set errno, hence reset it first and fall back to a fixed error */
        errno = 0;
        r = blkid_probe_set_device(b, fd, 0, 0);
        if (r != 0)
                return errno > 0 ? -errno : -ENOMEM;

        (void) blkid_probe_enable_superblocks(b, 1);
        (void) blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
        (void) blkid_probe_enable_partitions(b, 1);
        (void) blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (IN_SET(r, -2, 1)) /* nothing found or ambiguous result */
                return -ENOPKG;
        if (r != 0)
                return errno > 0 ? -errno : -EIO;

        (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
        if (streq_ptr(fstype, "crypto_LUKS")) {
                /* Directly a LUKS image */
                *ret_offset = 0;
                *ret_size = UINT64_MAX; /* full disk */
                if (ret_partition_uuid)
                        *ret_partition_uuid = SD_ID128_NULL;
                return 0;
        } else if (fstype)
                return -ENOPKG; /* Some other superblock: not ours */

        (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
        if (!streq_ptr(pttype, "gpt"))
                return -ENOPKG;

        errno = 0;
        pl = blkid_probe_get_partitions(b);
        if (!pl)
                return errno > 0 ? -errno : -ENOMEM;

        errno = 0;
        n = blkid_partlist_numof_partitions(pl);
        if (n < 0)
                return errno > 0 ? -errno : -EIO;

        for (int i = 0; i < n; i++) {
                blkid_partition pp;
                sd_id128_t id = SD_ID128_NULL;
                const char *sid;

                errno = 0;
                pp = blkid_partlist_get_partition(pl, i);
                if (!pp)
                        return errno > 0 ? -errno : -EIO;

                if (id128_equal_string(blkid_partition_get_type_string(pp), GPT_USER_HOME) <= 0)
                        continue;

                if (!streq_ptr(blkid_partition_get_name(pp), label))
                        continue;

                sid = blkid_partition_get_uuid(pp);
                if (sid) {
                        r = sd_id128_from_string(sid, &id);
                        if (r < 0)
                                log_debug_errno(r, "Couldn't parse partition UUID %s, weird: %m", sid);

                        if (!sd_id128_is_null(partition_uuid) && !sd_id128_equal(id, partition_uuid))
                                continue;
                }

                /* A second matching partition makes the result ambiguous, refuse */
                if (found)
                        return -ENOPKG;

                offset = blkid_partition_get_start(pp);
                size = blkid_partition_get_size(pp);
                found_partition_uuid = id;

                found = true;
        }

        if (!found)
                return -ENOPKG;

        /* Make sure the multiplication by 512 below can neither overflow nor operate on negative values */
        if (offset < 0)
                return -EINVAL;
        if ((uint64_t) offset > UINT64_MAX / 512U)
                return -EINVAL;
        if (size <= 0)
                return -EINVAL;
        if ((uint64_t) size > UINT64_MAX / 512U)
                return -EINVAL;

        *ret_offset = offset * 512U;
        *ret_size = size * 512U;
        if (ret_partition_uuid)
                *ret_partition_uuid = found_partition_uuid;

        return 0;
}
750 
crypt_device_to_evp_cipher(struct crypt_device * cd,const EVP_CIPHER ** ret)751 static int crypt_device_to_evp_cipher(struct crypt_device *cd, const EVP_CIPHER **ret) {
752         _cleanup_free_ char *cipher_name = NULL;
753         const char *cipher, *cipher_mode, *e;
754         size_t key_size, key_bits;
755         const EVP_CIPHER *cc;
756         int r;
757 
758         assert(cd);
759 
760         /* Let's find the right OpenSSL EVP_CIPHER object that matches the encryption settings of the LUKS
761          * device */
762 
763         cipher = sym_crypt_get_cipher(cd);
764         if (!cipher)
765                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher from LUKS device.");
766 
767         cipher_mode = sym_crypt_get_cipher_mode(cd);
768         if (!cipher_mode)
769                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot get cipher mode from LUKS device.");
770 
771         e = strchr(cipher_mode, '-');
772         if (e)
773                 cipher_mode = strndupa_safe(cipher_mode, e - cipher_mode);
774 
775         r = sym_crypt_get_volume_key_size(cd);
776         if (r <= 0)
777                 return log_error_errno(r < 0 ? r : SYNTHETIC_ERRNO(EINVAL), "Cannot get volume key size from LUKS device.");
778 
779         key_size = r;
780         key_bits = key_size * 8;
781         if (streq(cipher_mode, "xts"))
782                 key_bits /= 2;
783 
784         if (asprintf(&cipher_name, "%s-%zu-%s", cipher, key_bits, cipher_mode) < 0)
785                 return log_oom();
786 
787         cc = EVP_get_cipherbyname(cipher_name);
788         if (!cc)
789                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Selected cipher mode '%s' not supported, can't encrypt JSON record.", cipher_name);
790 
791         /* Verify that our key length calculations match what OpenSSL thinks */
792         r = EVP_CIPHER_key_length(cc);
793         if (r < 0 || (uint64_t) r != key_size)
794                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Key size of selected cipher doesn't meet our expectations.");
795 
796         *ret = cc;
797         return 0;
798 }
799 
/* Searches the LUKS2 header of 'cd' for a token of type "systemd-homed", decrypts the user record stored in
 * it with 'volume_key' (using the cipher derived from the LUKS device itself) and then checks that the
 * embedded record is compatible with the host record 'h' and that 'h' authenticates against it.
 *
 * Token slots are walked in order until the first inactive slot. Slots in state CRYPT_TOKEN_INTERNAL,
 * CRYPT_TOKEN_INTERNAL_UNKNOWN or CRYPT_TOKEN_EXTERNAL, and slots of a different type, are skipped. The
 * first "systemd-homed" token found decides the outcome: any failure while processing it is returned right
 * away, no further slots are tried.
 *
 * Returns 0 on success and stores a reference to the loaded record in *ret_luks_home_record. Returns a
 * negative errno-style value on failure: -EBADMSG if no suitable token exists, -EREMCHG if the embedded
 * record is not compatible with 'h', -EINVAL for malformed or undecryptable token data, or whatever the
 * invoked helpers return. */
static int luks_validate_home_record(
                struct crypt_device *cd,
                UserRecord *h,
                const void *volume_key,
                PasswordCache *cache,
                UserRecord **ret_luks_home_record) {

        int r;

        assert(cd);
        assert(h);
        assert(ret_luks_home_record);

        for (int token = 0; token < sym_crypt_token_max(CRYPT_LUKS2); token++) {
                _cleanup_(json_variant_unrefp) JsonVariant *v = NULL, *rr = NULL;
                _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *context = NULL;
                _cleanup_(user_record_unrefp) UserRecord *lhr = NULL;
                _cleanup_free_ void *encrypted = NULL, *iv = NULL;
                size_t decrypted_size, encrypted_size, iv_size;
                int decrypted_size_out1, decrypted_size_out2;
                /* This buffer receives the plaintext user record, which we parse as sensitive data below.
                 * Hence wipe it before the memory is released. */
                _cleanup_(erase_and_freep) char *decrypted = NULL;
                const char *text, *type;
                crypt_token_info state;
                JsonVariant *jr, *jiv;
                unsigned line, column;
                const EVP_CIPHER *cc;

                state = sym_crypt_token_status(cd, token, &type);
                if (state == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, give up */
                        break;
                if (IN_SET(state, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
                        continue;
                if (state != CRYPT_TOKEN_EXTERNAL_UNKNOWN)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", token, (int) state);

                if (!streq(type, "systemd-homed"))
                        continue;

                r = sym_crypt_token_json_get(cd, token, &text);
                if (r < 0)
                        return log_error_errno(r, "Failed to read LUKS token %i: %m", token);

                r = json_parse(text, JSON_PARSE_SENSITIVE, &v, &line, &column);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse LUKS token JSON data %u:%u: %m", line, column);

                /* The token carries the encrypted record and the IV it was encrypted with, both base64 encoded */
                jr = json_variant_by_key(v, "record");
                if (!jr)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'record' field.");
                jiv = json_variant_by_key(v, "iv");
                if (!jiv)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS token lacks 'iv' field.");

                r = json_variant_unbase64(jr, &encrypted, &encrypted_size);
                if (r < 0)
                        return log_error_errno(r, "Failed to base64 decode record: %m");

                r = json_variant_unbase64(jiv, &iv, &iv_size);
                if (r < 0)
                        return log_error_errno(r, "Failed to base64 decode IV: %m");

                r = crypt_device_to_evp_cipher(cd, &cc);
                if (r < 0)
                        return r;
                if (iv_size > INT_MAX || EVP_CIPHER_iv_length(cc) != (int) iv_size)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "IV size doesn't match.");

                /* OpenSSL takes the input length as 'int'. The header is untrusted data, hence refuse
                 * anything that would be truncated by the conversion instead of decrypting a wrong length. */
                if (encrypted_size > INT_MAX)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Encrypted record too large, refusing.");

                context = EVP_CIPHER_CTX_new();
                if (!context)
                        return log_oom();

                if (EVP_DecryptInit_ex(context, cc, NULL, volume_key, iv) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize decryption context.");

                /* Leave headroom beyond the ciphertext size; the asserts below verify that the output
                 * stays strictly below this, which also leaves room for the trailing NUL byte. */
                decrypted_size = encrypted_size + EVP_CIPHER_key_length(cc) * 2;
                decrypted = new(char, decrypted_size);
                if (!decrypted)
                        return log_oom();

                if (EVP_DecryptUpdate(context, (uint8_t*) decrypted, &decrypted_size_out1, encrypted, encrypted_size) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to decrypt JSON record.");

                assert((size_t) decrypted_size_out1 <= decrypted_size);

                if (EVP_DecryptFinal_ex(context, (uint8_t*) decrypted + decrypted_size_out1, &decrypted_size_out2) != 1)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish decryption of JSON record.");

                assert((size_t) decrypted_size_out1 + (size_t) decrypted_size_out2 < decrypted_size);
                decrypted_size = (size_t) decrypted_size_out1 + (size_t) decrypted_size_out2;

                /* We are going to treat the result as a C string, hence embedded NUL bytes are not OK */
                if (memchr(decrypted, 0, decrypted_size))
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Inner NUL byte in JSON record, refusing.");

                decrypted[decrypted_size] = 0;

                r = json_parse(decrypted, JSON_PARSE_SENSITIVE, &rr, NULL, NULL);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse decrypted JSON record, refusing.");

                lhr = user_record_new();
                if (!lhr)
                        return log_oom();

                r = user_record_load(lhr, rr, USER_RECORD_LOAD_EMBEDDED|USER_RECORD_PERMISSIVE);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse user record: %m");

                if (!user_record_compatible(h, lhr))
                        return log_error_errno(SYNTHETIC_ERRNO(EREMCHG), "LUKS home record not compatible with host record, refusing.");

                r = user_record_authenticate(lhr, h, cache, /* strict_verify= */ true);
                if (r < 0)
                        return r;
                assert(r > 0); /* Insist that a password was verified */

                *ret_luks_home_record = TAKE_PTR(lhr);
                return 0;
        }

        return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Couldn't find home record in LUKS2 header, refusing.");
}
920 
/* Formats the JSON of user record 'hr' as text, encrypts it with 'volume_key' using the cipher derived from
 * the LUKS device 'cd' plus a freshly generated random IV, and wraps the result in a JSON object of type
 * "systemd-homed" with an empty "keyslots" array and base64 encoded "record" and "iv" fields.
 *
 * Returns 0 on success and stores the formatted token text in *ret. Returns a negative errno-style value
 * on failure. */
static int format_luks_token_text(
                struct crypt_device *cd,
                UserRecord *hr,
                const void *volume_key,
                char **ret) {

        _cleanup_(EVP_CIPHER_CTX_freep) EVP_CIPHER_CTX *ctx = NULL;
        _cleanup_(json_variant_unrefp) JsonVariant *token = NULL;
        _cleanup_free_ void *iv = NULL, *ciphertext = NULL;
        _cleanup_free_ char *plaintext = NULL;
        int r, n_update = 0, n_final = 0, iv_len, key_len;
        size_t plaintext_len, ciphertext_max;
        const EVP_CIPHER *cipher;

        assert(cd);
        assert(hr);
        assert(volume_key);
        assert(ret);

        r = crypt_device_to_evp_cipher(cd, &cipher);
        if (r < 0)
                return r;

        key_len = EVP_CIPHER_key_length(cipher);
        iv_len = EVP_CIPHER_iv_length(cipher);

        /* Only generate an IV if the cipher actually wants one */
        if (iv_len > 0) {
                iv = malloc(iv_len);
                if (!iv)
                        return log_oom();

                r = genuine_random_bytes(iv, iv_len, RANDOM_BLOCK);
                if (r < 0)
                        return log_error_errno(r, "Failed to generate IV: %m");
        }

        ctx = EVP_CIPHER_CTX_new();
        if (!ctx)
                return log_oom();

        if (EVP_EncryptInit_ex(ctx, cipher, NULL, volume_key, iv) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to initialize encryption context.");

        r = json_variant_format(hr->json, 0, &plaintext);
        if (r < 0)
                return log_error_errno(r, "Failed to format user record for LUKS: %m");

        /* Size the output buffer with some headroom over the plaintext size; the asserts below check that
         * the encrypted output really fits. */
        plaintext_len = strlen(plaintext);
        ciphertext_max = plaintext_len + 2*key_len - 1;

        ciphertext = malloc(ciphertext_max);
        if (!ciphertext)
                return log_oom();

        if (EVP_EncryptUpdate(ctx, ciphertext, &n_update, (uint8_t*) plaintext, plaintext_len) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to encrypt JSON record.");

        assert((size_t) n_update <= ciphertext_max);

        if (EVP_EncryptFinal_ex(ctx, (uint8_t*) ciphertext + n_update, &n_final) != 1)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to finish encryption of JSON record. ");

        assert((size_t) n_update + (size_t) n_final <= ciphertext_max);

        r = json_build(&token,
                       JSON_BUILD_OBJECT(
                                       JSON_BUILD_PAIR("type", JSON_BUILD_CONST_STRING("systemd-homed")),
                                       JSON_BUILD_PAIR("keyslots", JSON_BUILD_EMPTY_ARRAY),
                                       JSON_BUILD_PAIR("record", JSON_BUILD_BASE64(ciphertext, n_update + n_final)),
                                       JSON_BUILD_PAIR("iv", JSON_BUILD_BASE64(iv, iv_len))));
        if (r < 0)
                return log_error_errno(r, "Failed to prepare LUKS JSON token object: %m");

        r = json_variant_format(token, 0, ret);
        if (r < 0)
                return log_error_errno(r, "Failed to format encrypted user record for LUKS: %m");

        return 0;
}
1000 
/* Writes the embedded part of user record 'h' into the "systemd-homed" token of the LUKS2 header, encrypted
 * with the volume key stored in 'setup'.
 *
 * Returns 0 if nothing was written (no LUKS device is set up, or 'old_home' already equals the record we
 * would write), 1 if the header was updated, and a negative errno-style value on failure (-EBADMSG if the
 * header contains no "systemd-homed" token to update). */
int home_store_header_identity_luks(
                UserRecord *h,
                HomeSetup *setup,
                UserRecord *old_home) {

        _cleanup_(user_record_unrefp) UserRecord *embedded = NULL;
        _cleanup_free_ char *token_json = NULL;
        int r;

        assert(h);

        if (!setup->crypt_device)
                return 0;

        assert(setup->volume_key);

        /* The identity record of the user is kept, in encrypted form, in the "token" data fields of the
         * LUKS2 header. The reason: if the record lived only inside the payload file system we would have
         * to mount that file system before we could validate the JSON record, its signatures and whether
         * it is the one we are looking for. Kernel file system implementations generally cannot be trusted
         * with untrusted media though. By storing the record independently of the file system we can
         * validate it first and mount only afterwards. For simplicity the record is encrypted with the
         * same settings as the file system itself. */

        r = user_record_clone(h, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &embedded);
        if (r < 0)
                return log_error_errno(r, "Failed to determine new header record: %m");

        if (old_home && user_record_equal(old_home, embedded)) {
                log_debug("Not updating header home record.");
                return 0;
        }

        r = format_luks_token_text(setup->crypt_device, embedded, setup->volume_key, &token_json);
        if (r < 0)
                return r;

        for (int slot = 0; slot < sym_crypt_token_max(CRYPT_LUKS2); slot++) {
                crypt_token_info status;
                const char *type;

                status = sym_crypt_token_status(setup->crypt_device, slot, &type);
                if (status == CRYPT_TOKEN_INACTIVE) /* First unconfigured token, we are done */
                        break;

                if (status != CRYPT_TOKEN_EXTERNAL_UNKNOWN) {
                        if (IN_SET(status, CRYPT_TOKEN_INTERNAL, CRYPT_TOKEN_INTERNAL_UNKNOWN, CRYPT_TOKEN_EXTERNAL))
                                continue; /* Not ours */

                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unexpected token state of token %i: %i", slot, (int) status);
                }

                if (streq(type, "systemd-homed")) {
                        r = sym_crypt_token_json_set(setup->crypt_device, slot, token_json);
                        if (r < 0)
                                return log_error_errno(r, "Failed to set JSON token for slot %i: %m", slot);

                        /* Drop the text after the first write: for every further matching token we then
                         * call crypt_token_json_set() with a NULL text, which invalidates that token. */
                        token_json = mfree(token_json);
                }
        }

        /* If the text is still around we never came across a token of our type */
        if (token_json)
                return log_error_errno(SYNTHETIC_ERRNO(EBADMSG), "Didn't find any record token to update.");

        log_info("Wrote LUKS header user record.");

        return 1;
}
1069 
/* Issues FITRIM over the whole range of the file system referenced by 'root_fd'.
 *
 * Used when discarding is enabled: everything is discarded right after mounting so that the discard setting
 * takes effect on activation (and, optionally, on logout).
 *
 * Returns 1 if the trim was carried out, 0 if the file system does not support it, and a negative errno
 * value (logged as a warning) on any other failure. */
int run_fitrim(int root_fd) {
        struct fstrim_range whole_fs = {
                .len = UINT64_MAX,
        };

        assert(root_fd >= 0);

        if (ioctl(root_fd, FITRIM, &whole_fs) >= 0) {
                log_info("Discarded unused %s.", FORMAT_BYTES(whole_fs.len));
                return 1;
        }

        if (!ERRNO_IS_NOT_SUPPORTED(errno) && errno != EBADF)
                return log_warning_errno(errno, "Failed to invoke FITRIM, ignoring: %m");

        log_debug_errno(errno, "File system does not support FITRIM, not trimming.");
        return 0;
}
1092 
run_fallocate(int backing_fd,const struct stat * st)1093 int run_fallocate(int backing_fd, const struct stat *st) {
1094         struct stat stbuf;
1095 
1096         assert(backing_fd >= 0);
1097 
1098         /* If discarding is off, let's allocate the whole image before mounting, so that the setting takes
1099          * effect on activation */
1100 
1101         if (!st) {
1102                 if (fstat(backing_fd, &stbuf) < 0)
1103                         return log_error_errno(errno, "Failed to fstat(): %m");
1104 
1105                 st = &stbuf;
1106         }
1107 
1108         if (!S_ISREG(st->st_mode))
1109                 return 0;
1110 
1111         if (st->st_blocks >= DIV_ROUND_UP(st->st_size, 512)) {
1112                 log_info("Backing file is fully allocated already.");
1113                 return 0;
1114         }
1115 
1116         if (fallocate(backing_fd, FALLOC_FL_KEEP_SIZE, 0, st->st_size) < 0) {
1117 
1118                 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
1119                         log_debug_errno(errno, "fallocate() not supported on file system, ignoring.");
1120                         return 0;
1121                 }
1122 
1123                 if (ERRNO_IS_DISK_SPACE(errno)) {
1124                         log_debug_errno(errno, "Not enough disk space to fully allocate home.");
1125                         return -ENOSPC; /* make recognizable */
1126                 }
1127 
1128                 return log_error_errno(errno, "Failed to allocate backing file blocks: %m");
1129         }
1130 
1131         log_info("Allocated additional %s.",
1132                  FORMAT_BYTES((DIV_ROUND_UP(st->st_size, 512) - st->st_blocks) * 512));
1133         return 1;
1134 }
1135 
run_fallocate_by_path(const char * backing_path)1136 int run_fallocate_by_path(const char *backing_path) {
1137         _cleanup_close_ int backing_fd = -1;
1138 
1139         backing_fd = open(backing_path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1140         if (backing_fd < 0)
1141                 return log_error_errno(errno, "Failed to open '%s' for fallocate(): %m", backing_path);
1142 
1143         return run_fallocate(backing_fd, NULL);
1144 }
1145 
/* Takes an exclusive BSD lock on the image file if $SYSTEMD_LUKS_LOCK is set to true, and passes the locked
 * fd on to our parent. homed keeps the fd open, so that no other homed instance (across the network or
 * such) mounts the same file as well. 'ip' is the image path and is used for log messages only.
 *
 * Returns 0 on success or if locking is not requested, -EADDRINUSE if the file is already locked by someone
 * else, and another negative errno value on other failures. A failure to send the fd to the parent is only
 * logged as a warning. */
static int lock_image_fd(int image_fd, const char *ip) {
        int r;

        assert(image_fd >= 0);
        assert(ip);

        r = getenv_bool("SYSTEMD_LUKS_LOCK");
        if (r < 0 && r != -ENXIO)
                return log_error_errno(r, "Failed to parse $SYSTEMD_LUKS_LOCK environment variable: %m");
        if (r <= 0) /* -ENXIO or explicitly false: locking is not requested */
                return 0;

        if (flock(image_fd, LOCK_EX|LOCK_NB) < 0) {
                if (errno == EAGAIN) {
                        log_error_errno(errno, "Image file '%s' already locked, can't use.", ip);
                        return -EADDRINUSE; /* Make error recognizable */
                }

                return log_error_errno(errno, "Failed to lock image file '%s': %m", ip);
        }

        log_info("Successfully locked image file '%s'.", ip);

        /* Hand the locked fd to our parent, which keeps it safe while the home dir is active */
        r = sd_pid_notify_with_fds(0, false, "SYSTEMD_LUKS_LOCK_FD=1", &image_fd, 1);
        if (r < 0)
                log_warning_errno(r, "Failed to send LUKS lock fd to parent, ignoring: %m");

        return 0;
}
1183 
/* Opens the image backing a LUKS home, i.e. 'force_image_path' if set and otherwise the image path from
 * the user record 'h'. The image must be a regular file or a block device; regular files are additionally
 * run through lock_image_fd().
 *
 * Returns the open file descriptor (ownership passes to the caller) on success and, if 'ret_stat' is
 * non-NULL, stores the stat data of the image there. Returns a negative errno-style value on failure
 * (-EISDIR for directories, -EBADFD for other unsupported inode types). */
static int open_image_file(
                UserRecord *h,
                const char *force_image_path,
                struct stat *ret_stat) {

        _cleanup_close_ int fd = -1;
        struct stat image_st;
        const char *path;
        int r;

        assert(h || force_image_path);

        path = force_image_path ?: user_record_image_path(h);

        fd = open(path, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
        if (fd < 0)
                return log_error_errno(errno, "Failed to open image file %s: %m", path);

        if (fstat(fd, &image_st) < 0)
                return log_error_errno(errno, "Failed to fstat() image file: %m");

        if (S_ISDIR(image_st.st_mode))
                return log_error_errno(
                                SYNTHETIC_ERRNO(EISDIR),
                                "Image file %s is not a regular file or block device: %m", path);
        if (!S_ISREG(image_st.st_mode) && !S_ISBLK(image_st.st_mode))
                return log_error_errno(
                                SYNTHETIC_ERRNO(EBADFD),
                                "Image file %s is not a regular file or block device: %m", path);

        /* Only regular files are locked. Taking locks on block devices makes little sense: it might
         * interfere with what udev is doing, and such locks aren't network propagated anyway, hence they
         * would not give us what we are after here. */
        if (S_ISREG(image_st.st_mode)) {
                r = lock_image_fd(fd, path);
                if (r < 0)
                        return r;
        }

        if (ret_stat)
                *ret_stat = image_st;

        return TAKE_FD(fd);
}
1223 
/* Prepares 'setup' for accessing the LUKS home of user record 'h'. The image is opened (or an already open
 * setup->image_fd is reused), and then, depending on 'flags':
 *
 *   - with HOME_SETUP_ALREADY_ACTIVATED: the LUKS device is opened if that has not happened yet, the block
 *     device backing it is opened and queried for partition offset and size, and the home directory of 'h'
 *     is opened as root fd.
 *
 *   - otherwise: the disk label is validated, the image is marked dirty, fully allocated (unless discard
 *     is enabled), attached to a loopback device and unlocked. Then the file system is validated, checked
 *     and mounted, and the work directory below HOME_RUNTIME_WORK_DIR is opened as root fd.
 *
 * 'force_image_path' overrides the image path of the user record if set. If 'ret_luks_home' is non-NULL the
 * user record stored in the LUKS header is validated against 'h' and returned there.
 *
 * Returns 0 on success, a negative errno-style value on failure. On failure the objects already stored in
 * 'setup' are left in place; releasing them is presumably up to whoever owns 'setup'. */
int home_setup_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                const char *force_image_path,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_luks_home) {

        sd_id128_t found_partition_uuid = SD_ID128_NULL, found_luks_uuid = SD_ID128_NULL, found_fs_uuid = SD_ID128_NULL;
        _cleanup_(user_record_unrefp) UserRecord *luks_home = NULL;
        _cleanup_(erase_and_freep) void *volume_key = NULL;
        size_t volume_key_size = 0;
        uint64_t offset, size;
        struct stat st;
        int r;

        assert(h);
        assert(setup);
        assert(user_record_storage(h) == USER_LUKS);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        r = make_dm_names(h, setup);
        if (r < 0)
                return r;

        /* Reuse the image fd if it has already been opened by an earlier step */
        if (setup->image_fd < 0) {
                setup->image_fd = open_image_file(h, force_image_path, &st);
                if (setup->image_fd < 0)
                        return setup->image_fd;
        } else if (fstat(setup->image_fd, &st) < 0)
                return log_error_errno(errno, "Failed to stat image: %m");

        if (FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED)) {
                struct loop_info64 info;
                const char *n;

                if (!setup->crypt_device) {
                        r = luks_open(h,
                                      setup,
                                      cache,
                                      &found_luks_uuid,
                                      &volume_key,
                                      &volume_key_size);
                        if (r < 0)
                                return r;
                }

                if (ret_luks_home) {
                        r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
                        if (r < 0)
                                return r;
                }

                n = sym_crypt_get_device_name(setup->crypt_device);
                if (!n)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);

                if (!setup->loop) {
                        r = loop_device_open(n, O_RDWR, &setup->loop);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open loopback device %s: %m", n);
                }

                if (ioctl(setup->loop->fd, LOOP_GET_STATUS64, &info) < 0) {
                        _cleanup_free_ char *sysfs = NULL;

                        /* ENOTTY/EINVAL: the loop ioctl is not available on this device. In that case get
                         * the size from the generic block device ioctl and the offset from sysfs. */
                        if (!IN_SET(errno, ENOTTY, EINVAL))
                                return log_error_errno(errno, "Failed to get block device metrics of %s: %m", n);

                        /* Note: the error of a failed ioctl()/fstat() is in errno, not in 'r', which still
                         * carries the (non-negative) result of an earlier call at this point. */
                        if (ioctl(setup->loop->fd, BLKGETSIZE64, &size) < 0)
                                return log_error_errno(errno, "Failed to read block device size of %s: %m", n);

                        if (fstat(setup->loop->fd, &st) < 0)
                                return log_error_errno(errno, "Failed to stat block device %s: %m", n);
                        assert(S_ISBLK(st.st_mode));

                        if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
                                return log_oom();

                        if (access(sysfs, F_OK) < 0) {
                                if (errno != ENOENT)
                                        return log_error_errno(errno, "Failed to determine whether %s exists: %m", sysfs);

                                /* No "partition" attribute: not a partition, hence no offset */
                                offset = 0;
                        } else {
                                _cleanup_free_ char *buffer = NULL;

                                /* Release the first path before reusing the variable, so that it is not leaked */
                                sysfs = mfree(sysfs);

                                if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/start", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
                                        return log_oom();

                                r = read_one_line_file(sysfs, &buffer);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to read partition start offset: %m");

                                r = safe_atou64(buffer, &offset);
                                if (r < 0)
                                        return log_error_errno(r, "Failed to parse partition start offset: %m");

                                /* The value read is converted to bytes by multiplying with 512, make sure that cannot overflow */
                                if (offset > UINT64_MAX / 512U)
                                        return log_error_errno(SYNTHETIC_ERRNO(E2BIG), "Offset too large for 64 byte range, refusing.");

                                offset *= 512U;
                        }
                } else {
#if HAVE_VALGRIND_MEMCHECK_H
                        VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info));
#endif

                        offset = info.lo_offset;
                        size = info.lo_sizelimit;
                }

                found_partition_uuid = found_fs_uuid = SD_ID128_NULL;

                log_info("Discovered used loopback device %s.", setup->loop->node);

                if (setup->root_fd < 0) {
                        setup->root_fd = open(user_record_home_directory(h), O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                        if (setup->root_fd < 0)
                                return log_error_errno(errno, "Failed to open home directory: %m");
                }
        } else {
                _cleanup_free_ char *fstype = NULL, *subdir = NULL;
                const char *ip;

                /* When we aren't reopening the home directory we are allocating it fresh, hence the relevant
                 * objects can't be allocated yet. */
                assert(setup->root_fd < 0);
                assert(!setup->crypt_device);
                assert(!setup->loop);

                ip = force_image_path ?: user_record_image_path(h);

                subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
                if (!subdir)
                        return log_oom();

                r = luks_validate(setup->image_fd, user_record_user_name_and_realm(h), h->partition_uuid, &found_partition_uuid, &offset, &size);
                if (r < 0)
                        return log_error_errno(r, "Failed to validate disk label: %m");

                /* Everything before this point left the image untouched. We are now starting to make
                 * changes, hence mark the image dirty */
                if (run_mark_dirty(setup->image_fd, true) > 0)
                        setup->do_mark_clean = true;

                if (!user_record_luks_discard(h)) {
                        r = run_fallocate(setup->image_fd, &st);
                        if (r < 0)
                                return r;
                }

                r = loop_device_make(setup->image_fd, O_RDWR, offset, size, 0, &setup->loop);
                if (r == -ENOENT) {
                        log_error_errno(r, "Loopback block device support is not available on this system.");
                        return -ENOLINK; /* make recognizable */
                }
                if (r < 0)
                        return log_error_errno(r, "Failed to allocate loopback context: %m");

                log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);

                r = luks_setup(h,
                               setup->loop->node ?: ip,
                               setup->dm_name,
                               h->luks_uuid,
                               h->luks_cipher,
                               h->luks_cipher_mode,
                               h->luks_volume_key_size,
                               h->password,
                               cache,
                               user_record_luks_discard(h) || user_record_luks_offline_discard(h),
                               &setup->crypt_device,
                               &found_luks_uuid,
                               &volume_key,
                               &volume_key_size,
                               &setup->key_serial);
                if (r < 0)
                        return r;

                setup->undo_dm = true;

                /* Validate the record from the LUKS header before the file system is touched at all */
                if (ret_luks_home) {
                        r = luks_validate_home_record(setup->crypt_device, h, volume_key, cache, &luks_home);
                        if (r < 0)
                                return r;
                }

                r = fs_validate(setup->dm_node, h->file_system_uuid, &fstype, &found_fs_uuid);
                if (r < 0)
                        return r;

                r = run_fsck(setup->dm_node, fstype);
                if (r < 0)
                        return r;

                r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
                if (r < 0)
                        return r;

                setup->undo_mount = true;

                setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                if (setup->root_fd < 0)
                        return log_error_errno(errno, "Failed to open home directory: %m");

                if (user_record_luks_discard(h))
                        (void) run_fitrim(setup->root_fd);

                /* Offline discard and offline allocation are mutually exclusive */
                setup->do_offline_fallocate = !(setup->do_offline_fitrim = user_record_luks_offline_discard(h));
        }

        /* Only overwrite what we actually discovered in this invocation */
        if (!sd_id128_is_null(found_partition_uuid))
                setup->found_partition_uuid = found_partition_uuid;
        if (!sd_id128_is_null(found_luks_uuid))
                setup->found_luks_uuid = found_luks_uuid;
        if (!sd_id128_is_null(found_fs_uuid))
                setup->found_fs_uuid = found_fs_uuid;

        setup->partition_offset = offset;
        setup->partition_size = size;

        /* Hand the volume key over to 'setup', securely erasing any key stored there before */
        if (volume_key) {
                erase_and_free(setup->volume_key);
                setup->volume_key = TAKE_PTR(volume_key);
                setup->volume_key_size = volume_key_size;
        }

        if (ret_luks_home)
                *ret_luks_home = TAKE_PTR(luks_home);

        return 0;
}
1461 
/* Logs a one-line summary of the sizes involved: the image size ('host_size'), the file system size
 * ('encrypted_size'), and the payload size and free space of the file system as computed from 'sfs'
 * (block counts multiplied by f_frsize). */
static void print_size_summary(uint64_t host_size, uint64_t encrypted_size, const struct statfs *sfs) {
        uint64_t payload_bytes, free_bytes;

        assert(sfs);

        payload_bytes = (uint64_t) sfs->f_blocks * (uint64_t) sfs->f_frsize;
        free_bytes = (uint64_t) sfs->f_bfree * (uint64_t) sfs->f_frsize;

        log_info("Image size is %s, file system size is %s, file system payload size is %s, file system free is %s.",
                 FORMAT_BYTES(host_size),
                 FORMAT_BYTES(encrypted_size),
                 FORMAT_BYTES(payload_bytes),
                 FORMAT_BYTES(free_bytes));
}
1471 
/* Triggers a grow-only resize of an already activated LUKS home, if the auto-resize mode of 'h' asks for
 * growing and the file system behind setup->root_fd supports both online shrinking and growing.
 *
 * Returns 0 if no resize is attempted, a negative errno value if statfs fails, and otherwise whatever
 * home_resize_luks() returns. */
static int home_auto_grow_luks(
                UserRecord *h,
                HomeSetup *setup,
                PasswordCache *cache) {

        const HomeSetupFlags resize_flags =
                HOME_SETUP_ALREADY_ACTIVATED|
                HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
                HOME_SETUP_RESIZE_DONT_SHRINK|
                HOME_SETUP_RESIZE_DONT_UNDO;
        struct statfs fs_info;

        assert(h);
        assert(setup);

        switch (user_record_auto_resize_mode(h)) {

        case AUTO_RESIZE_GROW:
        case AUTO_RESIZE_SHRINK_AND_GROW:
                break;

        default:
                /* Growing automatically was not requested */
                return 0;
        }

        assert(setup->root_fd >= 0);

        if (fstatfs(setup->root_fd, &fs_info) < 0)
                return log_error_errno(errno, "Failed to statfs home directory: %m");

        if (!fs_can_online_shrink_and_grow(fs_info.f_type)) {
                log_debug("Not auto-grow file system, since selected file system cannot do both online shrink and grow.");
                return 0;
        }

        log_debug("Initiating auto-grow...");

        return home_resize_luks(h, resize_flags, setup, cache, NULL);
}
1507 
/* Activates a LUKS-backed home directory: sets up the LUKS volume, optionally auto-grows it, refreshes
 * the user record, moves the mount into its final place and then hands the loopback and DM devices
 * over so that they outlive this process.
 *
 * Refuses with -EEXIST if home_get_state_luks() reports a positive state (the error message indicates
 * this means the device mapper device already exists). On success returns 1 and stores the refreshed
 * user record in *ret_home. On failure returns a negative errno-style error and leaves *ret_home
 * untouched. */
int home_activate_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_home) {

        _cleanup_(user_record_unrefp) UserRecord *new_home = NULL, *luks_home_record = NULL;
        uint64_t host_size, encrypted_size;
        const char *hdo, *hd;
        struct statfs sfs;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);
        assert(ret_home);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        assert_se(hdo = user_record_home_directory(h));
        hd = strdupa_safe(hdo); /* copy the string out, since it might change later in the home record object */

        r = home_get_state_luks(h, setup);
        if (r < 0)
                return r;
        if (r > 0)
                return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);

        r = home_setup_luks(
                        h,
                        0,
                        NULL,
                        setup,
                        cache,
                        &luks_home_record);
        if (r < 0)
                return r;

        r = home_auto_grow_luks(h, setup, cache);
        if (r < 0)
                return r;

        /* Query both sizes only after the auto-grow step, so that the summary logged at the end reflects
         * the sizes after any resize */
        r = block_get_size_by_fd(setup->loop->fd, &host_size);
        if (r < 0)
                return log_error_errno(r, "Failed to get loopback block device size: %m");

        r = block_get_size_by_path(setup->dm_node, &encrypted_size);
        if (r < 0)
                return log_error_errno(r, "Failed to get LUKS block device size: %m");

        r = home_refresh(
                        h,
                        flags,
                        setup,
                        luks_home_record,
                        cache,
                        &sfs,
                        &new_home);
        if (r < 0)
                return r;

        r = home_extend_embedded_identity(new_home, h, setup);
        if (r < 0)
                return r;

        /* We are done with the directory fd; close it before the mount is moved */
        setup->root_fd = safe_close(setup->root_fd);

        r = home_move_mount(user_record_user_name_and_realm(h), hd);
        if (r < 0)
                return r;

        /* The mount is in its final place now, hence clear the flags that request it to be undone or
         * trimmed */
        setup->undo_mount = false;
        setup->do_offline_fitrim = false;

        loop_device_relinquish(setup->loop);

        /* Request deferred deactivation of the DM device. Failure is only logged, not propagated. */
        r = sym_crypt_deactivate_by_name(NULL, setup->dm_name, CRYPT_DEACTIVATE_DEFERRED);
        if (r < 0)
                log_warning_errno(r, "Failed to relinquish DM device, ignoring: %m");

        /* Activation succeeded: clear the remaining undo/cleanup requests in the setup object */
        setup->undo_dm = false;
        setup->do_offline_fallocate = false;
        setup->do_mark_clean = false;
        setup->do_drop_caches = false;
        TAKE_KEY_SERIAL(setup->key_serial); /* Leave key in kernel keyring */

        log_info("Activation completed.");

        print_size_summary(host_size, encrypted_size, &sfs);

        *ret_home = TAKE_PTR(new_home);
        return 1;
}
1604 
/* Deactivates the LUKS device mapper device of a home directory.
 *
 * Returns > 0 if this call actually detached the device, 0 if the device was already gone (or could
 * not be found), and a negative errno-style error if the cryptsetup context could not be initialized
 * or the device could not be deactivated. Waiting for the block device to disappear, re-allocating
 * the image and marking it are done on a best-effort basis; failures there are not propagated. */
int home_deactivate_luks(UserRecord *h, HomeSetup *setup) {
        /* Initialized explicitly, so that no combination of the branches below can return an
         * indeterminate value: only a successful deactivation flips this to true. */
        bool we_detached = false;
        int r;

        assert(h);
        assert(setup);

        /* Note that the DM device and loopback device are set to auto-detach, hence strictly speaking we
         * don't have to detach them explicitly. However, we do that nonetheless (in case of the DM
         * device), to avoid races: by explicitly detaching them we know when the detaching is complete. We
         * don't bother about the loopback device because unlike the DM device it doesn't have a fixed
         * name. */

        if (!setup->crypt_device) {
                r = acquire_open_luks_device(h, setup, /* graceful= */ true);
                if (r < 0)
                        return log_error_errno(r, "Failed to initialize cryptsetup context for %s: %m", setup->dm_name);
                if (r == 0)
                        log_debug("LUKS device %s has already been detached.", setup->dm_name);
        }

        if (setup->crypt_device) {
                log_info("Discovered used LUKS device %s.", setup->dm_node);

                cryptsetup_enable_logging(setup->crypt_device);

                r = sym_crypt_deactivate_by_name(setup->crypt_device, setup->dm_name, 0);
                if (ERRNO_IS_DEVICE_ABSENT(r) || r == -EINVAL)
                        /* Somebody else was quicker, that's fine */
                        log_debug_errno(r, "LUKS device %s is already detached.", setup->dm_node);
                else if (r < 0)
                        return log_info_errno(r, "LUKS device %s couldn't be deactivated: %m", setup->dm_node);
                else {
                        log_info("LUKS device detaching completed.");
                        we_detached = true;
                }
        }

        (void) wait_for_block_device_gone(setup, USEC_PER_SEC * 30);
        setup->undo_dm = false;

        if (user_record_luks_offline_discard(h))
                log_debug("Not allocating on logout.");
        else
                (void) run_fallocate_by_path(user_record_image_path(h));

        run_mark_dirty_by_path(user_record_image_path(h), false);
        return we_detached;
}
1656 
/* Trims the home file system if the user record enables offline discard; otherwise only logs that no
 * trimming takes place. The trim itself is best-effort: this function always returns 0. */
int home_trim_luks(UserRecord *h, HomeSetup *setup) {
        assert(h);
        assert(setup);
        assert(setup->root_fd >= 0);

        if (user_record_luks_offline_discard(h))
                (void) run_fitrim(setup->root_fd);
        else
                log_debug("Not trimming on logout.");

        return 0;
}
1670 
/* Fills in *buffer with the PBKDF parameters configured in the user record (hash algorithm, type, time
 * cost converted from µs to ms, memory cost divided by 1024 to yield the kB unit of the target field,
 * parallel threads). All other fields are zeroed. Returns buffer, for convenience. */
static struct crypt_pbkdf_type* build_good_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
        assert(buffer);
        assert(hr);

        struct crypt_pbkdf_type configured = {
                .hash = user_record_luks_pbkdf_hash_algorithm(hr),
                .type = user_record_luks_pbkdf_type(hr),
                .time_ms = user_record_luks_pbkdf_time_cost_usec(hr) / USEC_PER_MSEC,
                .max_memory_kb = user_record_luks_pbkdf_memory_cost(hr) / 1024,
                .parallel_threads = user_record_luks_pbkdf_parallel_threads(hr),
        };

        *buffer = configured;
        return buffer;
}
1685 
/* Fills in *buffer with the cheapest PBKDF setup: PBKDF2 with a single iteration and 1ms time cost,
 * using the hash algorithm configured in the user record. All other fields are zeroed. Returns buffer,
 * for convenience. */
static struct crypt_pbkdf_type* build_minimal_pbkdf(struct crypt_pbkdf_type *buffer, UserRecord *hr) {
        assert(buffer);
        assert(hr);

        /* PKCS#11 derived keys are generated randomly and are of high quality already, hence there is
         * no point in spending time on an expensive PBKDF for them */
        struct crypt_pbkdf_type cheapest = {
                .hash = user_record_luks_pbkdf_hash_algorithm(hr),
                .type = CRYPT_KDF_PBKDF2,
                .iterations = 1,
                .time_ms = 1,
        };

        *buffer = cheapest;
        return buffer;
}
1701 
/* Formats the block device 'node' as a LUKS2 volume and activates it under the device mapper name
 * 'dm_name'.
 *
 * Steps, in order: generate a random volume key, format the device with the cipher settings from the
 * user record 'hr', add one keyslot per entry of 'effective_passwords' (slot numbers are assigned
 * sequentially, starting at 0), activate the volume by volume key (allowing discards if 'discard' is
 * set), and finally store a reduced copy of the user record as a LUKS token.
 *
 * Passwords that are contained in 'cache' get the minimal PBKDF, all others the PBKDF configured in
 * the user record.
 *
 * On success returns 0 and passes ownership of the libcryptsetup context to the caller via *ret. On
 * failure returns a negative errno-style error; the context and the volume key are released by the
 * cleanup handlers. */
static int luks_format(
                const char *node,
                const char *dm_name,
                sd_id128_t uuid,
                const char *label,
                const PasswordCache *cache,
                char **effective_passwords,
                bool discard,
                UserRecord *hr,
                struct crypt_device **ret) {

        _cleanup_(user_record_unrefp) UserRecord *reduced = NULL;
        _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
        _cleanup_(erase_and_freep) void *volume_key = NULL;
        struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
        _cleanup_free_ char *text = NULL;
        size_t volume_key_size;
        int slot = 0, r;

        assert(node);
        assert(dm_name);
        assert(hr);
        assert(ret);

        r = sym_crypt_init(&cd, node);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate libcryptsetup context: %m");

        cryptsetup_enable_logging(cd);

        /* Normally we'd just leave volume key generation to libcryptsetup. However, we can't, since we
         * can't extract the volume key from the library again, but we need it in order to encrypt the JSON
         * record. Hence, let's generate it on our own, so that we can keep track of it. */

        volume_key_size = user_record_luks_volume_key_size(hr);
        volume_key = malloc(volume_key_size);
        if (!volume_key)
                return log_oom();

        r = genuine_random_bytes(volume_key, volume_key_size, RANDOM_BLOCK);
        if (r < 0)
                return log_error_errno(r, "Failed to generate volume key: %m");

#if HAVE_CRYPT_SET_METADATA_SIZE
        /* Increase the metadata space to 4M, the largest LUKS2 supports */
        r = sym_crypt_set_metadata_size(cd, 4096U*1024U, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to change LUKS2 metadata size: %m");
#endif

        /* Prepare both PBKDF parameter sets up front; which one applies is decided per keyslot below */
        build_good_pbkdf(&good_pbkdf, hr);
        build_minimal_pbkdf(&minimal_pbkdf, hr);

        r = sym_crypt_format(
                        cd,
                        CRYPT_LUKS2,
                        user_record_luks_cipher(hr),
                        user_record_luks_cipher_mode(hr),
                        SD_ID128_TO_UUID_STRING(uuid),
                        volume_key,
                        volume_key_size,
                        &(struct crypt_params_luks2) {
                                .label = label,
                                .subsystem = "systemd-home",
                                .sector_size = 512U,
                                .pbkdf = &good_pbkdf,
                        });
        if (r < 0)
                return log_error_errno(r, "Failed to format LUKS image: %m");

        log_info("LUKS formatting completed.");

        /* Add one keyslot per password, each unlocking the same volume key */
        STRV_FOREACH(pp, effective_passwords) {

                if (password_cache_contains(cache, *pp)) { /* is this a fido2 or pkcs11 password? */
                        log_debug("Using minimal PBKDF for slot %i", slot);
                        r = sym_crypt_set_pbkdf_type(cd, &minimal_pbkdf);
                } else {
                        log_debug("Using good PBKDF for slot %i", slot);
                        r = sym_crypt_set_pbkdf_type(cd, &good_pbkdf);
                }
                if (r < 0)
                        return log_error_errno(r, "Failed to tweak PBKDF for slot %i: %m", slot);

                r = sym_crypt_keyslot_add_by_volume_key(
                                cd,
                                slot,
                                volume_key,
                                volume_key_size,
                                *pp,
                                strlen(*pp));
                if (r < 0)
                        return log_error_errno(r, "Failed to set up LUKS password for slot %i: %m", slot);

                log_info("Writing password to LUKS keyslot %i completed.", slot);
                slot++;
        }

        r = sym_crypt_activate_by_volume_key(
                        cd,
                        dm_name,
                        volume_key,
                        volume_key_size,
                        discard ? CRYPT_ACTIVATE_ALLOW_DISCARDS : 0);
        if (r < 0)
                return log_error_errno(r, "Failed to activate LUKS superblock: %m");

        log_info("LUKS activation by volume key succeeded.");

        /* Only the embedded part of the user record goes into the LUKS token */
        r = user_record_clone(hr, USER_RECORD_EXTRACT_EMBEDDED|USER_RECORD_PERMISSIVE, &reduced);
        if (r < 0)
                return log_error_errno(r, "Failed to prepare home record for LUKS: %m");

        r = format_luks_token_text(cd, reduced, volume_key, &text);
        if (r < 0)
                return r;

        r = sym_crypt_token_json_set(cd, CRYPT_ANY_TOKEN, text);
        if (r < 0)
                return log_error_errno(r, "Failed to set LUKS JSON token: %m");

        log_info("Writing user record as LUKS token completed.");

        /* NOTE(review): 'ret' was already asserted non-NULL above, so this check is always true */
        if (ret)
                *ret = TAKE_PTR(cd);

        return 0;
}
1830 
/* Writes a fresh GPT disk label with a single "user home" partition to the device referenced by 'fd'.
 *
 * The partition starts at the first usable LBA rounded up to a 4K boundary, and ends at the end of the
 * usable space rounded down to a 4K boundary. It is named 'label' and carries the partition UUID
 * 'uuid'. All LBA <-> byte conversions in here use a multiplier of 512.
 *
 * On success returns 0 and stores the partition's byte offset in *ret_offset, its byte size in
 * *ret_size and the UUID of the disk label in *ret_disk_uuid. All three output parameters are
 * mandatory. On failure returns a negative errno-style error and leaves the output parameters
 * untouched. */
static int make_partition_table(
                int fd,
                const char *label,
                sd_id128_t uuid,
                uint64_t *ret_offset,
                uint64_t *ret_size,
                sd_id128_t *ret_disk_uuid) {

        _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *p = NULL, *q = NULL;
        _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL;
        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
        uint64_t offset, size, first_lba, start, last_lba, end;
        sd_id128_t disk_uuid;
        int r;

        assert(fd >= 0);
        assert(label);
        assert(ret_offset);
        assert(ret_size);
        assert(ret_disk_uuid); /* written unconditionally below, just like the other two */

        t = fdisk_new_parttype();
        if (!t)
                return log_oom();

        r = fdisk_parttype_set_typestr(t, GPT_USER_HOME_STR);
        if (r < 0)
                return log_error_errno(r, "Failed to initialize partition type: %m");

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        /* libfdisk wants a path, hence refer to our fd via /proc */
        if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        r = fdisk_create_disklabel(c, "gpt");
        if (r < 0)
                return log_error_errno(r, "Failed to create GPT disk label: %m");

        p = fdisk_new_partition();
        if (!p)
                return log_oom();

        r = fdisk_partition_set_type(p, t);
        if (r < 0)
                return log_error_errno(r, "Failed to set partition type: %m");

        r = fdisk_partition_partno_follow_default(p, 1);
        if (r < 0)
                return log_error_errno(r, "Failed to place partition at first free partition index: %m");

        first_lba = fdisk_get_first_lba(c); /* Boundary where usable space starts */
        assert(first_lba <= UINT64_MAX/512);
        start = DISK_SIZE_ROUND_UP(first_lba * 512); /* Round up to multiple of 4K */

        log_debug("Starting partition at offset %" PRIu64, start);

        /* DISK_SIZE_ROUND_UP() signals overflow by returning UINT64_MAX */
        if (start == UINT64_MAX)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Overflow while rounding up start LBA.");

        last_lba = fdisk_get_last_lba(c); /* One sector before boundary where usable space ends */
        assert(last_lba < UINT64_MAX/512);
        end = DISK_SIZE_ROUND_DOWN((last_lba + 1) * 512); /* Round down to multiple of 4K */

        if (end <= start)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Resulting partition size zero or negative.");

        r = fdisk_partition_set_start(p, start / 512);
        if (r < 0)
                return log_error_errno(r, "Failed to place partition at offset %" PRIu64 ": %m", start);

        r = fdisk_partition_set_size(p, (end - start) / 512);
        if (r < 0)
                return log_error_errno(r, "Failed to end partition at offset %" PRIu64 ": %m", end);

        r = fdisk_partition_set_name(p, label);
        if (r < 0)
                return log_error_errno(r, "Failed to set partition name: %m");

        r = fdisk_partition_set_uuid(p, SD_ID128_TO_UUID_STRING(uuid));
        if (r < 0)
                return log_error_errno(r, "Failed to set partition UUID: %m");

        r = fdisk_add_partition(c, p, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to add partition: %m");

        r = fdisk_write_disklabel(c);
        if (r < 0)
                return log_error_errno(r, "Failed to write disk label: %m");

        r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
        if (r < 0)
                return log_error_errno(r, "Failed to determine disk label UUID: %m");

        r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed to parse disk label UUID: %m");

        /* Read back what was actually written, rather than trusting the values we asked for */
        r = fdisk_get_partition(c, 0, &q);
        if (r < 0)
                return log_error_errno(r, "Failed to read created partition metadata: %m");

        assert(fdisk_partition_has_start(q));
        offset = fdisk_partition_get_start(q);
        if (offset > UINT64_MAX / 512U)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition offset too large.");

        assert(fdisk_partition_has_size(q));
        size = fdisk_partition_get_size(q);
        if (size > UINT64_MAX / 512U)
                return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "Partition size too large.");

        *ret_offset = offset * 512U;
        *ret_size = size * 512U;
        *ret_disk_uuid = disk_uuid;

        return 0;
}
1955 
/* Returns true if 'host_size' is at least the minimal size minimal_size_by_fs_name() reports for the
 * given file system type. If that lookup yields UINT64_MAX, the size is never considered
 * supported. */
static bool supported_fs_size(const char *fstype, uint64_t host_size) {
        uint64_t minimum = minimal_size_by_fs_name(fstype);

        return minimum != UINT64_MAX && host_size >= minimum;
}
1965 
wait_for_devlink(const char * path)1966 static int wait_for_devlink(const char *path) {
1967         _cleanup_close_ int inotify_fd = -1;
1968         usec_t until;
1969         int r;
1970 
1971         /* let's wait for a device link to show up in /dev, with a timeout. This is good to do since we
1972          * return a /dev/disk/by-uuid/… link to our callers and they likely want to access it right-away,
1973          * hence let's wait until udev has caught up with our changes, and wait for the symlink to be
1974          * created. */
1975 
1976         until = usec_add(now(CLOCK_MONOTONIC), 45 * USEC_PER_SEC);
1977 
1978         for (;;) {
1979                 _cleanup_free_ char *dn = NULL;
1980                 usec_t w;
1981 
1982                 if (laccess(path, F_OK) < 0) {
1983                         if (errno != ENOENT)
1984                                 return log_error_errno(errno, "Failed to determine whether %s exists: %m", path);
1985                 } else
1986                         return 0; /* Found it */
1987 
1988                 if (inotify_fd < 0) {
1989                         /* We need to wait for the device symlink to show up, let's create an inotify watch for it */
1990                         inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1991                         if (inotify_fd < 0)
1992                                 return log_error_errno(errno, "Failed to allocate inotify fd: %m");
1993                 }
1994 
1995                 dn = dirname_malloc(path);
1996                 for (;;) {
1997                         if (!dn)
1998                                 return log_oom();
1999 
2000                         log_info("Watching %s", dn);
2001 
2002                         if (inotify_add_watch(inotify_fd, dn, IN_CREATE|IN_MOVED_TO|IN_ONLYDIR|IN_DELETE_SELF|IN_MOVE_SELF) < 0) {
2003                                 if (errno != ENOENT)
2004                                         return log_error_errno(errno, "Failed to add watch on %s: %m", dn);
2005                         } else
2006                                 break;
2007 
2008                         if (empty_or_root(dn))
2009                                 break;
2010 
2011                         dn = dirname_malloc(dn);
2012                 }
2013 
2014                 w = now(CLOCK_MONOTONIC);
2015                 if (w >= until)
2016                         return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device link %s still hasn't shown up, giving up.", path);
2017 
2018                 r = fd_wait_for_event(inotify_fd, POLLIN, usec_sub_unsigned(until, w));
2019                 if (r < 0)
2020                         return log_error_errno(r, "Failed to watch inotify: %m");
2021 
2022                 (void) flush_fd(inotify_fd);
2023         }
2024 }
2025 
/* Determines the size a newly created home image shall have.
 *
 * The upper boundary is the space available on the file system 'image_fd' resides on, rounded down to
 * 4K. If the user record carries an absolute disk size it is used, capped at that boundary. Otherwise
 * a fraction of the boundary is used: either the relative size from the user record (where UINT32_MAX
 * stands for 100%), or the default percentage if none is configured. Finally the result is raised to
 * the lower boundary, which is the minimal size for the file system type plus GPT/LUKS2 overhead, but
 * at least USER_DISK_SIZE_MIN.
 *
 * Returns 0 and stores the size in *ret on success, negative errno-style error on failure, in which
 * case *ret is left untouched. */
static int calculate_initial_image_size(UserRecord *h, int image_fd, const char *fstype, uint64_t *ret) {
        uint64_t available, floor_size, chosen;
        struct statfs backing;

        assert(h);
        assert(image_fd >= 0);
        assert(ret);

        if (fstatfs(image_fd, &backing) < 0)
                return log_error_errno(errno, "statfs() on image failed: %m");

        available = DISK_SIZE_ROUND_DOWN((uint64_t) backing.f_bsize * backing.f_bavail);

        if (h->disk_size != UINT64_MAX)
                /* Absolute size requested */
                chosen = MIN(DISK_SIZE_ROUND_DOWN(h->disk_size), available);
        else if (h->disk_size_relative != UINT64_MAX) {
                /* Relative size requested */
                chosen = DISK_SIZE_ROUND_DOWN((uint64_t) ((double) available * (double) CLAMP(h->disk_size_relative, 0U, UINT32_MAX) / (double) UINT32_MAX));

                log_info("Sizing home to %" PRIu64 ".%01" PRIu64 "%% of available disk space, which is %s.",
                         (h->disk_size_relative * 100) / UINT32_MAX,
                         ((h->disk_size_relative * 1000) / UINT32_MAX) % 10,
                         FORMAT_BYTES(chosen));
        } else {
                /* Nothing requested, apply the default percentage. Check first that the multiplication
                 * below cannot overflow. */
                if (available > UINT64_MAX / USER_DISK_SIZE_DEFAULT_PERCENT)
                        return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Disk size too large.");

                chosen = DISK_SIZE_ROUND_DOWN(available * USER_DISK_SIZE_DEFAULT_PERCENT / 100);

                log_info("Sizing home to %u%% of available disk space, which is %s.",
                         USER_DISK_SIZE_DEFAULT_PERCENT,
                         FORMAT_BYTES(chosen));
        }

        floor_size = minimal_size_by_fs_name(fstype);
        if (floor_size != UINT64_MAX) {
                /* Account for what the partition table and the LUKS2 header take away */
                assert(GPT_LUKS2_OVERHEAD < UINT64_MAX - floor_size);
                floor_size += GPT_LUKS2_OVERHEAD;
        }
        if (floor_size == UINT64_MAX || floor_size < USER_DISK_SIZE_MIN)
                floor_size = USER_DISK_SIZE_MIN;

        /* Never go below the lower boundary, even if that exceeds what is available */
        *ret = chosen < floor_size ? floor_size : chosen;
        return 0;
}
2073 
/* Sizes the image file 'fd' to 'size' bytes.
 *
 * If the user record enables discard the file is merely truncated to the size. Otherwise the space is
 * allocated via fallocate(), falling back to truncation if the backing file system does not support
 * that.
 *
 * Returns 0 if the file was truncated, > 0 if it was allocated, -ENOSPC if there was not enough disk
 * space, and another negative errno-style error on any other failure. */
static int home_truncate(
                UserRecord *h,
                int fd,
                uint64_t size) {

        bool use_truncate;
        int rc;

        assert(h);
        assert(fd >= 0);

        use_truncate = user_record_luks_discard(h);

        if (!use_truncate) {
                rc = fallocate(fd, 0, 0, size);
                if (rc < 0 && ERRNO_IS_NOT_SUPPORTED(errno)) {
                        /* Some file systems do not support fallocate(), let's gracefully degrade
                         * (ZFS, reiserfs, …) and fall back to truncation */
                        log_notice_errno(errno, "Backing file system does not support fallocate(), falling back to ftruncate(), i.e. implicitly using non-discard mode.");
                        use_truncate = true;
                }
        }

        if (use_truncate)
                rc = ftruncate(fd, size);

        if (rc >= 0)
                return !use_truncate; /* Return == 0 if we managed to truncate, > 0 if we managed to allocate */

        if (ERRNO_IS_DISK_SPACE(errno)) {
                log_debug_errno(errno, "Not enough disk space to allocate home of size %s.", FORMAT_BYTES(size));
                return -ENOSPC; /* make recognizable */
        }

        return log_error_errno(errno, "Failed to truncate home image: %m");
}
2110 
home_create_luks(UserRecord * h,HomeSetup * setup,const PasswordCache * cache,char ** effective_passwords,UserRecord ** ret_home)2111 int home_create_luks(
2112                 UserRecord *h,
2113                 HomeSetup *setup,
2114                 const PasswordCache *cache,
2115                 char **effective_passwords,
2116                 UserRecord **ret_home) {
2117 
2118         _cleanup_free_ char *subdir = NULL, *disk_uuid_path = NULL;
2119         uint64_t encrypted_size,
2120                 host_size = 0, partition_offset = 0, partition_size = 0; /* Unnecessary initialization to appease gcc */
2121         _cleanup_(user_record_unrefp) UserRecord *new_home = NULL;
2122         sd_id128_t partition_uuid, fs_uuid, luks_uuid, disk_uuid;
2123         _cleanup_close_ int mount_fd = -1;
2124         const char *fstype, *ip;
2125         struct statfs sfs;
2126         int r;
2127 
2128         assert(h);
2129         assert(h->storage < 0 || h->storage == USER_LUKS);
2130         assert(setup);
2131         assert(!setup->temporary_image_path);
2132         assert(setup->image_fd < 0);
2133         assert(ret_home);
2134 
2135         r = dlopen_cryptsetup();
2136         if (r < 0)
2137                 return r;
2138 
2139         assert_se(ip = user_record_image_path(h));
2140 
2141         fstype = user_record_file_system_type(h);
2142         if (!supported_fstype(fstype))
2143                 return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "Unsupported file system type: %s", fstype);
2144 
2145         r = mkfs_exists(fstype);
2146         if (r < 0)
2147                 return log_error_errno(r, "Failed to check if mkfs binary for %s exists: %m", fstype);
2148         if (r == 0) {
2149                 if (h->file_system_type || streq(fstype, "ext4") || !supported_fstype("ext4"))
2150                         return log_error_errno(SYNTHETIC_ERRNO(EPROTONOSUPPORT), "mkfs binary for file system type %s does not exist.", fstype);
2151 
2152                 /* If the record does not explicitly declare a file system to use, and the compiled-in
2153                  * default does not actually exist, than do an automatic fallback onto ext4, as the baseline
2154                  * fs of Linux. We won't search for a working fs type here beyond ext4, i.e. nothing fancier
2155                  * than a single, conservative fallback to baseline. This should be useful in minimal
2156                  * environments where mkfs.btrfs or so are not made available, but mkfs.ext4 as Linux' most
2157                  * boring, most basic fs is. */
2158                 log_info("Formatting tool for compiled-in default file system %s not available, falling back to ext4 instead.", fstype);
2159                 fstype = "ext4";
2160         }
2161 
2162         if (sd_id128_is_null(h->partition_uuid)) {
2163                 r = sd_id128_randomize(&partition_uuid);
2164                 if (r < 0)
2165                         return log_error_errno(r, "Failed to acquire partition UUID: %m");
2166         } else
2167                 partition_uuid = h->partition_uuid;
2168 
2169         if (sd_id128_is_null(h->luks_uuid)) {
2170                 r = sd_id128_randomize(&luks_uuid);
2171                 if (r < 0)
2172                         return log_error_errno(r, "Failed to acquire LUKS UUID: %m");
2173         } else
2174                 luks_uuid = h->luks_uuid;
2175 
2176         if (sd_id128_is_null(h->file_system_uuid)) {
2177                 r = sd_id128_randomize(&fs_uuid);
2178                 if (r < 0)
2179                         return log_error_errno(r, "Failed to acquire file system UUID: %m");
2180         } else
2181                 fs_uuid = h->file_system_uuid;
2182 
2183         r = make_dm_names(h, setup);
2184         if (r < 0)
2185                 return r;
2186 
2187         r = access(setup->dm_node, F_OK);
2188         if (r < 0) {
2189                 if (errno != ENOENT)
2190                         return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
2191         } else
2192                 return log_error_errno(SYNTHETIC_ERRNO(EEXIST), "Device mapper device %s already exists, refusing.", setup->dm_node);
2193 
2194         if (path_startswith(ip, "/dev/")) {
2195                 _cleanup_free_ char *sysfs = NULL;
2196                 uint64_t block_device_size;
2197                 struct stat st;
2198 
2199                 /* Let's place the home directory on a real device, i.e. an USB stick or such */
2200 
2201                 setup->image_fd = open_image_file(h, ip, &st);
2202                 if (setup->image_fd < 0)
2203                         return setup->image_fd;
2204 
2205                 if (!S_ISBLK(st.st_mode))
2206                         return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Device is not a block device, refusing.");
2207 
2208                 if (asprintf(&sysfs, "/sys/dev/block/" DEVNUM_FORMAT_STR "/partition", DEVNUM_FORMAT_VAL(st.st_rdev)) < 0)
2209                         return log_oom();
2210                 if (access(sysfs, F_OK) < 0) {
2211                         if (errno != ENOENT)
2212                                 return log_error_errno(errno, "Failed to check whether %s exists: %m", sysfs);
2213                 } else
2214                         return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Operating on partitions is currently not supported, sorry. Please specify a top-level block device.");
2215 
2216                 if (flock(setup->image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
2217                         return log_error_errno(errno, "Failed to lock block device %s: %m", ip);
2218 
2219                 if (ioctl(setup->image_fd, BLKGETSIZE64, &block_device_size) < 0)
2220                         return log_error_errno(errno, "Failed to read block device size: %m");
2221 
2222                 if (h->disk_size == UINT64_MAX) {
2223 
2224                         /* If a relative disk size is requested, apply it relative to the block device size */
2225                         if (h->disk_size_relative < UINT32_MAX)
2226                                 host_size = CLAMP(DISK_SIZE_ROUND_DOWN(block_device_size * h->disk_size_relative / UINT32_MAX),
2227                                                   USER_DISK_SIZE_MIN, USER_DISK_SIZE_MAX);
2228                         else
2229                                 host_size = block_device_size; /* Otherwise, take the full device */
2230 
2231                 } else if (h->disk_size > block_device_size)
2232                         return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Selected disk size larger than backing block device, refusing.");
2233                 else
2234                         host_size = DISK_SIZE_ROUND_DOWN(h->disk_size);
2235 
2236                 if (!supported_fs_size(fstype, LESS_BY(host_size, GPT_LUKS2_OVERHEAD)))
2237                         return log_error_errno(SYNTHETIC_ERRNO(ERANGE),
2238                                                "Selected file system size too small for %s.", fstype);
2239 
2240                 /* After creation we should reference this partition by its UUID instead of the block
2241                  * device. That's preferable since the user might have specified a device node such as
2242                  * /dev/sdb to us, which might look very different when replugged. */
2243                 if (asprintf(&disk_uuid_path, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(luks_uuid)) < 0)
2244                         return log_oom();
2245 
2246                 if (user_record_luks_discard(h) || user_record_luks_offline_discard(h)) {
2247                         /* If we want online or offline discard, discard once before we start using things. */
2248 
2249                         if (ioctl(setup->image_fd, BLKDISCARD, (uint64_t[]) { 0, block_device_size }) < 0)
2250                                 log_full_errno(errno == EOPNOTSUPP ? LOG_DEBUG : LOG_WARNING, errno,
2251                                                "Failed to issue full-device BLKDISCARD on device, ignoring: %m");
2252                         else
2253                                 log_info("Full device discard completed.");
2254                 }
2255         } else {
2256                 _cleanup_free_ char *t = NULL;
2257 
2258                 r = mkdir_parents(ip, 0755);
2259                 if (r < 0)
2260                         return log_error_errno(r, "Failed to create parent directory of %s: %m", ip);
2261 
2262                 r = tempfn_random(ip, "homework", &t);
2263                 if (r < 0)
2264                         return log_error_errno(r, "Failed to derive temporary file name for %s: %m", ip);
2265 
2266                 setup->image_fd = open(t, O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0600);
2267                 if (setup->image_fd < 0)
2268                         return log_error_errno(errno, "Failed to create home image %s: %m", t);
2269 
2270                 setup->temporary_image_path = TAKE_PTR(t);
2271 
2272                 r = chattr_full(t, setup->image_fd, FS_NOCOW_FL|FS_NOCOMP_FL, FS_NOCOW_FL|FS_NOCOMP_FL, NULL, NULL, CHATTR_FALLBACK_BITWISE);
2273                 if (r < 0 && r != -ENOANO) /* ENOANO → some bits didn't work; which we skip logging about because chattr_full() already debug logs about those flags */
2274                         log_full_errno(ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
2275                                        "Failed to set file attributes on %s, ignoring: %m", setup->temporary_image_path);
2276 
2277                 r = calculate_initial_image_size(h, setup->image_fd, fstype, &host_size);
2278                 if (r < 0)
2279                         return r;
2280 
2281                 r = resize_image_loop(h, setup, 0, host_size, &host_size);
2282                 if (r < 0)
2283                         return r;
2284 
2285                 log_info("Allocating image file completed.");
2286         }
2287 
2288         r = make_partition_table(
2289                         setup->image_fd,
2290                         user_record_user_name_and_realm(h),
2291                         partition_uuid,
2292                         &partition_offset,
2293                         &partition_size,
2294                         &disk_uuid);
2295         if (r < 0)
2296                 return r;
2297 
2298         log_info("Writing of partition table completed.");
2299 
2300         r = loop_device_make(setup->image_fd, O_RDWR, partition_offset, partition_size, 0, &setup->loop);
2301         if (r < 0) {
2302                 if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
2303                                      * or similar and loopback bock devices are not available, return a
2304                                      * recognizable error in this case. */
2305                         log_error_errno(r, "Loopback block device support is not available on this system.");
2306                         return -ENOLINK; /* Make recognizable */
2307                 }
2308 
2309                 return log_error_errno(r, "Failed to set up loopback device for %s: %m", setup->temporary_image_path);
2310         }
2311 
2312         r = loop_device_flock(setup->loop, LOCK_EX); /* make sure udev won't read before we are done */
2313         if (r < 0)
2314                 return log_error_errno(r, "Failed to take lock on loop device: %m");
2315 
2316         log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
2317 
2318         r = luks_format(setup->loop->node,
2319                         setup->dm_name,
2320                         luks_uuid,
2321                         user_record_user_name_and_realm(h),
2322                         cache,
2323                         effective_passwords,
2324                         user_record_luks_discard(h) || user_record_luks_offline_discard(h),
2325                         h,
2326                         &setup->crypt_device);
2327         if (r < 0)
2328                 return r;
2329 
2330         setup->undo_dm = true;
2331 
2332         r = block_get_size_by_path(setup->dm_node, &encrypted_size);
2333         if (r < 0)
2334                 return log_error_errno(r, "Failed to get encrypted block device size: %m");
2335 
2336         log_info("Setting up LUKS device %s completed.", setup->dm_node);
2337 
2338         r = make_filesystem(setup->dm_node, fstype, user_record_user_name_and_realm(h), fs_uuid, user_record_luks_discard(h));
2339         if (r < 0)
2340                 return r;
2341 
2342         log_info("Formatting file system completed.");
2343 
2344         r = home_unshare_and_mount(setup->dm_node, fstype, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
2345         if (r < 0)
2346                 return r;
2347 
2348         setup->undo_mount = true;
2349 
2350         subdir = path_join(HOME_RUNTIME_WORK_DIR, user_record_user_name_and_realm(h));
2351         if (!subdir)
2352                 return log_oom();
2353 
2354         /* Prefer using a btrfs subvolume if we can, fall back to directory otherwise */
2355         r = btrfs_subvol_make_fallback(subdir, 0700);
2356         if (r < 0)
2357                 return log_error_errno(r, "Failed to create user directory in mounted image file: %m");
2358 
2359         setup->root_fd = open(subdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
2360         if (setup->root_fd < 0)
2361                 return log_error_errno(errno, "Failed to open user directory in mounted image file: %m");
2362 
2363         (void) home_shift_uid(setup->root_fd, NULL, UID_NOBODY, h->uid, &mount_fd);
2364 
2365         if (mount_fd >= 0) {
2366                 /* If we have established a new mount, then we can use that as new root fd to our home directory. */
2367                 safe_close(setup->root_fd);
2368 
2369                 setup->root_fd = fd_reopen(mount_fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
2370                 if (setup->root_fd < 0)
2371                         return log_error_errno(setup->root_fd, "Unable to convert mount fd into proper directory fd: %m");
2372 
2373                 mount_fd = safe_close(mount_fd);
2374         }
2375 
2376         r = home_populate(h, setup->root_fd);
2377         if (r < 0)
2378                 return r;
2379 
2380         r = home_sync_and_statfs(setup->root_fd, &sfs);
2381         if (r < 0)
2382                 return r;
2383 
2384         r = user_record_clone(h, USER_RECORD_LOAD_MASK_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE, &new_home);
2385         if (r < 0)
2386                 return log_error_errno(r, "Failed to clone record: %m");
2387 
2388         r = user_record_add_binding(
2389                         new_home,
2390                         USER_LUKS,
2391                         disk_uuid_path ?: ip,
2392                         partition_uuid,
2393                         luks_uuid,
2394                         fs_uuid,
2395                         sym_crypt_get_cipher(setup->crypt_device),
2396                         sym_crypt_get_cipher_mode(setup->crypt_device),
2397                         luks_volume_key_size_convert(setup->crypt_device),
2398                         fstype,
2399                         NULL,
2400                         h->uid,
2401                         (gid_t) h->uid);
2402         if (r < 0)
2403                 return log_error_errno(r, "Failed to add binding to record: %m");
2404 
2405         if (user_record_luks_offline_discard(h)) {
2406                 r = run_fitrim(setup->root_fd);
2407                 if (r < 0)
2408                         return r;
2409         }
2410 
2411         setup->root_fd = safe_close(setup->root_fd);
2412 
2413         r = home_setup_undo_mount(setup, LOG_ERR);
2414         if (r < 0)
2415                 return r;
2416 
2417         r = home_setup_undo_dm(setup, LOG_ERR);
2418         if (r < 0)
2419                 return r;
2420 
2421         setup->loop = loop_device_unref(setup->loop);
2422 
2423         if (!user_record_luks_offline_discard(h)) {
2424                 r= run_fallocate(setup->image_fd, NULL /* refresh stat() data */);
2425                 if (r < 0)
2426                         return r;
2427         }
2428 
2429         /* Sync everything to disk before we move things into place under the final name. */
2430         if (fsync(setup->image_fd) < 0)
2431                 return log_error_errno(r, "Failed to synchronize image to disk: %m");
2432 
2433         if (disk_uuid_path)
2434                 /* Reread partition table if this is a block device */
2435                 (void) ioctl(setup->image_fd, BLKRRPART, 0);
2436         else {
2437                 assert(setup->temporary_image_path);
2438 
2439                 if (rename(setup->temporary_image_path, ip) < 0)
2440                         return log_error_errno(errno, "Failed to rename image file: %m");
2441 
2442                 setup->temporary_image_path = mfree(setup->temporary_image_path);
2443 
2444                 /* If we operate on a file, sync the containing directory too. */
2445                 r = fsync_directory_of_file(setup->image_fd);
2446                 if (r < 0)
2447                         return log_error_errno(r, "Failed to synchronize directory of image file to disk: %m");
2448 
2449                 log_info("Moved image file into place.");
2450         }
2451 
2452         /* Let's close the image fd now. If we are operating on a real block device this will release the BSD
2453          * lock that ensures udev doesn't interfere with what we are doing */
2454         setup->image_fd = safe_close(setup->image_fd);
2455 
2456         if (disk_uuid_path)
2457                 (void) wait_for_devlink(disk_uuid_path);
2458 
2459         log_info("Creation completed.");
2460 
2461         print_size_summary(host_size, encrypted_size, &sfs);
2462 
2463         log_debug("GPT + LUKS2 overhead is %" PRIu64 " (expected %" PRIu64 ")", host_size - encrypted_size, GPT_LUKS2_OVERHEAD);
2464 
2465         *ret_home = TAKE_PTR(new_home);
2466         return 0;
2467 }
2468 
/* Reports whether the device mapper node belonging to this home (setup->dm_node, as filled in by
 * make_dm_names()) currently exists.
 *
 * Returns 1 if the node exists, 0 if it does not exist (ENOENT). If make_dm_names() fails its result is
 * passed through; any other access() failure is logged and the value of log_error_errno() is returned. */
int home_get_state_luks(UserRecord *h, HomeSetup *setup) {
        int k;

        assert(h);
        assert(setup);

        k = make_dm_names(h, setup);
        if (k < 0)
                return k;

        if (access(setup->dm_node, F_OK) >= 0)
                return 1; /* node is there */

        if (errno == ENOENT)
                return 0; /* node is definitely not there */

        return log_error_errno(errno, "Failed to determine whether %s exists: %m", setup->dm_node);
}
2485 
/* Non-negative results of can_resize_fs(), telling the caller in which mode the file system can be resized. */
enum {
        CAN_RESIZE_ONLINE,  /* default result: the requested resize can be done online */
        CAN_RESIZE_OFFLINE, /* returned when shrinking ext4, which is only possible offline */
};
2490 
can_resize_fs(int fd,uint64_t old_size,uint64_t new_size)2491 static int can_resize_fs(int fd, uint64_t old_size, uint64_t new_size) {
2492         struct statfs sfs;
2493 
2494         assert(fd >= 0);
2495 
2496         /* Filter out bogus requests early */
2497         if (old_size == 0 || old_size == UINT64_MAX ||
2498             new_size == 0 || new_size == UINT64_MAX)
2499                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid resize parameters.");
2500 
2501         if ((old_size & 511) != 0 || (new_size & 511) != 0)
2502                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Resize parameters not multiple of 512.");
2503 
2504         if (fstatfs(fd, &sfs) < 0)
2505                 return log_error_errno(errno, "Failed to fstatfs() file system: %m");
2506 
2507         if (is_fs_type(&sfs, BTRFS_SUPER_MAGIC)) {
2508 
2509                 if (new_size < BTRFS_MINIMAL_SIZE)
2510                         return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for btrfs (needs to be 256M at least.");
2511 
2512                 /* btrfs can grow and shrink online */
2513 
2514         } else if (is_fs_type(&sfs, XFS_SB_MAGIC)) {
2515 
2516                 if (new_size < XFS_MINIMAL_SIZE)
2517                         return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for xfs (needs to be 14M at least).");
2518 
2519                 /* XFS can grow, but not shrink */
2520                 if (new_size < old_size)
2521                         return log_error_errno(SYNTHETIC_ERRNO(EMSGSIZE), "Shrinking this type of file system is not supported.");
2522 
2523         } else if (is_fs_type(&sfs, EXT4_SUPER_MAGIC)) {
2524 
2525                 if (new_size < EXT4_MINIMAL_SIZE)
2526                         return log_error_errno(SYNTHETIC_ERRNO(ERANGE), "New file system size too small for ext4 (needs to be 1M at least).");
2527 
2528                 /* ext4 can grow online, and shrink offline */
2529                 if (new_size < old_size)
2530                         return CAN_RESIZE_OFFLINE;
2531 
2532         } else
2533                 return log_error_errno(SYNTHETIC_ERRNO(ESOCKTNOSUPPORT), "Resizing this type of file system is not supported.");
2534 
2535         return CAN_RESIZE_ONLINE;
2536 }
2537 
/* Resizes the ext4 file system on setup->dm_node to new_size (bytes) while it is unmounted.
 *
 * Sequence: close setup->root_fd and undo the mount (if either is active), run "e2fsck -fp", run
 * "resize2fs" with the size expressed in 512 byte sectors, then mount the node again with the given
 * discard/flags/extra_mount_options and reopen HOME_RUNTIME_WORK_DIR as setup->root_fd — but only for
 * whatever was active on entry.
 *
 * Returns 0 on success, a negative value on failure. On failure the file system is not remounted here. */
static int ext4_offline_resize_fs(
                HomeSetup *setup,
                uint64_t new_size,
                bool discard,
                unsigned long flags,
                const char *extra_mount_options) {

        _cleanup_free_ char *sectors_arg = NULL;
        bool reopen_root, remount = false;
        pid_t resize_pid, fsck_pid;
        int r, fsck_status;

        assert(setup);
        assert(setup->dm_node);

        /* Step 1: let go of the file system, remembering what we have to restore afterwards */
        reopen_root = setup->root_fd >= 0;
        if (reopen_root)
                setup->root_fd = safe_close(setup->root_fd);

        if (setup->undo_mount) {
                r = home_setup_undo_mount(setup, LOG_ERR);
                if (r < 0)
                        return r;

                remount = true;
        }

        log_info("Temporary unmounting of file system completed.");

        /* Step 2: resize2fs insists on a forced file system check before it does anything */
        r = safe_fork("(e2fsck)",
                      FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
                      &fsck_pid);
        if (r < 0)
                return r;
        if (r == 0) {
                /* We are the child: become e2fsck, or complain and die */
                execlp("e2fsck", "e2fsck", "-fp", setup->dm_node, NULL);
                log_open();
                log_error_errno(errno, "Failed to execute e2fsck: %m");
                _exit(EXIT_FAILURE);
        }

        fsck_status = wait_for_terminate_and_check("e2fsck", fsck_pid, WAIT_LOG_ABNORMAL);
        if (fsck_status < 0)
                return fsck_status;

        /* Anything beyond "errors corrected" is worth a warning, but only reboot-required or uncorrected
         * errors make us give up. */
        if ((fsck_status & ~FSCK_ERROR_CORRECTED) != 0) {
                log_warning("e2fsck failed with exit status %i.", fsck_status);

                if ((fsck_status & (FSCK_SYSTEM_SHOULD_REBOOT|FSCK_ERRORS_LEFT_UNCORRECTED)) != 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "File system is corrupted, refusing.");

                log_warning("Ignoring fsck error.");
        }

        log_info("Forced file system check completed.");

        /* Step 3: the actual resize. resize2fs doesn't take byte sizes, hence pass 512 byte sectors ("s" suffix) */
        if (asprintf(&sectors_arg, "%" PRIu64 "s", new_size / 512) < 0)
                return log_oom();

        r = safe_fork("(e2resize)",
                      FORK_RESET_SIGNALS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_LOG|FORK_WAIT|FORK_STDOUT_TO_STDERR|FORK_CLOSE_ALL_FDS,
                      &resize_pid);
        if (r < 0)
                return r;
        if (r == 0) {
                /* We are the child: become resize2fs, or complain and die */
                execlp("resize2fs", "resize2fs", setup->dm_node, sectors_arg, NULL);
                log_open();
                log_error_errno(errno, "Failed to execute resize2fs: %m");
                _exit(EXIT_FAILURE);
        }

        log_info("Offline file system resize completed.");

        /* Step 4: put back whatever we took away in step 1 */
        if (remount) {
                r = home_mount_node(setup->dm_node, "ext4", discard, flags, extra_mount_options);
                if (r < 0)
                        return r;

                setup->undo_mount = true;
        }

        if (reopen_root) {
                setup->root_fd = open(HOME_RUNTIME_WORK_DIR, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                if (setup->root_fd < 0)
                        return log_error_errno(errno, "Failed to reopen file system: %m");
        }

        log_info("File system mounted again.");

        return 0;
}
2636 
/* Looks up the partition that is about to be resized in the GPT partition table of the device/image fd.
 *
 * partition_offset and old_partition_size are in bytes and must be multiples of 512; the partition is
 * identified by matching both values (in 512 byte sectors) against the table entries. Any other used
 * partition that ends after our start offset makes us refuse, since we can only resize the last partition.
 *
 * Returns:
 *   0  if partition_offset is 0, i.e. there is no partition table ("naked" device). All three return
 *      parameters are reset (null UUID, NULL table, NULL partition).
 *   1  if the partition was found. *ret_disk_uuid is the disk label UUID, *ret_table the partition table
 *      (reference passed to the caller) and *ret_partition the matching entry as obtained from that table
 *      (no extra reference is taken here).
 *  <0  on failure, after logging. */
static int prepare_resize_partition(
                int fd,
                uint64_t partition_offset,
                uint64_t old_partition_size,
                sd_id128_t *ret_disk_uuid,
                struct fdisk_table **ret_table,
                struct fdisk_partition **ret_partition) {

        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL;
        _cleanup_free_ char *path = NULL, *disk_uuid_as_string = NULL;
        struct fdisk_partition *found = NULL;
        sd_id128_t disk_uuid;
        size_t n_partitions;
        int r;

        assert(fd >= 0);
        assert(ret_disk_uuid);
        assert(ret_table);
        assert(ret_partition);

        assert((partition_offset & 511) == 0);
        assert((old_partition_size & 511) == 0);
        assert(UINT64_MAX - old_partition_size >= partition_offset);

        if (partition_offset == 0) {
                /* If the offset is at the beginning we assume no partition table, let's exit early. Make
                 * sure to initialize all return parameters, so that callers never see an indeterminate
                 * partition pointer next to a NULL table. */
                log_debug("Not rewriting partition table, operating on naked device.");
                *ret_disk_uuid = SD_ID128_NULL;
                *ret_table = NULL;
                *ret_partition = NULL;
                return 0;
        }

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        /* Hand the already open fd to libfdisk by path */
        if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT))
                return log_error_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Disk has no GPT partition table.");

        r = fdisk_get_disklabel_id(c, &disk_uuid_as_string);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire disk UUID: %m");

        r = sd_id128_from_string(disk_uuid_as_string, &disk_uuid);
        if (r < 0)
                return log_error_errno(r, "Failed parse disk UUID: %m");

        r = fdisk_get_partitions(c, &t);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire partition table: %m");

        n_partitions = fdisk_table_get_nents(t);
        for (size_t i = 0; i < n_partitions; i++)  {
                struct fdisk_partition *p;

                p = fdisk_table_get_partition(t, i);
                if (!p)
                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m");

                if (fdisk_partition_is_used(p) <= 0)
                        continue;
                if (fdisk_partition_has_start(p) <= 0 || fdisk_partition_has_size(p) <= 0 || fdisk_partition_has_end(p) <= 0)
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found partition without a size.");

                if (fdisk_partition_get_start(p) == partition_offset / 512U &&
                    fdisk_partition_get_size(p) == old_partition_size / 512U) {

                        /* Exactly one entry may match our offset and size */
                        if (found)
                                return log_error_errno(SYNTHETIC_ERRNO(ENOTUNIQ), "Partition found twice, refusing.");

                        found = p;
                } else if (fdisk_partition_get_end(p) > partition_offset / 512U)
                        /* Some other partition reaches beyond our start, hence ours is not the last one */
                        return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Can't extend, not last partition in image.");
        }

        if (!found)
                return log_error_errno(SYNTHETIC_ERRNO(ENOPKG), "Failed to find matching partition to resize.");

        *ret_disk_uuid = disk_uuid;
        *ret_table = TAKE_PTR(t);
        *ret_partition = found;

        return 1;
}
2728 
ask_cb(struct fdisk_context * c,struct fdisk_ask * ask,void * userdata)2729 static int ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *userdata) {
2730         char *result;
2731 
2732         assert(c);
2733 
2734         switch (fdisk_ask_get_type(ask)) {
2735 
2736         case FDISK_ASKTYPE_STRING:
2737                 result = new(char, 37);
2738                 if (!result)
2739                         return log_oom();
2740 
2741                 fdisk_ask_string_set_result(ask, sd_id128_to_uuid_string(*(sd_id128_t*) userdata, result));
2742                 break;
2743 
2744         default:
2745                 log_debug("Unexpected question from libfdisk, ignoring.");
2746         }
2747 
2748         return 0;
2749 }
2750 
/* Writes a new GPT partition table to fd in which partition p of table t has the size new_partition_size.
 *
 * t and p must either both be set or both be NULL. If they are NULL there is no partition table to
 * rewrite and 0 is returned without touching the device. Otherwise the first 1024 bytes of the device are
 * zeroed, a fresh GPT label is created, table t (with the patched size of p) is applied to it, the disk
 * label ID is set to disk_uuids (supplied to libfdisk through ask_cb()) and the label is written.
 *
 * new_partition_size is in bytes and is converted to 512 byte sectors. It is taken as uint64_t (like all
 * other sizes in this file) so that sizes of 4G and larger are not truncated where size_t is 32 bit wide.
 *
 * Returns 1 if a partition table was written, 0 if there was nothing to do, negative on failure after
 * logging. */
static int apply_resize_partition(
                int fd,
                sd_id128_t disk_uuids,
                struct fdisk_table *t,
                struct fdisk_partition *p,
                uint64_t new_partition_size) {

        _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL;
        _cleanup_free_ void *two_zero_lbas = NULL;
        _cleanup_free_ char *path = NULL;
        ssize_t n;
        int r;

        assert(fd >= 0);
        assert(!t == !p);

        if (!t) /* no partition table to apply, exit early */
                return 0;

        assert(p);

        /* Before writing our partition patch the final size in */
        r = fdisk_partition_size_explicit(p, 1);
        if (r < 0)
                return log_error_errno(r, "Failed to enable explicit partition size: %m");

        r = fdisk_partition_set_size(p, new_partition_size / 512U);
        if (r < 0)
                return log_error_errno(r, "Failed to change partition size: %m");

        two_zero_lbas = malloc0(1024U);
        if (!two_zero_lbas)
                return log_oom();

        /* libfdisk appears to get confused by the existing PMBR. Let's explicitly flush it out. */
        n = pwrite(fd, two_zero_lbas, 1024U, 0);
        if (n < 0)
                return log_error_errno(errno, "Failed to wipe partition table: %m");
        if (n != 1024)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short write while wiping partition table.");

        c = fdisk_new_context();
        if (!c)
                return log_oom();

        /* Hand the already open fd to libfdisk by path */
        if (asprintf(&path, "/proc/self/fd/%i", fd) < 0)
                return log_oom();

        r = fdisk_assign_device(c, path, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to open device: %m");

        r = fdisk_create_disklabel(c, "gpt");
        if (r < 0)
                return log_error_errno(r, "Failed to create GPT disk label: %m");

        r = fdisk_apply_table(c, t);
        if (r < 0)
                return log_error_errno(r, "Failed to apply partition table: %m");

        /* Setting the disk label ID makes libfdisk ask for the new value; ask_cb() answers with disk_uuids */
        r = fdisk_set_ask(c, ask_cb, &disk_uuids);
        if (r < 0)
                return log_error_errno(r, "Failed to set libfdisk query function: %m");

        r = fdisk_set_disklabel_id(c);
        if (r < 0)
                return log_error_errno(r, "Failed to change disklabel ID: %m");

        r = fdisk_write_disklabel(c);
        if (r < 0)
                return log_error_errno(r, "Failed to write disk label: %m");

        return 1;
}
2825 
/* Always keep at least 16M free, so that we can safely log in and update the user record while doing so.
 * get_smallest_fs_size() adds this margin on top of the space currently in use. */
#define HOME_MIN_FREE (16U*1024U*1024U)
2828 
get_smallest_fs_size(int fd,uint64_t * ret)2829 static int get_smallest_fs_size(int fd, uint64_t *ret) {
2830         uint64_t minsz, needed;
2831         struct statfs sfs;
2832 
2833         assert(fd >= 0);
2834         assert(ret);
2835 
2836         /* Determines the minimal disk size we might be able to shrink the file system referenced by the fd to. */
2837 
2838         if (syncfs(fd) < 0) /* let's sync before we query the size, so that the values returned are accurate */
2839                 return log_error_errno(errno, "Failed to synchronize home file system: %m");
2840 
2841         if (fstatfs(fd, &sfs) < 0)
2842                 return log_error_errno(errno, "Failed to statfs() home file system: %m");
2843 
2844         /* Let's determine the minimal file system size of the used fstype */
2845         minsz = minimal_size_by_fs_magic(sfs.f_type);
2846         if (minsz == UINT64_MAX)
2847                 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Don't know minimum file system size of file system type '%s' of home directory.", fs_type_to_string(sfs.f_type));
2848 
2849         if (minsz < USER_DISK_SIZE_MIN)
2850                 minsz = USER_DISK_SIZE_MIN;
2851 
2852         if (sfs.f_bfree > sfs.f_blocks)
2853                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Detected amount of free blocks is greater than the total amount of file system blocks. Refusing.");
2854 
2855         /* Calculate how much disk space is currently in use. */
2856         needed = sfs.f_blocks - sfs.f_bfree;
2857         if (needed > UINT64_MAX / sfs.f_bsize)
2858                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "File system size out of range.");
2859 
2860         needed *= sfs.f_bsize;
2861 
2862         /* Add some safety margin of free space we'll always keep */
2863         if (needed > UINT64_MAX - HOME_MIN_FREE) /* Check for overflow */
2864                 needed = UINT64_MAX;
2865         else
2866                 needed += HOME_MIN_FREE;
2867 
2868         *ret = DISK_SIZE_ROUND_UP(MAX(needed, minsz));
2869         return 0;
2870 }
2871 
get_largest_image_size(int fd,const struct stat * st,uint64_t * ret)2872 static int get_largest_image_size(int fd, const struct stat *st, uint64_t *ret) {
2873         uint64_t used, avail, sum;
2874         struct statfs sfs;
2875         int r;
2876 
2877         assert(fd >= 0);
2878         assert(st);
2879         assert(ret);
2880 
2881         /* Determines the maximum file size we might be able to grow the image file referenced by the fd to. */
2882 
2883         r = stat_verify_regular(st);
2884         if (r < 0)
2885                 return log_error_errno(r, "Image file is not a regular file, refusing: %m");
2886 
2887         if (syncfs(fd) < 0)
2888                 return log_error_errno(errno, "Failed to synchronize file system backing image file: %m");
2889 
2890         if (fstatfs(fd, &sfs) < 0)
2891                 return log_error_errno(errno, "Failed to statfs() image file: %m");
2892 
2893         used = (uint64_t) st->st_blocks * 512;
2894         avail = (uint64_t) sfs.f_bsize * sfs.f_bavail;
2895 
2896         if (avail > UINT64_MAX - used)
2897                 sum = UINT64_MAX;
2898         else
2899                 sum = avail + used;
2900 
2901         *ret = DISK_SIZE_ROUND_DOWN(MIN(sum, USER_DISK_SIZE_MAX));
2902         return 0;
2903 }
2904 
/* Resizes the file system mounted at setup->root_fd from 'old_fs_size' towards 'new_fs_size'.
 *
 * When growing, exactly one resize attempt is made. When shrinking a file system that can be resized
 * online, a failure due to lack of disk space does not abort the operation: instead we bisect between the
 * requested size and the old size, looking for the smallest size the file system accepts. Offline resizing
 * never bisects; any failure there is returned right away.
 *
 * The size the file system ended up with (which is 'old_fs_size' if nothing could be changed) is stored in
 * '*ret_fs_size' if that pointer is non-NULL. Returns 0 on success, negative errno-style code on failure. */
static int resize_fs_loop(
                UserRecord *h,
                HomeSetup *setup,
                int resize_type,
                uint64_t old_fs_size,
                uint64_t new_fs_size,
                uint64_t *ret_fs_size) {

        uint64_t achieved, lo, hi, attempt;
        unsigned n_rounds = 0;
        bool growing;
        int r;

        assert(h);
        assert(setup);
        assert(setup->root_fd >= 0);

        growing = new_fs_size > old_fs_size;

        /* The search window: sizes at or below 'lo' are known not to work (or not wanted), sizes at or above
         * 'hi' are known to work. We start by trying exactly what was asked for. */
        achieved = old_fs_size;
        lo = new_fs_size;
        hi = old_fs_size;
        attempt = new_fs_size;

        for (;;) {
                bool ok = true;

                n_rounds++;

                if (resize_type != CAN_RESIZE_ONLINE) {
                        /* Offline resizing: no retry logic here, since each attempt involves invoking
                         * external tooling. A failure is final. */
                        r = ext4_offline_resize_fs(setup, attempt, user_record_luks_discard(h), user_record_mount_flags(h), h->luks_extra_mount_options);
                        if (r < 0)
                                return r;

                        achieved = attempt;
                } else {
                        r = resize_fs(setup->root_fd, attempt, NULL);
                        if (r >= 0) {
                                log_debug("Successfully resized from %s to %s.", FORMAT_BYTES(achieved), FORMAT_BYTES(attempt));
                                achieved = attempt;
                        } else if (ERRNO_IS_DISK_SPACE(r) && !growing) {
                                /* Running out of space while shrinking is expected: the data simply
                                 * doesn't fit. Remember that and try a larger size next. */
                                log_debug_errno(r, "Shrinking from %s to %s didn't work, not enough space for contained data.", FORMAT_BYTES(achieved), FORMAT_BYTES(attempt));
                                ok = false;
                        } else
                                return log_error_errno(r, "Failed to resize file system: %m");
                }

                /* Growing never bisects, a single successful attempt is all we need. */
                if (growing)
                        break;

                /* Shrinking: narrow the window according to the outcome of this attempt. */
                if (ok)
                        hi = MIN(hi, attempt);
                else
                        lo = MAX(lo, attempt);

                if (lo >= hi) {
                        log_debug("Image can't be shrunk further (range to try is empty).");
                        break;
                }

                /* Next candidate: the 4K-aligned midpoint of the window. If alignment pushes it onto one of
                 * the boundaries there's nothing new left to try. */
                attempt = DISK_SIZE_ROUND_DOWN(lo + (hi - lo) / 2);
                if (attempt <= lo || attempt >= hi) {
                        log_debug("Image can't be shrunk further (remaining range to try too small).");
                        break;
                }
        }

        log_debug("Bisection loop completed after %u iterations.", n_rounds);

        if (ret_fs_size)
                *ret_fs_size = achieved;

        return 0;
}
2991 
/* Resizes the image file referenced by setup->image_fd from 'old_image_size' towards 'new_image_size' via
 * home_truncate().
 *
 * When shrinking, exactly one attempt is made. When growing via allocation, running out of disk space does
 * not abort the operation: instead we bisect between the old size and the requested size, looking for the
 * largest size the backing file system can provide. If home_truncate() reports that it merely truncated
 * (return value 0) rather than allocated (return value > 0), the loop ends right away.
 *
 * The size the image ended up with (which is 'old_image_size' if nothing could be changed) is stored in
 * '*ret_image_size' if that pointer is non-NULL. Returns 0 on success, negative errno-style code on
 * failure. */
static int resize_image_loop(
                UserRecord *h,
                HomeSetup *setup,
                uint64_t old_image_size,
                uint64_t new_image_size,
                uint64_t *ret_image_size) {

        uint64_t achieved, lo, hi, attempt;
        unsigned n_rounds = 0;
        bool shrinking;
        int r;

        assert(h);
        assert(setup);
        assert(setup->image_fd >= 0);

        shrinking = new_image_size < old_image_size;

        /* The search window: sizes at or below 'lo' are known to work, sizes at or above 'hi' are known not
         * to work (or not wanted). We start by trying exactly what was asked for. */
        achieved = old_image_size;
        lo = old_image_size;
        hi = new_image_size;
        attempt = new_image_size;

        for (;;) {
                n_rounds++;

                r = home_truncate(h, setup->image_fd, attempt);
                if (r == 0) {
                        /* Success, but through truncation, not allocation: bisecting is pointless in that
                         * case, hence leave the loop immediately. */
                        log_debug("Resizing from %s to %s via truncation worked successfully.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(attempt));
                        achieved = attempt;
                        break;
                }

                if (r > 0) {
                        /* Success: allocation worked. (The window is only consulted again if we keep
                         * looping, hence it's fine to adjust it before the shrink check below.) */
                        log_debug("Resizing from %s to %s via allocation worked successfully.", FORMAT_BYTES(achieved), FORMAT_BYTES(attempt));
                        achieved = attempt;
                        lo = MAX(lo, attempt);
                } else {
                        /* Only lack of disk space while growing is something we can work around. */
                        if (!ERRNO_IS_DISK_SPACE(r) || shrinking)
                                return r;

                        log_debug_errno(r, "Growing from %s to %s didn't work, not enough space on backing disk.", FORMAT_BYTES(achieved), FORMAT_BYTES(attempt));
                        hi = MIN(hi, attempt);
                }

                /* Shrinking never bisects, a single successful attempt is all we need. */
                if (shrinking)
                        break;

                if (lo >= hi) {
                        log_debug("Image can't be grown further (range to try is empty).");
                        break;
                }

                /* Next candidate: the 4K-aligned midpoint of the window. If alignment pushes it onto one of
                 * the boundaries there's nothing new left to try. */
                attempt = DISK_SIZE_ROUND_DOWN(lo + (hi - lo) / 2);
                if (attempt <= lo || attempt >= hi) {
                        log_debug("Image can't be grown further (remaining range to try too small).");
                        break;
                }
        }

        log_debug("Bisection loop completed after %u iterations.", n_rounds);

        if (ret_image_size)
                *ret_image_size = achieved;

        return 0;
}
3062 
/* Resizes the LUKS home directory of user record 'h': the backing image file (only if it is a regular file),
 * the partition inside it, the LUKS device and the file system contained therein.
 *
 * The target size is taken from h->disk_size, unless HOME_SETUP_RESIZE_MINIMIZE is set in 'flags', in which
 * case we aim for size zero and let the minimum-size adjustments below bump that up to the smallest size
 * that still works. For regular files the requested size applies to the image file as a whole, for block
 * devices it applies to the partition only (a block device itself cannot be resized).
 *
 * Order of operations:
 *   grow:   image file → loop device → partition table → LUKS device → file system
 *   shrink: file system → LUKS device → loop device → image file → partition table
 *
 * Parameters:
 *   h        – the user record; its storage type must be USER_LUKS
 *   flags    – HomeSetupFlags; the HOME_SETUP_RESIZE_* flags and HOME_SETUP_ALREADY_ACTIVATED are evaluated
 *              here, the full set is passed on to home_setup_luks() and home_maybe_shift_uid()
 *   setup    – the home setup context; setup->image_fd is opened here if it isn't open yet
 *   cache    – password cache, passed on to home_setup_luks() and home_load_embedded_identity()
 *   ret_home – if non-NULL, receives the updated user record, but only if the resize ran to completion
 *
 * Returns 0 on success, and also when the operation was skipped because there's nothing to do or because
 * the requested direction contradicts the flags (in these cases '*ret_home' is not written). Returns a
 * negative errno-style error code on failure. */
int home_resize_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                PasswordCache *cache,
                UserRecord **ret_home) {

        uint64_t old_image_size, new_image_size, old_fs_size, new_fs_size, crypto_offset, crypto_offset_bytes,
                new_partition_size, smallest_fs_size, resized_fs_size;
        _cleanup_(user_record_unrefp) UserRecord *header_home = NULL, *embedded_home = NULL, *new_home = NULL;
        _cleanup_(fdisk_unref_tablep) struct fdisk_table *table = NULL;
        struct fdisk_partition *partition = NULL;
        _cleanup_close_ int opened_image_fd = -1;
        _cleanup_free_ char *whole_disk = NULL;
        int r, resize_type, image_fd = -1;
        sd_id128_t disk_uuid;
        const char *ip, *ipo;
        struct statfs sfs;
        struct stat st;
        /* What the caller asked for, as derived from the requested size. Compared further down against what
         * the size calculations actually result in, to avoid resizing into the wrong direction. */
        enum {
                INTENTION_DONT_KNOW = 0,    /* These happen to match the return codes of CMP() */
                INTENTION_SHRINK = -1,
                INTENTION_GROW = 1,
        } intention = INTENTION_DONT_KNOW;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        assert_se(ipo = user_record_image_path(h));
        ip = strdupa_safe(ipo); /* copy out since original might change later in home record object */

        /* Open the image if the caller didn't do so already; either way 'st' describes it afterwards. */
        if (setup->image_fd < 0) {
                setup->image_fd = open_image_file(h, NULL, &st);
                if (setup->image_fd < 0)
                        return setup->image_fd;
        } else {
                if (fstat(setup->image_fd, &st) < 0)
                        return log_error_errno(errno, "Failed to stat image file %s: %m", ip);
        }

        /* 'image_fd' is the fd we resize and repartition; it's replaced by the whole disk device below if
         * the image turns out to be a partition block device. */
        image_fd = setup->image_fd;

        if (S_ISBLK(st.st_mode)) {
                dev_t parent;

                r = block_get_whole_disk(st.st_rdev, &parent);
                if (r < 0)
                        return log_error_errno(r, "Failed to acquire whole block device for %s: %m", ip);
                if (r > 0) {
                        /* If we shall resize a file system on a partition device, then let's figure out the
                         * whole disk device and operate on that instead, since we need to rewrite the
                         * partition table to resize the partition. */

                        log_info("Operating on partition device %s, using parent device.", ip);

                        r = device_path_make_major_minor(st.st_mode, parent, &whole_disk);
                        if (r < 0)
                                return log_error_errno(r, "Failed to derive whole disk path for %s: %m", ip);

                        opened_image_fd = open(whole_disk, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
                        if (opened_image_fd < 0)
                                return log_error_errno(errno, "Failed to open whole block device %s: %m", whole_disk);

                        image_fd = opened_image_fd;

                        /* From here on 'st' describes the whole disk, not the partition anymore. */
                        if (fstat(image_fd, &st) < 0)
                                return log_error_errno(errno, "Failed to stat whole block device %s: %m", whole_disk);
                        if (!S_ISBLK(st.st_mode))
                                return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Whole block device %s is not actually a block device, refusing.", whole_disk);
                } else
                        log_info("Operating on whole block device %s.", ip);

                if (ioctl(image_fd, BLKGETSIZE64, &old_image_size) < 0)
                        return log_error_errno(errno, "Failed to determine size of original block device: %m");

                if (flock(image_fd, LOCK_EX) < 0) /* make sure udev doesn't read from it while we operate on the device */
                        return log_error_errno(errno, "Failed to lock block device %s: %m", ip);

                new_image_size = old_image_size; /* we can't resize physical block devices */
        } else {
                r = stat_verify_regular(&st);
                if (r < 0)
                        return log_error_errno(r, "Image %s is not a block device nor regular file: %m", ip);

                old_image_size = st.st_size;

                /* Note an asymmetry here: when we operate on loopback files the specified disk size we get we
                 * apply onto the loopback file as a whole. When we operate on block devices we instead apply
                 * to the partition itself only. */

                if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
                        /* Aim for zero; the minimum size checks further down raise this as needed. */
                        new_image_size = 0;
                        intention = INTENTION_SHRINK;
                } else {
                        uint64_t new_image_size_rounded;

                        new_image_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);

                        if (old_image_size >= new_image_size_rounded && old_image_size <= h->disk_size) {
                                /* If exact match, or a match after we rounded down, don't do a thing */
                                log_info("Image size already matching, skipping operation.");
                                return 0;
                        }

                        new_image_size = new_image_size_rounded;
                        intention = CMP(new_image_size, old_image_size); /* Negative: shrink, positive: grow */
                }
        }

        /* Set up the home directory. Unless told not to sync identities we also acquire the user record
         * stored in the LUKS header here, and the one embedded in the file system right after. */
        r = home_setup_luks(
                        h,
                        flags,
                        whole_disk,
                        setup,
                        cache,
                        FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES) ? NULL : &header_home);
        if (r < 0)
                return r;

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                r = home_load_embedded_identity(h, setup->root_fd, header_home, USER_RECONCILE_REQUIRE_NEWER_OR_EQUAL, cache, &embedded_home, &new_home);
                if (r < 0)
                        return r;
        }

        r = home_maybe_shift_uid(h, flags, setup);
        if (r < 0)
                return r;

        log_info("offset = %" PRIu64 ", size = %" PRIu64 ", image = %" PRIu64, setup->partition_offset, setup->partition_size, old_image_size);

        /* Sanity check: offset + size must not overflow, and the partition must lie within the image. */
        if ((UINT64_MAX - setup->partition_offset) < setup->partition_size ||
            setup->partition_offset + setup->partition_size > old_image_size)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Old partition doesn't fit in backing storage, refusing.");

        if (S_ISREG(st.st_mode)) {
                uint64_t partition_table_extra, largest_size;

                /* Everything in the image file that is not the partition itself. This amount is kept as is
                 * when deriving the new partition size from the new image size. */
                partition_table_extra = old_image_size - setup->partition_size;

                /* Don't try to grow beyond what the backing file system can provide. */
                r = get_largest_image_size(setup->image_fd, &st, &largest_size);
                if (r < 0)
                        return r;
                if (new_image_size > largest_size)
                        new_image_size = largest_size;

                /* The image can't be smaller than the non-partition part of it (this also protects the
                 * subtraction below from underflowing). */
                if (new_image_size < partition_table_extra)
                        new_image_size = partition_table_extra;

                new_partition_size = DISK_SIZE_ROUND_DOWN(new_image_size - partition_table_extra);
        } else {
                assert(S_ISBLK(st.st_mode));

                /* On block devices the requested size applies to the partition, see above. */
                if (FLAGS_SET(flags, HOME_SETUP_RESIZE_MINIMIZE)) {
                        new_partition_size = 0;
                        intention = INTENTION_SHRINK;
                } else {
                        uint64_t new_partition_size_rounded;

                        new_partition_size_rounded = DISK_SIZE_ROUND_DOWN(h->disk_size);

                        if (setup->partition_size >= new_partition_size_rounded &&
                            setup->partition_size <= h->disk_size) {
                                log_info("Partition size already matching, skipping operation.");
                                return 0;
                        }

                        new_partition_size = new_partition_size_rounded;
                        intention = CMP(new_partition_size, setup->partition_size);
                }
        }

        /* Same sanity check as above, this time for the new partition size. */
        if ((UINT64_MAX - setup->partition_offset) < new_partition_size ||
            setup->partition_offset + new_partition_size > new_image_size)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "New partition doesn't fit into backing storage, refusing.");

        /* The data offset is treated as a count of 512 byte sectors here, hence convert to bytes, checking
         * for overflow first. */
        crypto_offset = sym_crypt_get_data_offset(setup->crypt_device);
        if (crypto_offset > UINT64_MAX/512U)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "LUKS2 data offset out of range, refusing.");
        crypto_offset_bytes = (uint64_t) crypto_offset * 512U;
        if (setup->partition_size <= crypto_offset_bytes)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Weird, old crypto payload offset doesn't actually fit in partition size?");

        /* Make sure at least the LUKS header fit in */
        if (new_partition_size <= crypto_offset_bytes) {
                uint64_t add;

                add = DISK_SIZE_ROUND_UP(crypto_offset_bytes) - new_partition_size;
                new_partition_size += add;
                if (S_ISREG(st.st_mode))
                        new_image_size += add;
        }

        /* The file system occupies whatever of the partition follows the LUKS data offset. */
        old_fs_size = setup->partition_size - crypto_offset_bytes;
        new_fs_size = DISK_SIZE_ROUND_DOWN(new_partition_size - crypto_offset_bytes);

        r = get_smallest_fs_size(setup->root_fd, &smallest_fs_size);
        if (r < 0)
                return r;

        /* Never go below the smallest size the file system needs; if we'd do, enlarge file system,
         * partition and (for regular files) image by the same amount. */
        if (new_fs_size < smallest_fs_size) {
                uint64_t add;

                add = DISK_SIZE_ROUND_UP(smallest_fs_size) - new_fs_size;
                new_fs_size += add;
                new_partition_size += add;
                if (S_ISREG(st.st_mode))
                        new_image_size += add;
        }

        if (new_fs_size == old_fs_size) {
                log_info("New file system size identical to old file system size, skipping operation.");
                return 0;
        }

        if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_GROW) && new_fs_size > old_fs_size) {
                log_info("New file system size would be larger than old, but shrinking requested, skipping operation.");
                return 0;
        }

        if (FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SHRINK) && new_fs_size < old_fs_size) {
                log_info("New file system size would be smaller than old, but growing requested, skipping operation.");
                return 0;
        }

        /* After all the clamping and rounding above the direction the file system would be resized in might
         * differ from the direction the caller asked for. Don't do anything in that case. */
        if (CMP(new_fs_size, old_fs_size) != intention) {
                if (intention < 0)
                        log_info("Shrink operation would enlarge file system, skipping operation.");
                else {
                        assert(intention > 0);
                        log_info("Grow operation would shrink file system, skipping operation.");
                }
                return 0;
        }

        /* Before we start doing anything, let's figure out if we actually can */
        resize_type = can_resize_fs(setup->root_fd, old_fs_size, new_fs_size);
        if (resize_type < 0)
                return resize_type;
        if (resize_type == CAN_RESIZE_OFFLINE && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
                return log_error_errno(SYNTHETIC_ERRNO(ETXTBSY), "File systems of this type can only be resized offline, but is currently online.");

        log_info("Ready to resize image size %s → %s, partition size %s → %s, file system size %s → %s.",
                 FORMAT_BYTES(old_image_size),
                 FORMAT_BYTES(new_image_size),
                 FORMAT_BYTES(setup->partition_size),
                 FORMAT_BYTES(new_partition_size),
                 FORMAT_BYTES(old_fs_size),
                 FORMAT_BYTES(new_fs_size));

        /* Collect what we need to rewrite the partition table later; the actual change is applied via
         * apply_resize_partition() further down. */
        r = prepare_resize_partition(
                        image_fd,
                        setup->partition_offset,
                        setup->partition_size,
                        &disk_uuid,
                        &table,
                        &partition);
        if (r < 0)
                return r;

        if (new_fs_size > old_fs_size) { /* → Grow */

                /* When growing we work from the outside in: first the image file, then the loop device, the
                 * partition and the LUKS device. The file system itself follows after this block. */

                if (S_ISREG(st.st_mode)) {
                        uint64_t resized_image_size;

                        /* Grow file size */
                        r = resize_image_loop(h, setup, old_image_size, new_image_size, &resized_image_size);
                        if (r < 0)
                                return r;

                        if (resized_image_size == old_image_size) {
                                log_info("Couldn't change image size.");
                                return 0;
                        }

                        assert(resized_image_size > old_image_size);

                        log_info("Growing of image file from %s to %s completed.", FORMAT_BYTES(old_image_size), FORMAT_BYTES(resized_image_size));

                        if (resized_image_size < new_image_size) {
                                uint64_t sub;

                                /* If the growing we managed to do is smaller than what we wanted we need to
                                 * adjust the partition/file system sizes we are going for, too */
                                sub = new_image_size - resized_image_size;
                                assert(new_partition_size >= sub);
                                new_partition_size -= sub;
                                assert(new_fs_size >= sub);
                                new_fs_size -= sub;
                        }

                        new_image_size = resized_image_size;
                } else {
                        assert(S_ISBLK(st.st_mode));
                        assert(new_image_size == old_image_size);
                }

                /* Make sure loopback device sees the new bigger size */
                r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
                if (r == -ENOTTY)
                        log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
                else if (r < 0)
                        return log_error_errno(r, "Failed to refresh loopback device size: %m");
                else
                        log_info("Refreshing loop device size completed.");

                r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
                if (r < 0)
                        return r;
                if (r > 0)
                        log_info("Growing of partition completed.");

                /* Ask the kernel to reread the partition table; failure is logged but otherwise ignored. */
                if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
                        log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");

                /* Tell LUKS about the new bigger size too */
                r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512U);
                if (r < 0)
                        return log_error_errno(r, "Failed to grow LUKS device: %m");

                log_info("LUKS device growing completed.");
        } else {
                /* → Shrink */

                /* When shrinking we store the embedded identity before resizing the file system (when
                 * growing this is done afterwards, see below). */
                if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                        r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
                        if (r < 0)
                                return r;
                }

                if (S_ISREG(st.st_mode)) {
                        if (user_record_luks_discard(h))
                                /* Before we shrink, let's trim the file system, so that we need less space on disk during the shrinking */
                                (void) run_fitrim(setup->root_fd);
                        else {
                                /* If discard is off, let's ensure all backing blocks are allocated, so that our resize operation doesn't fail half-way */
                                r = run_fallocate(image_fd, &st);
                                if (r < 0)
                                        return r;
                        }
                }
        }

        /* Now try to resize the file system. The requested size might not always be possible, in which case
         * we'll try to get as close as we can get. The result is returned in 'resized_fs_size' */
        r = resize_fs_loop(h, setup, resize_type, old_fs_size, new_fs_size, &resized_fs_size);
        if (r < 0)
                return r;

        if (resized_fs_size == old_fs_size) {
                log_info("Couldn't change file system size.");
                return 0;
        }

        log_info("File system resizing from %s to %s completed.", FORMAT_BYTES(old_fs_size), FORMAT_BYTES(resized_fs_size));

        if (resized_fs_size > new_fs_size) {
                uint64_t add;

                /* If the shrinking we managed to do is larger than what we wanted we need to adjust the partition/image sizes. */
                add = resized_fs_size - new_fs_size;
                new_partition_size += add;
                if (S_ISREG(st.st_mode))
                        new_image_size += add;
        }

        new_fs_size = resized_fs_size;

        /* Immediately sync afterwards */
        r = home_sync_and_statfs(setup->root_fd, NULL);
        if (r < 0)
                return r;

        if (new_fs_size < old_fs_size) { /* → Shrink */

                /* When shrinking we work from the inside out: the file system is already smaller, now follow
                 * up with the LUKS device, the loop device, the image file and finally the partition. */

                /* Shrink the LUKS device now, matching the new file system size */
                r = sym_crypt_resize(setup->crypt_device, setup->dm_name, new_fs_size / 512);
                if (r < 0)
                        return log_error_errno(r, "Failed to shrink LUKS device: %m");

                log_info("LUKS device shrinking completed.");

                /* Refresh the loop devices size */
                r = loop_device_refresh_size(setup->loop, UINT64_MAX, new_partition_size);
                if (r == -ENOTTY)
                        log_debug_errno(r, "Device is not a loopback device, not refreshing size.");
                else if (r < 0)
                        return log_error_errno(r, "Failed to refresh loopback device size: %m");
                else
                        log_info("Refreshing loop device size completed.");

                if (S_ISREG(st.st_mode)) {
                        /* Shrink the image file */
                        if (ftruncate(image_fd, new_image_size) < 0)
                                return log_error_errno(errno, "Failed to shrink image file %s: %m", ip);

                        log_info("Shrinking of image file completed.");
                } else {
                        assert(S_ISBLK(st.st_mode));
                        assert(new_image_size == old_image_size);
                }

                r = apply_resize_partition(image_fd, disk_uuid, table, partition, new_partition_size);
                if (r < 0)
                        return r;
                if (r > 0)
                        log_info("Shrinking of partition completed.");

                /* Ask the kernel to reread the partition table; failure is logged but otherwise ignored. */
                if (S_ISBLK(st.st_mode) && ioctl(image_fd, BLKRRPART, 0) < 0)
                        log_debug_errno(errno, "BLKRRPART failed on block device, ignoring: %m");

        } else { /* → Grow */
                /* When growing the embedded identity is stored only now, after the file system was resized. */
                if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                        r = home_store_embedded_identity(new_home, setup->root_fd, h->uid, embedded_home);
                        if (r < 0)
                                return r;
                }
        }

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES)) {
                r = home_store_header_identity_luks(new_home, setup, header_home);
                if (r < 0)
                        return r;

                r = home_extend_embedded_identity(new_home, h, setup);
                if (r < 0)
                        return r;
        }

        /* Best-effort trim after the resize, if discard is enabled for this home; failure is ignored. */
        if (user_record_luks_discard(h))
                (void) run_fitrim(setup->root_fd);

        /* Sync once more, and acquire the statfs() data shown in the size summary below. */
        r = home_sync_and_statfs(setup->root_fd, &sfs);
        if (r < 0)
                return r;

        if (!FLAGS_SET(flags, HOME_SETUP_RESIZE_DONT_UNDO)) {
                r = home_setup_done(setup);
                if (r < 0)
                        return r;
        }

        log_info("Resizing completed.");

        print_size_summary(new_image_size, new_fs_size, &sfs);

        /* Hand the updated record to the caller (this is NULL if identities were not synced). */
        if (ret_home)
                *ret_home = TAKE_PTR(new_home);

        return 0;
}
3519 
/* Replaces the passwords installed in the LUKS key slots of the volume referenced by setup->crypt_device.
 *
 * The volume key is first recovered by trying the currently known passwords (the password lists in 'cache',
 * if one is passed, followed by h->password). Then every key slot supported by the LUKS type is destroyed,
 * and the first strv_length(effective_passwords) slots are populated with the new passwords; all remaining
 * slots are left empty.
 *
 * Returns 1 on success, -ENOKEY if none of the supplied passwords unlocks the superblock, and a negative
 * errno-style error code on any other failure. */
int home_passwd_luks(
                UserRecord *h,
                HomeSetupFlags flags,
                HomeSetup *setup,
                const PasswordCache *cache, /* the passwords acquired via PKCS#11/FIDO2 security tokens */
                char **effective_passwords  /* new passwords */) {

        size_t volume_key_size, max_key_slots, n_effective;
        _cleanup_(erase_and_freep) void *volume_key = NULL;
        struct crypt_pbkdf_type good_pbkdf, minimal_pbkdf;
        const char *type;
        char **list;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);

        r = dlopen_cryptsetup();
        if (r < 0)
                return r;

        type = sym_crypt_get_type(setup->crypt_device);
        if (!type)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine crypto device type.");

        r = sym_crypt_keyslot_max(type);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine number of key slots.");
        max_key_slots = r;

        r = sym_crypt_get_volume_key_size(setup->crypt_device);
        if (r <= 0)
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine volume key size.");
        volume_key_size = (size_t) r;

        volume_key = malloc(volume_key_size);
        if (!volume_key)
                return log_oom();

        /* Try each source of current passwords in turn, and stop at the first one that yields anything
         * other than -ENOKEY (i.e. either success or a hard failure). */
        r = -ENOKEY;
        FOREACH_POINTER(list,
                        cache ? cache->keyring_passswords : NULL,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        h->password) {

                r = luks_try_passwords(h, setup->crypt_device, list, volume_key, &volume_key_size, NULL);
                if (r != -ENOKEY)
                        break;
        }
        if (r == -ENOKEY)
                return log_error_errno(SYNTHETIC_ERRNO(ENOKEY), "Failed to unlock LUKS superblock with supplied passwords.");
        if (r < 0)
                return log_error_errno(r, "Failed to unlock LUKS superblock: %m");

        n_effective = strv_length(effective_passwords);

        build_good_pbkdf(&good_pbkdf, h);
        build_minimal_pbkdf(&minimal_pbkdf, h);

        for (size_t i = 0; i < max_key_slots; i++) {
                /* Each slot is wiped first and only then re-populated, hence if one of the steps below
                 * fails we return with this slot left empty. */
                r = sym_crypt_keyslot_destroy(setup->crypt_device, i);
                if (r < 0 && !IN_SET(r, -ENOENT, -EINVAL)) /* Returns EINVAL or ENOENT if there's no key in this slot already */
                        return log_error_errno(r, "Failed to destroy LUKS password: %m");

                if (i >= n_effective) {
                        /* No new password for this slot: it simply stays empty. Only log if there
                         * actually was something to destroy. */
                        if (r >= 0)
                                log_info("Destroyed LUKS key slot %zu.", i);
                        continue;
                }

                if (password_cache_contains(cache, effective_passwords[i])) { /* Is this a FIDO2 or PKCS#11 password? */
                        log_debug("Using minimal PBKDF for slot %zu", i);
                        r = sym_crypt_set_pbkdf_type(setup->crypt_device, &minimal_pbkdf);
                } else {
                        log_debug("Using good PBKDF for slot %zu", i);
                        r = sym_crypt_set_pbkdf_type(setup->crypt_device, &good_pbkdf);
                }
                if (r < 0)
                        return log_error_errno(r, "Failed to tweak PBKDF for slot %zu: %m", i);

                r = sym_crypt_keyslot_add_by_volume_key(
                                setup->crypt_device,
                                i,
                                volume_key,
                                volume_key_size,
                                effective_passwords[i],
                                strlen(effective_passwords[i]));
                if (r < 0)
                        return log_error_errno(r, "Failed to set up LUKS password: %m");

                log_info("Updated LUKS key slot %zu.", i);

                /* If we changed the password, then make sure to update the copy in the keyring, so that
                 * auto-rebalance continues to work. We only do this if we operate on an active home dir.
                 * The result is deliberately ignored: the key slots are already updated at this point. */
                if (i == 0 && FLAGS_SET(flags, HOME_SETUP_ALREADY_ACTIVATED))
                        (void) upload_to_keyring(h, effective_passwords[i], NULL);
        }

        return 1;
}
3622 
/* Locks a home directory by suspending its LUKS device: looks up the already open device, syncs the
 * file system at the user's home directory path, then suspends the device via libcryptsetup. Expects a
 * HomeSetup that has neither a root fd nor a crypt device attached yet. Returns 0 on success, a negative
 * errno-style error code otherwise. */
int home_lock_luks(UserRecord *h, HomeSetup *setup) {
        const char *home_dir;
        int r;

        assert(h);
        assert(setup);
        assert(setup->root_fd < 0);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        log_info("Discovered used LUKS device %s.", setup->dm_node);

        home_dir = user_record_home_directory(h);
        assert_se(home_dir);

        /* Snake oil, but let's better be safe than sorry */
        r = syncfs_path(AT_FDCWD, home_dir);
        if (r < 0)
                return log_error_errno(r, "Failed to synchronize file system %s: %m", home_dir);

        log_info("File system synchronized.");

        /* Note that we don't invoke FIFREEZE here, it appears libcryptsetup/device-mapper already does
         * that on its own for us */

        r = sym_crypt_suspend(setup->crypt_device, setup->dm_name);
        if (r >= 0) {
                log_info("LUKS device suspended.");
                return 0;
        }

        return log_error_errno(r, "Failed to suspend cryptsetup device: %s: %m", setup->dm_node);
}
3654 
luks_try_resume(struct crypt_device * cd,const char * dm_name,char ** password)3655 static int luks_try_resume(
3656                 struct crypt_device *cd,
3657                 const char *dm_name,
3658                 char **password) {
3659 
3660         int r;
3661 
3662         assert(cd);
3663         assert(dm_name);
3664 
3665         STRV_FOREACH(pp, password) {
3666                 r = sym_crypt_resume_by_passphrase(
3667                                 cd,
3668                                 dm_name,
3669                                 CRYPT_ANY_SLOT,
3670                                 *pp,
3671                                 strlen(*pp));
3672                 if (r >= 0) {
3673                         log_info("Resumed LUKS device %s.", dm_name);
3674                         return 0;
3675                 }
3676 
3677                 log_debug_errno(r, "Password %zu didn't work for resuming device: %m", (size_t) (pp - password));
3678         }
3679 
3680         return -ENOKEY;
3681 }
3682 
/* Unlocks a previously locked home directory by resuming its suspended LUKS device. The PKCS#11 and
 * FIDO2 password lists of 'cache' (if a cache is passed) are tried first, followed by the passwords
 * stored in the user record. Returns 0 on success, -ENOKEY if no password worked, and another negative
 * errno-style error code on other failures. */
int home_unlock_luks(UserRecord *h, HomeSetup *setup, const PasswordCache *cache) {
        char **candidates;
        int r;

        assert(h);
        assert(setup);
        assert(!setup->crypt_device);

        r = acquire_open_luks_device(h, setup, /* graceful= */ false);
        if (r < 0)
                return r;

        log_info("Discovered used LUKS device %s.", setup->dm_node);

        /* Walk through the password sources, and stop at the first one that gives us anything other
         * than "no matching key". */
        r = -ENOKEY;
        FOREACH_POINTER(candidates,
                        cache ? cache->pkcs11_passwords : NULL,
                        cache ? cache->fido2_passwords : NULL,
                        h->password) {

                r = luks_try_resume(setup->crypt_device, setup->dm_name, candidates);
                if (r != -ENOKEY)
                        break;
        }

        if (r < 0) {
                if (r == -ENOKEY)
                        return log_error_errno(r, "No valid password for LUKS superblock.");

                return log_error_errno(r, "Failed to resume LUKS superblock: %m");
        }

        log_info("LUKS device resumed.");
        return 0;
}
3714 
/* Checks whether the block device node recorded in setup->dm_node has disappeared.
 *
 * Returns > 0 if no node is recorded, if the node cannot be found anymore (ENOENT), or if no device
 * object can be found for it (ENODEV). Returns 0 if the device is still around, and a negative
 * errno-style error code on any other failure. */
static int device_is_gone(HomeSetup *setup) {
        _cleanup_(sd_device_unrefp) sd_device *d = NULL;
        struct stat st;
        int r;

        assert(setup);

        if (!setup->dm_node)
                return true;

        if (stat(setup->dm_node, &st) < 0) {
                if (errno != ENOENT)
                        return log_error_errno(errno, "Failed to stat block device node %s: %m", setup->dm_node);

                return true;
        }

        r = sd_device_new_from_stat_rdev(&d, &st);
        if (r < 0) {
                /* sd-device reports failures through its return value, not through errno, hence
                 * propagate 'r' here. */
                if (r != -ENODEV)
                        return log_error_errno(r, "Failed to allocate device object from block device node %s: %m", setup->dm_node);

                return true;
        }

        return false;
}
3742 
/* Device monitor callback: on every "remove" event checks whether the device node tracked by the
 * HomeSetup object passed as userdata is gone, and if so requests exit of the event loop the monitor is
 * attached to (with exit code 0). Events for other actions are ignored. */
static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
        HomeSetup *setup = userdata;
        int gone;

        assert(setup);

        if (!device_for_action(device, SD_DEVICE_REMOVE))
                return 0;

        /* We don't really care for the device object passed to us, we just check if the device node still
         * exists */
        gone = device_is_gone(setup);
        if (gone < 0)
                return gone;
        if (gone == 0)
                return 0; /* Still there, keep waiting */

        /* Yay! we are done! */
        (void) sd_event_exit(sd_device_monitor_get_event(monitor), 0);
        return 0;
}
3763 
/* Waits until the DM block device recorded in 'setup' has disappeared.
 *
 * If timeout_usec is not USEC_INFINITY a timer without handler is installed on the event loop; per
 * sd_event_add_time(3) such a timer terminates the loop once it elapses.
 *
 * Returns 0 if no DM device is recorded in 'setup' or once the device is gone, -ETIMEDOUT if the device
 * is still around after the event loop finished, and another negative errno-style error code on other
 * failures. */
int wait_for_block_device_gone(HomeSetup *setup, usec_t timeout_usec) {
        _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *m = NULL;
        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
        int r;

        assert(setup);

        /* So here's the thing: we enable "deferred deactivation" on our dm-crypt volumes. This means they
         * are automatically torn down once not used anymore (i.e. once unmounted). Which is great. It also
         * means that when we deactivate a home directory and try to tear down the volume that backs it, it
         * possibly is already torn down or in the process of being torn down, since we race against the
         * automatic tearing down. Which is fine, we handle errors from that. However, we lose the ability to
         * naturally wait for the tear down operation to complete: if we are not the ones who tear down the
         * device we are also not the ones who naturally block on that operation. Hence let's add some code
         * to actively wait for the device to go away, via sd-device. We'll call this whenever tearing down a
         * LUKS device, to ensure the device is really really gone before we proceed. Net effect: "homectl
         * deactivate foo && homectl activate foo" will work reliably, i.e. deactivation immediately followed
         * by activation will work. Also, by the time deactivation completes we can guarantee that all data
         * is sync'ed down to the lowest block layer as all higher levels are fully and entirely
         * destructed. */

        if (!setup->dm_name)
                return 0;

        assert(setup->dm_node);
        log_debug("Waiting until %s disappears.", setup->dm_node);

        r = sd_event_new(&event);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate event loop: %m");

        r = sd_device_monitor_new(&m);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate device monitor: %m");

        r = sd_device_monitor_filter_add_match_subsystem_devtype(m, "block", "disk");
        if (r < 0)
                return log_error_errno(r, "Failed to configure device monitor match: %m");

        r = sd_device_monitor_attach_event(m, event);
        if (r < 0)
                return log_error_errno(r, "Failed to attach device monitor to event loop: %m");

        r = sd_device_monitor_start(m, device_monitor_handler, setup);
        if (r < 0)
                return log_error_errno(r, "Failed to start device monitor: %m");

        /* Check only after the monitor is running, so that a removal happening between this check and
         * entering the loop still generates an event for us. */
        r = device_is_gone(setup);
        if (r < 0)
                return r;
        if (r > 0) {
                log_debug("%s has already disappeared before entering wait loop.", setup->dm_node);
                return 0; /* gone already */
        }

        if (timeout_usec != USEC_INFINITY) {
                r = sd_event_add_time_relative(event, NULL, CLOCK_MONOTONIC, timeout_usec, 0, NULL, NULL);
                if (r < 0)
                        return log_error_errno(r, "Failed to add timer event: %m");
        }

        r = sd_event_loop(event);
        if (r < 0)
                return log_error_errno(r, "Failed to run event loop: %m");

        /* The loop may have ended because the device went away or because the timer elapsed. Check again
         * to tell the two apart. */
        r = device_is_gone(setup);
        if (r < 0)
                return r;
        if (r == 0) /* Report this as a real error; passing r (== 0) here would signal success to the caller */
                return log_error_errno(SYNTHETIC_ERRNO(ETIMEDOUT), "Device %s still around.", setup->dm_node);

        log_debug("Successfully waited until device %s disappeared.", setup->dm_node);
        return 0;
}
3838 
/* Shrinks the LUKS home directory of 'h' if the user record asks for automatic shrinking and growing
 * and the file system behind setup->root_fd can do both online. Returns 0 if nothing was done because
 * either precondition is not met, 1 if the resize call completed successfully, and a negative errno-style
 * error code on failure. */
int home_auto_shrink_luks(UserRecord *h, HomeSetup *setup, PasswordCache *cache) {
        struct statfs sfs;
        int r;

        assert(h);
        assert(user_record_storage(h) == USER_LUKS);
        assert(setup);
        assert(setup->root_fd >= 0);

        /* Only act if the record is configured for both shrinking and growing */
        if (user_record_auto_resize_mode(h) != AUTO_RESIZE_SHRINK_AND_GROW)
                return 0;

        r = fstatfs(setup->root_fd, &sfs);
        if (r < 0)
                return log_error_errno(errno, "Failed to statfs home directory: %m");

        if (!fs_can_online_shrink_and_grow(sfs.f_type)) {
                log_debug("Not auto-shrinking file system, since selected file system cannot do both online shrink and grow.");
                return 0;
        }

        /* The home directory is already set up: minimize it, never grow it, don't sync identities and
         * leave the setup object intact for the caller. */
        r = home_resize_luks(
                        h,
                        HOME_SETUP_ALREADY_ACTIVATED|
                        HOME_SETUP_RESIZE_DONT_SYNC_IDENTITIES|
                        HOME_SETUP_RESIZE_MINIMIZE|
                        HOME_SETUP_RESIZE_DONT_GROW|
                        HOME_SETUP_RESIZE_DONT_UNDO,
                        setup,
                        cache,
                        NULL);

        return r < 0 ? r : 1;
}
3874