1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <grp.h>
4 #include <linux/fs.h>
5 #include <linux/magic.h>
6 #include <math.h>
7 #include <openssl/pem.h>
8 #include <pwd.h>
9 #include <sys/ioctl.h>
10 #include <sys/quota.h>
11 #include <sys/stat.h>
12 
13 #include "btrfs-util.h"
14 #include "bus-common-errors.h"
15 #include "bus-error.h"
16 #include "bus-log-control-api.h"
17 #include "bus-polkit.h"
18 #include "clean-ipc.h"
19 #include "conf-files.h"
20 #include "device-util.h"
21 #include "dirent-util.h"
22 #include "fd-util.h"
23 #include "fileio.h"
24 #include "format-util.h"
25 #include "fs-util.h"
26 #include "gpt.h"
27 #include "home-util.h"
28 #include "homed-conf.h"
29 #include "homed-home-bus.h"
30 #include "homed-home.h"
31 #include "homed-manager-bus.h"
32 #include "homed-manager.h"
33 #include "homed-varlink.h"
34 #include "io-util.h"
35 #include "mkdir.h"
36 #include "process-util.h"
37 #include "quota-util.h"
38 #include "random-util.h"
39 #include "resize-fs.h"
40 #include "socket-util.h"
41 #include "sort-util.h"
42 #include "stat-util.h"
43 #include "strv.h"
44 #include "sync-util.h"
45 #include "tmpfile-util.h"
46 #include "udev-util.h"
47 #include "user-record-sign.h"
48 #include "user-record-util.h"
49 #include "user-record.h"
50 #include "user-util.h"
51 
/* Where to look for private/public keys that are used to sign the user records. We are not using
 * CONF_PATHS_NULSTR() here since we want to insert /var/lib/systemd/home/ in the middle. And we insert that
 * since we want to auto-generate a persistent private/public key pair if we need to.
 *
 * The value is a sequence of directory paths, each terminated by an explicit NUL byte; the directories are
 * listed in the order shown below. */
#define KEY_PATHS_NULSTR                        \
        "/etc/systemd/home/\0"                  \
        "/run/systemd/home/\0"                  \
        "/var/lib/systemd/home/\0"              \
        "/usr/local/lib/systemd/home/\0"        \
        "/usr/lib/systemd/home/\0"
61 
uid_is_home(uid_t uid)62 static bool uid_is_home(uid_t uid) {
63         return uid >= HOME_UID_MIN && uid <= HOME_UID_MAX;
64 }
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range, i.e. into
 * HOME_UID_MIN…HOME_UID_MAX (both inclusive). Note that the cast binds tighter than the modulo operator: the
 * value is converted to uid_t first and only then reduced into the range. */
#define UID_CLAMP_INTO_HOME_RANGE(rnd) (((uid_t) (rnd) % (HOME_UID_MAX - HOME_UID_MIN + 1)) + HOME_UID_MIN)
68 
/* Hash operations for the four Home lookup tables allocated in manager_new(): keyed by UID, by user name, by
 * worker PID and by sysfs path. All four register home_free() as value destructor.
 * NOTE(review): this presumably relies on home_free() unlinking the Home object from the other tables, so that
 * each object is destroyed only once — confirm against the home_free() implementation. */
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_uid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_name_hash_ops, char, string_hash_func, string_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_worker_pid_hash_ops, void, trivial_hash_func, trivial_compare_func, Home, home_free);
DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(homes_by_sysfs_hash_ops, char, path_hash_func, path_compare, Home, home_free);
73 
/* Forward declarations of file-local helpers that are called before the point where they are defined. */
static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata);
static int manager_gc_images(Manager *m);
static int manager_enumerate_images(Manager *m);
static int manager_assess_image(Manager *m, int dir_fd, const char *dir_path, const char *dentry_name);
static void manager_revalidate_image(Manager *m, Home *h);
79 
/* (Re-)establishes the inotify watch on the home root directory (as returned by get_home_root()).
 *
 * Any previously installed watch is released first and m->scan_slash_home is reset to false. The flag is only
 * set to true again if statfs() on the directory succeeds and the directory is neither on a network file
 * system nor on autofs. All failures are logged and otherwise ignored; nothing is propagated to the caller.
 * Note that m->scan_slash_home remains true if only the creation of the inotify watch itself fails. */
static void manager_watch_home(Manager *m) {
        struct statfs sfs;
        int r;

        assert(m);

        m->inotify_event_source = sd_event_source_disable_unref(m->inotify_event_source);
        m->scan_slash_home = false;

        if (statfs(get_home_root(), &sfs) < 0) {
                log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
                               "Failed to statfs() %s directory, disabling automatic scanning: %m", get_home_root());
                return;
        }

        if (is_network_fs(&sfs)) {
                log_info("%s is a network file system, disabling automatic scanning.", get_home_root());
                return;
        }

        if (is_fs_type(&sfs, AUTOFS_SUPER_MAGIC)) {
                log_info("%s is on autofs, disabling automatic scanning.", get_home_root());
                return;
        }

        m->scan_slash_home = true;

        r = sd_event_add_inotify(m->event, &m->inotify_event_source, get_home_root(),
                                 IN_CREATE|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF|IN_ONLYDIR|IN_MOVED_TO|IN_MOVED_FROM|IN_DELETE,
                                 on_home_inotify, m);
        if (r < 0) {
                /* Without a watch there is no event source to label, and claiming that we are watching the
                 * directory would be misleading, hence stop here. */
                log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_WARNING, r,
                               "Failed to create inotify watch on %s, ignoring: %m", get_home_root());
                return;
        }

        (void) sd_event_source_set_description(m->inotify_event_source, "home-inotify");

        log_info("Watching %s.", get_home_root());
}
118 
/* inotify callback for the watch installed by manager_watch_home().
 *
 * Events that concern the watched directory or the watch itself (queue overflow, move, deletion, unmount,
 * invalidation) cause the watch to be recreated and all images to be garbage collected and re-enumerated.
 * Events about individual entries are only considered if the entry is named "<user>.home" (or
 * "<user>.homedir" for directories) with a suitable user name; such entries are then assessed and/or
 * revalidated. Returns 0, except if we run out of memory. */
static int on_home_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        _cleanup_free_ char *full_path = NULL;
        const char *suffix, *user_name;
        Manager *m = userdata;
        bool appeared, vanished;

        assert(m);
        assert(event);

        if (event->mask & (IN_Q_OVERFLOW|IN_MOVE_SELF|IN_DELETE_SELF|IN_IGNORED|IN_UNMOUNT)) {

                /* Something happened to the directory or the watch as a whole: start from scratch. */
                if (FLAGS_SET(event->mask, IN_Q_OVERFLOW))
                        log_debug("%s inotify queue overflow, rescanning.", get_home_root());
                else if (FLAGS_SET(event->mask, IN_MOVE_SELF))
                        log_info("%s moved or renamed, recreating watch and rescanning.", get_home_root());
                else if (FLAGS_SET(event->mask, IN_DELETE_SELF))
                        log_info("%s deleted, recreating watch and rescanning.", get_home_root());
                else if (FLAGS_SET(event->mask, IN_UNMOUNT))
                        log_info("%s unmounted, recreating watch and rescanning.", get_home_root());
                else if (FLAGS_SET(event->mask, IN_IGNORED))
                        log_info("%s watch invalidated, recreating watch and rescanning.", get_home_root());

                manager_watch_home(m);
                (void) manager_gc_images(m);
                (void) manager_enumerate_images(m);
                (void) bus_manager_emit_auto_login_changed(m);
                return 0;
        }

        /* Everything else refers to a directory entry. Skip entries whose names do not look like one of our
         * images. */
        if (isempty(event->name))
                return 0;

        suffix = endswith(event->name, FLAGS_SET(event->mask, IN_ISDIR) ? ".homedir" : ".home");
        if (!suffix)
                return 0;

        user_name = strndupa_safe(event->name, suffix - event->name);
        if (!suitable_user_name(user_name))
                return 0;

        full_path = path_join(get_home_root(), event->name);
        if (!full_path)
                return log_oom();

        /* Note that IN_CLOSE_WRITE is part of both sets, i.e. a modified entry is both assessed and
         * revalidated. */
        appeared = (event->mask & (IN_CREATE|IN_CLOSE_WRITE|IN_MOVED_TO)) != 0;
        vanished = (event->mask & (IN_DELETE|IN_CLOSE_WRITE|IN_MOVED_FROM)) != 0;

        if (appeared) {
                if (FLAGS_SET(event->mask, IN_CREATE))
                        log_debug("%s has been created, having a look.", full_path);
                else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
                        log_debug("%s has been modified, having a look.", full_path);
                else if (FLAGS_SET(event->mask, IN_MOVED_TO))
                        log_debug("%s has been moved in, having a look.", full_path);

                (void) manager_assess_image(m, -1, get_home_root(), event->name);
                (void) bus_manager_emit_auto_login_changed(m);
        }

        if (vanished) {
                Home *home;

                if (FLAGS_SET(event->mask, IN_DELETE))
                        log_debug("%s has been deleted, revalidating.", full_path);
                else if (FLAGS_SET(event->mask, IN_CLOSE_WRITE))
                        log_debug("%s has been closed after writing, revalidating.", full_path);
                else if (FLAGS_SET(event->mask, IN_MOVED_FROM))
                        log_debug("%s has been moved away, revalidating.", full_path);

                /* Only homes we already know about can be revalidated */
                home = hashmap_get(m->homes_by_name, user_name);
                if (home) {
                        manager_revalidate_image(m, home);
                        (void) bus_manager_emit_auto_login_changed(m);
                }
        }

        return 0;
}
194 
/* Allocates and initializes a new Manager object.
 *
 * The configuration file is parsed, the default event loop is acquired (with SIGINT/SIGTERM handling and
 * watchdog support hooked up) and the four Home lookup tables are allocated. On success 0 is returned and the
 * new object is stored in *ret. On failure a negative error is returned and the partially set up object is
 * released again. */
int manager_new(Manager **ret) {
        static const int exit_signals[] = { SIGINT, SIGTERM };
        _cleanup_(manager_freep) Manager *manager = NULL;
        int r;

        assert(ret);

        manager = new(Manager, 1);
        if (!manager)
                return -ENOMEM;

        *manager = (Manager) {
                .default_storage = _USER_STORAGE_INVALID,
                .rebalance_interval_usec = 2 * USEC_PER_MINUTE, /* initially, rebalance every 2min */
        };

        r = manager_parse_config_file(manager);
        if (r < 0)
                return r;

        r = sd_event_default(&manager->event);
        if (r < 0)
                return r;

        /* No handler is passed for either signal, i.e. the event loop's default handling applies */
        for (size_t i = 0; i < sizeof(exit_signals) / sizeof(exit_signals[0]); i++) {
                r = sd_event_add_signal(manager->event, NULL, exit_signals[i], NULL, NULL);
                if (r < 0)
                        return r;
        }

        (void) sd_event_set_watchdog(manager->event, true);

        /* The various indexes we use to look up Home objects */
        manager->homes_by_uid = hashmap_new(&homes_by_uid_hash_ops);
        if (!manager->homes_by_uid)
                return -ENOMEM;

        manager->homes_by_name = hashmap_new(&homes_by_name_hash_ops);
        if (!manager->homes_by_name)
                return -ENOMEM;

        manager->homes_by_worker_pid = hashmap_new(&homes_by_worker_pid_hash_ops);
        if (!manager->homes_by_worker_pid)
                return -ENOMEM;

        manager->homes_by_sysfs = hashmap_new(&homes_by_sysfs_hash_ops);
        if (!manager->homes_by_sysfs)
                return -ENOMEM;

        *ret = TAKE_PTR(manager);
        return 0;
}
247 
/* Releases a Manager object and everything it owns.
 *
 * Before anything is torn down we wait for every home that is tracked in m->homes_by_worker_pid via
 * home_wait_for_worker(). Like free(), this accepts NULL and then does nothing. Always returns NULL, so that
 * the result can be assigned to the pointer being released. */
Manager* manager_free(Manager *m) {
        Home *h;

        if (!m)
                return NULL;

        HASHMAP_FOREACH(h, m->homes_by_worker_pid)
                (void) home_wait_for_worker(h);

        m->bus = sd_bus_flush_close_unref(m->bus);
        m->polkit_registry = bus_verify_polkit_async_registry_free(m->polkit_registry);

        m->device_monitor = sd_device_monitor_unref(m->device_monitor);

        m->inotify_event_source = sd_event_source_unref(m->inotify_event_source);
        m->notify_socket_event_source = sd_event_source_unref(m->notify_socket_event_source);
        m->deferred_rescan_event_source = sd_event_source_unref(m->deferred_rescan_event_source);
        m->deferred_gc_event_source = sd_event_source_unref(m->deferred_gc_event_source);
        m->deferred_auto_login_event_source = sd_event_source_unref(m->deferred_auto_login_event_source);
        m->rebalance_event_source = sd_event_source_unref(m->rebalance_event_source);

        m->event = sd_event_unref(m->event);

        m->homes_by_uid = hashmap_free(m->homes_by_uid);
        m->homes_by_name = hashmap_free(m->homes_by_name);
        m->homes_by_worker_pid = hashmap_free(m->homes_by_worker_pid);
        m->homes_by_sysfs = hashmap_free(m->homes_by_sysfs);

        /* EVP_PKEY_free() is a NOP when passed NULL, hence no need to check explicitly */
        EVP_PKEY_free(m->private_key);

        hashmap_free(m->public_keys);

        varlink_server_unref(m->varlink_server);
        free(m->userdb_service);

        free(m->default_file_system_type);

        return mfree(m);
}
287 
/* Checks the signature of the specified user record against the keys known to the manager.
 *
 * Returns:
 *   USER_RECORD_UNSIGNED         → the record carries no signature at all
 *   USER_RECORD_SIGNED           → signed with our own private key, and by others
 *   USER_RECORD_SIGNED_EXCLUSIVE → signed with our own private key only
 *   USER_RECORD_FOREIGN          → signed with one of the registered public keys, but not by our own key
 *   -ENOKEY                      → signed, but by none of the keys we know
 * Any other value returned by user_record_has_signature() or user_record_verify() is passed through as is. */
int manager_verify_user_record(Manager *m, UserRecord *hr) {
        EVP_PKEY *public_key;
        int r;

        assert(m);
        assert(hr);

        /* Without any key we cannot verify anything, we can only tell signed and unsigned records apart */
        if (!m->private_key && hashmap_isempty(m->public_keys)) {
                r = user_record_has_signature(hr);
                if (r < 0)
                        return r;
                if (r > 0)
                        return -ENOKEY;

                return USER_RECORD_UNSIGNED;
        }

        /* Is it our own? Anything but "not signed by this key" is a final answer and propagated as is. */
        if (m->private_key) {
                r = user_record_verify(hr, m->private_key);
                if (r != USER_RECORD_FOREIGN)
                        return r;
        }

        HASHMAP_FOREACH(public_key, m->public_keys) {
                r = user_record_verify(hr, public_key);

                /* This record is not signed by this key, but let's see our other keys */
                if (r == USER_RECORD_FOREIGN)
                        continue;

                /* It's signed by this key we are happy with, but which is not our own. */
                if (r == USER_RECORD_SIGNED || r == USER_RECORD_SIGNED_EXCLUSIVE)
                        return USER_RECORD_FOREIGN;

                /* It's not signed at all, or something went wrong */
                return r;
        }

        return -ENOKEY;
}
340 
/* Loads the identity record file 'fname' (looked up relative to 'dir_fd') and registers a home for it.
 *
 * The record must be a regular file containing a JSON user record whose user name equals 'name' and which
 * carries a signature accepted by manager_verify_user_record(). If a Home object of that name already exists
 * its record is replaced, otherwise a new Home object is allocated. Empty files and blank JSON objects are
 * deleted.
 *
 * Returns 1 if a home was added or updated, 0 if the file was skipped (not a regular file) or was empty and
 * has been removed, and a negative error otherwise. */
static int manager_add_home_by_record(
                Manager *m,
                const char *name,
                int dir_fd,
                const char *fname) {

        _cleanup_(json_variant_unrefp) JsonVariant *v = NULL;
        _cleanup_(user_record_unrefp) UserRecord *hr = NULL;
        /* Initialize explicitly, so that we never log indeterminate values should the parser fail before it
         * has filled these in. */
        unsigned line = 0, column = 0;
        int r, is_signed;
        struct stat st;
        Home *h;

        assert(m);
        assert(name);
        assert(fname);

        if (fstatat(dir_fd, fname, &st, 0) < 0)
                return log_error_errno(errno, "Failed to stat identity record %s: %m", fname);

        if (!S_ISREG(st.st_mode)) {
                log_debug("Identity record file %s is not a regular file, ignoring.", fname);
                return 0;
        }

        if (st.st_size == 0)
                goto unlink_this_file;

        r = json_parse_file_at(NULL, dir_fd, fname, JSON_PARSE_SENSITIVE, &v, &line, &column);
        if (r < 0)
                return log_error_errno(r, "Failed to parse identity record at %s:%u:%u: %m", fname, line, column);

        if (json_variant_is_blank_object(v))
                goto unlink_this_file;

        hr = user_record_new();
        if (!hr)
                return log_oom();

        r = user_record_load(hr, v, USER_RECORD_LOAD_REFUSE_SECRET|USER_RECORD_LOG|USER_RECORD_PERMISSIVE);
        if (r < 0)
                return r;

        if (!streq_ptr(hr->user_name, name))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Identity's user name %s does not match file name %s, refusing.",
                                       hr->user_name, name);

        is_signed = manager_verify_user_record(m, hr);
        switch (is_signed) {

        case -ENOKEY:
                return log_warning_errno(is_signed, "User record %s is not signed by any accepted key, ignoring.", fname);
        case USER_RECORD_UNSIGNED:
                return log_warning_errno(SYNTHETIC_ERRNO(EPERM), "User record %s is not signed at all, ignoring.", fname);
        case USER_RECORD_SIGNED:
                log_info("User record %s is signed by us (and others), accepting.", fname);
                break;
        case USER_RECORD_SIGNED_EXCLUSIVE:
                log_info("User record %s is signed only by us, accepting.", fname);
                break;
        case USER_RECORD_FOREIGN:
                log_info("User record %s is signed by registered key from others, accepting.", fname);
                break;
        default:
                assert(is_signed < 0);
                return log_error_errno(is_signed, "Failed to verify signature of user record in %s: %m", fname);
        }

        h = hashmap_get(m->homes_by_name, name);
        if (h) {
                r = home_set_record(h, hr);
                if (r < 0)
                        return log_error_errno(r, "Failed to update home record for %s: %m", name);

                /* If we acquired a record now for a previously unallocated entry, then reset the state. This
                 * makes sure home_get_state() will check for the availability of the image file dynamically
                 * in order to distinguish HOME_INACTIVE and HOME_ABSENT. */
                if (h->state == HOME_UNFIXATED)
                        h->state = _HOME_STATE_INVALID;
        } else {
                r = home_new(m, hr, NULL, &h);
                if (r < 0)
                        return log_error_errno(r, "Failed to allocate new home object: %m");

                log_info("Added registered home for user %s.", hr->user_name);
        }

        /* Only entries we exclusively signed are writable to us, hence remember the result */
        h->signed_locally = is_signed == USER_RECORD_SIGNED_EXCLUSIVE;

        return 1;

unlink_this_file:
        /* If this is an empty file, then let's just remove it. An empty file is not useful in any case, and
         * apparently xfs likes to leave empty files around when not unmounted cleanly (see
         * https://github.com/systemd/systemd/issues/15178 for example). Note that we don't delete non-empty
         * files even if they are invalid, because that's just too risky, we might delete data the user still
         * needs. But empty files are never useful, hence let's just remove them. */

        if (unlinkat(dir_fd, fname, 0) < 0)
                return log_error_errno(errno, "Failed to remove empty user record file %s: %m", fname);

        log_notice("Discovered empty user record file %s/%s, removed automatically.", home_record_dir(), fname);
        return 0;
}
447 
/* Scans the record directory (home_record_dir()) for "<user>.identity" files and tries to register a home for
 * each of them via manager_add_home_by_record(). Problems with individual records are ignored; only failures
 * to open or read the directory, or running out of memory, are returned as negative error. Returns 0
 * otherwise. */
static int manager_enumerate_records(Manager *m) {
        _cleanup_closedir_ DIR *dir = NULL;

        assert(m);

        dir = opendir(home_record_dir());
        if (!dir)
                return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
                                      "Failed to open %s: %m", home_record_dir());

        FOREACH_DIRENT(de, dir, return log_error_errno(errno, "Failed to read record directory: %m")) {
                _cleanup_free_ char *user_name = NULL;
                const char *suffix;

                /* We are only interested in file entries that carry the right suffix */
                suffix = dirent_is_file(de) ? endswith(de->d_name, ".identity") : NULL;
                if (!suffix)
                        continue;

                /* Whatever precedes the suffix is the user name the record is supposed to be for */
                user_name = strndup(de->d_name, suffix - de->d_name);
                if (!user_name)
                        return log_oom();

                if (suitable_user_name(user_name))
                        (void) manager_add_home_by_record(m, user_name, dirfd(dir), de->d_name);
        }

        return 0;
}
481 
search_quota(uid_t uid,const char * exclude_quota_path)482 static int search_quota(uid_t uid, const char *exclude_quota_path) {
483         struct stat exclude_st = {};
484         dev_t previous_devno = 0;
485         int r;
486 
487         /* Checks whether the specified UID owns any files on the files system, but ignore any file system
488          * backing the specified file. The file is used when operating on home directories, where it's OK if
489          * the UID of them already owns files. */
490 
491         if (exclude_quota_path && stat(exclude_quota_path, &exclude_st) < 0) {
492                 if (errno != ENOENT)
493                         return log_warning_errno(errno, "Failed to stat %s, ignoring: %m", exclude_quota_path);
494         }
495 
496         /* Check a few usual suspects where regular users might own files. Note that this is by no means
497          * comprehensive, but should cover most cases. Note that in an ideal world every user would be
498          * registered in NSS and avoid our own UID range, but for all other cases, it's a good idea to be
499          * paranoid and check quota if we can. */
500         FOREACH_STRING(where, get_home_root(), "/tmp/", "/var/", "/var/mail/", "/var/tmp/", "/var/spool/") {
501                 struct dqblk req;
502                 struct stat st;
503 
504                 if (stat(where, &st) < 0) {
505                         log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
506                                        "Failed to stat %s, ignoring: %m", where);
507                         continue;
508                 }
509 
510                 if (major(st.st_dev) == 0) {
511                         log_debug("Directory %s is not on a real block device, not checking quota for UID use.", where);
512                         continue;
513                 }
514 
515                 if (st.st_dev == exclude_st.st_dev) { /* If an exclude path is specified, then ignore quota
516                                                        * reported on the same block device as that path. */
517                         log_debug("Directory %s is where the home directory is located, not checking quota for UID use.", where);
518                         continue;
519                 }
520 
521                 if (st.st_dev == previous_devno) { /* Does this directory have the same devno as the previous
522                                                     * one we tested? If so, there's no point in testing this
523                                                     * again. */
524                         log_debug("Directory %s is on same device as previous tested directory, not checking quota for UID use a second time.", where);
525                         continue;
526                 }
527 
528                 previous_devno = st.st_dev;
529 
530                 r = quotactl_devnum(QCMD_FIXED(Q_GETQUOTA, USRQUOTA), st.st_dev, uid, &req);
531                 if (r < 0) {
532                         if (ERRNO_IS_NOT_SUPPORTED(r))
533                                 log_debug_errno(r, "No UID quota support on %s, ignoring.", where);
534                         else if (ERRNO_IS_PRIVILEGE(r))
535                                 log_debug_errno(r, "UID quota support for %s prohibited, ignoring.", where);
536                         else
537                                 log_warning_errno(r, "Failed to query quota on %s, ignoring: %m", where);
538 
539                         continue;
540                 }
541 
542                 if ((FLAGS_SET(req.dqb_valid, QIF_SPACE) && req.dqb_curspace > 0) ||
543                     (FLAGS_SET(req.dqb_valid, QIF_INODES) && req.dqb_curinodes > 0)) {
544                         log_debug_errno(errno, "Quota reports UID " UID_FMT " occupies disk space on %s.", uid, where);
545                         return 1;
546                 }
547         }
548 
549         return 0;
550 }
551 
/* Picks a UID from the home UID range that does not appear to be in use yet.
 *
 * Candidates are tried in three phases: first 'start_uid' (if it lies within the home range), then a UID
 * derived by hashing 'user_name' (if specified), then random ones. A candidate is rejected if it is already
 * used by another home of this manager, is known to NSS as user or group, owns IPC objects, or if
 * search_quota() reports use (or fails). 'exclude_quota_path' is passed on to search_quota().
 *
 * Returns 0 and stores the UID in *ret on success, -EBUSY if no unused UID was found before the attempt
 * budget was exhausted. */
static int manager_acquire_uid(
                Manager *m,
                uid_t start_uid,
                const char *user_name,
                const char *exclude_quota_path,
                uid_t *ret) {

        static const uint8_t hash_key[] = {
                0xa3, 0xb8, 0x82, 0x69, 0x9a, 0x71, 0xf7, 0xa9,
                0xe0, 0x7c, 0xf6, 0xf1, 0x21, 0x69, 0xd2, 0x1e
        };

        enum {
                PHASE_SUGGESTED,
                PHASE_HASHED,
                PHASE_RANDOM
        } phase = PHASE_SUGGESTED;

        unsigned n_tries = 100;
        int r;

        assert(m);
        assert(ret);

        for (;;) {
                struct passwd *pw;
                struct group *gr;
                uid_t candidate;
                Home *other;

                /* Every iteration counts against the budget, including those that merely switch phases */
                if (--n_tries == 0)
                        return -EBUSY;

                switch (phase) {

                case PHASE_SUGGESTED:
                        phase = PHASE_HASHED;

                        if (!uid_is_home(start_uid))
                                continue;

                        candidate = start_uid;
                        break;

                case PHASE_HASHED:
                        phase = PHASE_RANDOM;

                        if (!user_name)
                                continue;

                        candidate = UID_CLAMP_INTO_HOME_RANGE(siphash24(user_name, strlen(user_name), hash_key));
                        break;

                case PHASE_RANDOM:
                        random_bytes(&candidate, sizeof(candidate));
                        candidate = UID_CLAMP_INTO_HOME_RANGE(candidate);
                        break;

                default:
                        assert_not_reached();
                }

                other = hashmap_get(m->homes_by_uid, UID_TO_PTR(candidate));
                if (other) {
                        log_debug("Candidate UID " UID_FMT " already used by another home directory (%s), let's try another.",
                                  candidate, other->user_name);
                        continue;
                }

                pw = getpwuid(candidate);
                if (pw) {
                        log_debug("Candidate UID " UID_FMT " already registered by another user in NSS (%s), let's try another.",
                                  candidate, pw->pw_name);
                        continue;
                }

                gr = getgrgid((gid_t) candidate);
                if (gr) {
                        log_debug("Candidate UID " UID_FMT " already registered by another group in NSS (%s), let's try another.",
                                  candidate, gr->gr_name);
                        continue;
                }

                r = search_ipc(candidate, (gid_t) candidate);
                if (r < 0)
                        continue;
                if (r > 0) {
                        /* A positive result is a finding, not an error code, hence there's no error to
                         * log here. */
                        log_debug("Candidate UID " UID_FMT " already owns IPC objects, let's try another.",
                                  candidate);
                        continue;
                }

                r = search_quota(candidate, exclude_quota_path);
                if (r != 0)
                        continue;

                *ret = candidate;
                return 0;
        }
}
652 
/* Registers a home for an image that was discovered without a matching user record.
 *
 * A user record is synthesized from the specified parameters. If a Home object of the same name already exists
 * it is only touched if it is still in HOME_UNFIXATED state and refers to the very same image (same storage
 * type and same sysfs path or, lacking both sysfs paths, same image path). If no Home object exists yet one is
 * created in HOME_UNFIXATED state, unless NSS already knows a user or group of this name.
 *
 * Returns 1 if a home was added or updated, 0 if the image was ignored, negative on error. */
static int manager_add_home_by_image(
                Manager *m,
                const char *user_name,
                const char *realm,
                const char *image_path,
                const char *sysfs,
                UserStorage storage,
                uid_t start_uid) {

        _cleanup_(user_record_unrefp) UserRecord *synthesized = NULL;
        Home *home;
        uid_t uid;
        int r;

        assert(m);
        assert(user_name);
        assert(image_path);
        assert(storage >= 0);
        assert(storage < _USER_STORAGE_MAX);

        home = hashmap_get(m->homes_by_name, user_name);
        if (!home) {
                /* Check NSS, in case there's another user or group by this name */
                if (getpwnam(user_name) || getgrnam(user_name)) {
                        log_debug("Found an existing user or group by name '%s', ignoring image '%s'.", user_name, image_path);
                        return 0;
                }
        } else {
                bool matches = false;

                if (home->state != HOME_UNFIXATED) {
                        log_debug("Found an image for user %s which already has a record, skipping.", user_name);
                        return 0; /* ignore images that synthesize a user we already have a record for */
                }

                if (user_record_storage(home->record) == storage) {
                        if (home->sysfs && sysfs)
                                matches = path_equal(home->sysfs, sysfs);
                        else if (!home->sysfs && !sysfs) {
                                const char *known_path;

                                /* No sysfs path on either side, let's compare image paths instead */
                                known_path = user_record_image_path(home->record);
                                matches = known_path && path_equal(known_path, image_path);
                        }
                        /* Otherwise only one side has a sysfs path, which cannot be a match. */
                }

                if (!matches) {
                        log_debug("Found multiple images for user '%s', ignoring image '%s'.", user_name, image_path);
                        return 0;
                }
        }

        if (home && uid_is_valid(home->uid))
                uid = home->uid; /* stick to the UID we already picked earlier */
        else {
                const char *exclude_quota_path;

                exclude_quota_path = IN_SET(storage, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT) ? image_path : NULL;

                r = manager_acquire_uid(m, start_uid, user_name, exclude_quota_path, &uid);
                if (r < 0)
                        return log_warning_errno(r, "Failed to acquire unused UID for %s: %m", user_name);
        }

        synthesized = user_record_new();
        if (!synthesized)
                return log_oom();

        r = user_record_synthesize(synthesized, user_name, realm, image_path, storage, uid, (gid_t) uid);
        if (r < 0)
                return log_error_errno(r, "Failed to synthesize home record for %s (image %s): %m", user_name, image_path);

        if (!home) {
                r = home_new(m, synthesized, sysfs, &home);
                if (r < 0)
                        return log_error_errno(r, "Failed to allocate new home object: %m");

                home->state = HOME_UNFIXATED;

                log_info("Discovered new home for user %s through image %s.", user_name, image_path);
        } else {
                r = home_set_record(home, synthesized);
                if (r < 0)
                        return log_error_errno(r, "Failed to update home record for %s: %m", user_name);
        }

        return 1;
}
744 
/* Makes sure the specified user record carries a UID/GID pair.
 *
 * Returns 0 without touching the record if it already has a valid UID. Otherwise a UID is acquired via
 * manager_acquire_uid() and stored through user_record_add_binding() (the GID is set to the same numeric
 * value), in which case 1 is returned. Failures of either call are passed through as negative return
 * values, without any logging done here. */
int manager_augment_record_with_uid(
                Manager *m,
                UserRecord *hr) {

        const char *image_path = NULL, *exclude_quota_path = NULL;
        uid_t start_uid = UID_INVALID, uid;
        int r;

        assert(m);
        assert(hr);

        if (uid_is_valid(hr->uid))
                return 0;

        /* Only for these storage types the owner of the image path is consulted as a UID hint */
        if (IN_SET(hr->storage, USER_CLASSIC, USER_SUBVOLUME, USER_DIRECTORY, USER_FSCRYPT))
                image_path = user_record_image_path(hr);

        if (image_path) {
                struct stat st;

                if (stat(image_path, &st) >= 0) {
                        /* The image is owned by a UID from the home range: start the search there, and pass
                         * the image path along as the quota path to exclude. */
                        if (uid_is_home(st.st_uid)) {
                                start_uid = st.st_uid;
                                exclude_quota_path = image_path;
                        }
                } else if (errno != ENOENT) /* A missing image is fine, anything else deserves a warning */
                        log_warning_errno(errno, "Failed to stat(%s): %m", image_path);
        }

        r = manager_acquire_uid(m, start_uid, hr->user_name, exclude_quota_path, &uid);
        if (r < 0)
                return r;

        log_debug("Acquired new UID " UID_FMT " for %s.", uid, hr->user_name);

        /* Only the UID/GID are supplied here, every other argument is passed as an invalid/NULL/null-ID
         * placeholder. */
        r = user_record_add_binding(
                        hr,
                        _USER_STORAGE_INVALID,
                        NULL,
                        SD_ID128_NULL,
                        SD_ID128_NULL,
                        SD_ID128_NULL,
                        NULL,
                        NULL,
                        UINT64_MAX,
                        NULL,
                        NULL,
                        uid,
                        (gid_t) uid);
        if (r < 0)
                return r;

        return 1;
}
801 
/* Looks at a single directory entry and, if it looks like a home image, registers it with the manager.
 *
 * Only entries named "*.home" (regular file → USER_LUKS) or "*.homedir" (directory → USER_SUBVOLUME,
 * USER_FSCRYPT or USER_DIRECTORY) are considered. The part before the suffix is split into user name and
 * realm via split_user_name_realm().
 *
 * dir_fd may be negative, in which case the entry is accessed via the joined path of dir_path and
 * dentry_name instead of relative to the directory fd.
 *
 * Returns 0 if the entry is ignored, the return value of manager_add_home_by_image() if the entry was
 * handed over, and a negative errno-style value on failure (the failure is logged here). */
static int manager_assess_image(
                Manager *m,
                int dir_fd,
                const char *dir_path,
                const char *dentry_name) {

        char *luks_suffix, *directory_suffix;
        _cleanup_free_ char *path = NULL;
        struct stat st;
        int r;

        assert(m);
        assert(dir_path);
        assert(dentry_name);

        luks_suffix = endswith(dentry_name, ".home");
        if (luks_suffix)
                directory_suffix = NULL;
        else
                directory_suffix = endswith(dentry_name, ".homedir");

        /* Early filter out: by name */
        if (!luks_suffix && !directory_suffix)
                return 0;

        path = path_join(dir_path, dentry_name);
        if (!path)
                return log_oom();

        /* Follow symlinks here, to allow people to link in stuff to make them available locally. */
        if (dir_fd >= 0)
                r = fstatat(dir_fd, dentry_name, &st, 0);
        else
                r = stat(path, &st);
        if (r < 0)
                return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
                                      "Failed to stat() directory entry '%s', ignoring: %m", dentry_name);

        if (S_ISREG(st.st_mode)) {
                _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;

                /* A regular file only counts if it carries the ".home" suffix */
                if (!luks_suffix)
                        return 0;

                /* Cut off the suffix, what remains is the "user@realm" part */
                n = strndup(dentry_name, luks_suffix - dentry_name);
                if (!n)
                        return log_oom();

                r = split_user_name_realm(n, &user_name, &realm);
                if (r == -EINVAL) /* Not the right format: ignore */
                        return 0;
                if (r < 0)
                        return log_error_errno(r, "Failed to split image name into user name/realm: %m");

                return manager_add_home_by_image(m, user_name, realm, path, NULL, USER_LUKS, UID_INVALID);
        }

        if (S_ISDIR(st.st_mode)) {
                _cleanup_free_ char *n = NULL, *user_name = NULL, *realm = NULL;
                _cleanup_close_ int fd = -1;
                UserStorage storage;

                /* A directory only counts if it carries the ".homedir" suffix */
                if (!directory_suffix)
                        return 0;

                n = strndup(dentry_name, directory_suffix - dentry_name);
                if (!n)
                        return log_oom();

                r = split_user_name_realm(n, &user_name, &realm);
                if (r == -EINVAL) /* Not the right format: ignore */
                        return 0;
                if (r < 0)
                        return log_error_errno(r, "Failed to split image name into user name/realm: %m");

                if (dir_fd >= 0)
                        fd = openat(dir_fd, dentry_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
                else
                        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
                if (fd < 0)
                        return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_WARNING, errno,
                                              "Failed to open directory '%s', ignoring: %m", path);

                /* Re-stat via the fd, so that the ownership used below belongs to the inode we opened */
                if (fstat(fd, &st) < 0)
                        return log_warning_errno(errno, "Failed to fstat() %s, ignoring: %m", path);

                assert(S_ISDIR(st.st_mode)); /* Must hold, we used O_DIRECTORY above */

                /* The error is reported through the return value here, hence log r rather than whatever
                 * errno happens to contain. */
                r = btrfs_is_subvol_fd(fd);
                if (r < 0)
                        return log_warning_errno(r, "Failed to determine whether %s is a btrfs subvolume: %m", path);
                if (r > 0)
                        storage = USER_SUBVOLUME;
                else {
                        struct fscrypt_policy policy;

                        if (ioctl(fd, FS_IOC_GET_ENCRYPTION_POLICY, &policy) < 0) {

                                /* Any failure to query the policy is treated as "plain directory", only the
                                 * debug message differs. */
                                if (errno == ENODATA)
                                        log_debug_errno(errno, "Determined %s is not fscrypt encrypted.", path);
                                else if (ERRNO_IS_NOT_SUPPORTED(errno))
                                        log_debug_errno(errno, "Determined %s is not fscrypt encrypted because kernel or file system doesn't support it.", path);
                                else
                                        log_debug_errno(errno, "FS_IOC_GET_ENCRYPTION_POLICY failed with unexpected error code on %s, ignoring: %m", path);

                                storage = USER_DIRECTORY;
                        } else
                                storage = USER_FSCRYPT;
                }

                /* The directory's owner is passed along as the UID to start with */
                return manager_add_home_by_image(m, user_name, realm, path, NULL, storage, st.st_uid);
        }

        /* Neither regular file nor directory: not for us */
        return 0;
}
917 
/* Walks the home root directory (as returned by get_home_root()) and passes each entry to
 * manager_assess_image(). Does nothing if m->scan_slash_home is off.
 *
 * Failures to assess individual entries are ignored. Returns 0 on success or if scanning is disabled, and
 * the (logged) error if the directory cannot be opened or read. */
int manager_enumerate_images(Manager *m) {
        _cleanup_closedir_ DIR *d = NULL;
        const char *root;

        assert(m);

        if (!m->scan_slash_home)
                return 0;

        root = get_home_root();

        d = opendir(root);
        if (!d)
                return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno,
                                      "Failed to open %s: %m", root);

        FOREACH_DIRENT(de, d, return log_error_errno(errno, "Failed to read %s directory: %m", root))
                (void) manager_assess_image(m, dirfd(d), root, de->d_name);

        return 0;
}
936 
/* Connects the manager to the system bus: registers the manager object implementation and the log control
 * API, asynchronously requests the service name, and attaches the connection to the event loop.
 *
 * If $SYSTEMD_HOME_DEBUG_SUFFIX is set, it is appended to the requested bus name. Returns 0 on success, a
 * negative errno-style value otherwise. */
static int manager_connect_bus(Manager *m) {
        const char *busname = "org.freedesktop.home1", *suffix;
        _cleanup_free_ char *suffixed = NULL;
        int r;

        assert(m);
        assert(!m->bus);

        r = sd_bus_default_system(&m->bus);
        if (r < 0)
                return log_error_errno(r, "Failed to connect to system bus: %m");

        r = bus_add_implementation(m->bus, &manager_object, m);
        if (r < 0)
                return r;

        r = bus_log_control_api_register(m->bus);
        if (r < 0)
                return r;

        /* Optionally use a different name than the default one */
        suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
        if (suffix) {
                suffixed = strjoin("org.freedesktop.home1.", suffix);
                if (!suffixed)
                        return log_oom();

                busname = suffixed;
        }

        r = sd_bus_request_name_async(m->bus, NULL, busname, 0, NULL, NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to request name: %m");

        r = sd_bus_attach_event(m->bus, m->event, 0);
        if (r < 0)
                return log_error_errno(r, "Failed to attach bus to event loop: %m");

        /* Best effort only, failure to configure this is not fatal */
        (void) sd_bus_set_exit_on_disconnect(m->bus, true);

        return 0;
}
978 
/* Sets up the varlink server that exposes the io.systemd.UserDatabase methods.
 *
 * Allocates the server in m->varlink_server, binds the three lookup methods, listens on a socket below
 * /run/systemd/userdb/ (with $SYSTEMD_HOME_DEBUG_SUFFIX appended to the socket name, if set), and attaches
 * the server to the event loop. The socket's file name is stored in m->userdb_service and exported as
 * $SYSTEMD_BYPASS_USERDB.
 *
 * Returns 0 on success, a negative errno-style value (already logged) otherwise. */
static int manager_bind_varlink(Manager *m) {
        _cleanup_free_ char *p = NULL;
        const char *suffix, *socket_path;
        int r;

        assert(m);
        assert(!m->varlink_server);

        r = varlink_server_new(&m->varlink_server, VARLINK_SERVER_ACCOUNT_UID|VARLINK_SERVER_INHERIT_USERDATA);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate varlink server object: %m");

        varlink_server_set_userdata(m->varlink_server, m);

        r = varlink_server_bind_method_many(
                        m->varlink_server,
                        "io.systemd.UserDatabase.GetUserRecord",  vl_method_get_user_record,
                        "io.systemd.UserDatabase.GetGroupRecord", vl_method_get_group_record,
                        "io.systemd.UserDatabase.GetMemberships", vl_method_get_memberships);
        if (r < 0)
                return log_error_errno(r, "Failed to register varlink methods: %m");

        /* Best effort: if this fails, binding the socket below will fail and report the problem */
        (void) mkdir_p("/run/systemd/userdb", 0755);

        /* To make things easier to debug, when working from a homed managed home directory, let's optionally
         * use a different varlink socket name */
        suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
        if (suffix) {
                p = strjoin("/run/systemd/userdb/io.systemd.Home.", suffix);
                if (!p)
                        return log_oom();
                socket_path = p;
        } else
                socket_path = "/run/systemd/userdb/io.systemd.Home";

        r = varlink_server_listen_address(m->varlink_server, socket_path, 0666);
        if (r < 0)
                return log_error_errno(r, "Failed to bind to varlink socket: %m");

        r = varlink_server_attach_event(m->varlink_server, m->event, SD_EVENT_PRIORITY_NORMAL);
        if (r < 0)
                return log_error_errno(r, "Failed to attach varlink connection to event loop: %m");

        assert(!m->userdb_service);
        m->userdb_service = strdup(basename(socket_path));
        if (!m->userdb_service)
                return log_oom();

        /* Avoid recursion. setenv() reports the failure reason via errno, hence log that instead of a
         * made-up error code, so that "%m" shows what actually went wrong. */
        if (setenv("SYSTEMD_BYPASS_USERDB", m->userdb_service, 1) < 0)
                return log_error_errno(errno, "Failed to set $SYSTEMD_BYPASS_USERDB: %m");

        return 0;
}
1033 
read_datagram(int fd,struct ucred * ret_sender,void ** ret,int * ret_passed_fd)1034 static ssize_t read_datagram(
1035                 int fd,
1036                 struct ucred *ret_sender,
1037                 void **ret,
1038                 int *ret_passed_fd) {
1039 
1040         CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))) control;
1041         _cleanup_free_ void *buffer = NULL;
1042         _cleanup_close_ int passed_fd = -1;
1043         struct ucred *sender = NULL;
1044         struct cmsghdr *cmsg;
1045         struct msghdr mh;
1046         struct iovec iov;
1047         ssize_t n, m;
1048 
1049         assert(fd >= 0);
1050         assert(ret_sender);
1051         assert(ret);
1052         assert(ret_passed_fd);
1053 
1054         n = next_datagram_size_fd(fd);
1055         if (n < 0)
1056                 return n;
1057 
1058         buffer = malloc(n + 2);
1059         if (!buffer)
1060                 return -ENOMEM;
1061 
1062         /* Pass one extra byte, as a size check */
1063         iov = IOVEC_MAKE(buffer, n + 1);
1064 
1065         mh = (struct msghdr) {
1066                 .msg_iov = &iov,
1067                 .msg_iovlen = 1,
1068                 .msg_control = &control,
1069                 .msg_controllen = sizeof(control),
1070         };
1071 
1072         m = recvmsg_safe(fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1073         if (m < 0)
1074                 return m;
1075 
1076         /* Ensure the size matches what we determined before */
1077         if (m != n) {
1078                 cmsg_close_all(&mh);
1079                 return -EMSGSIZE;
1080         }
1081 
1082         CMSG_FOREACH(cmsg, &mh) {
1083                 if (cmsg->cmsg_level == SOL_SOCKET &&
1084                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1085                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1086                         assert(!sender);
1087                         sender = (struct ucred*) CMSG_DATA(cmsg);
1088                 }
1089 
1090                 if (cmsg->cmsg_level == SOL_SOCKET &&
1091                     cmsg->cmsg_type == SCM_RIGHTS) {
1092 
1093                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
1094                                 cmsg_close_all(&mh);
1095                                 return -EMSGSIZE;
1096                         }
1097 
1098                         assert(passed_fd < 0);
1099                         passed_fd = *(int*) CMSG_DATA(cmsg);
1100                 }
1101         }
1102 
1103         if (sender)
1104                 *ret_sender = *sender;
1105         else
1106                 *ret_sender = (struct ucred) UCRED_INVALID;
1107 
1108         *ret_passed_fd = TAKE_FD(passed_fd);
1109 
1110         /* For safety reasons: let's always NUL terminate.  */
1111         ((char*) buffer)[n] = 0;
1112         *ret = TAKE_PTR(buffer);
1113 
1114         return 0;
1115 }
1116 
/* Event loop callback for the notify socket: reads one datagram, looks up the home whose worker process
 * sent it (by sender PID in m->homes_by_worker_pid), splits the payload into lines and forwards them,
 * together with any passed fd, to home_process_notify().
 *
 * Datagrams without a valid sender PID or from unknown processes are ignored with a warning. Returns 0
 * in all those cases and on success, and a negative value if reading fails non-transiently or memory is
 * exhausted. */
static int on_notify_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        struct ucred sender = UCRED_INVALID;
        _cleanup_free_ void *datagram = NULL;
        _cleanup_strv_free_ char **lines = NULL;
        _cleanup_close_ int passed_fd = -1;
        Manager *m = userdata;
        Home *home;
        ssize_t n;

        assert(s);
        assert(m);

        n = read_datagram(fd, &sender, &datagram, &passed_fd);
        if (n < 0 && ERRNO_IS_TRANSIENT(n)) /* Nothing to read right now, try again on next wakeup */
                return 0;
        if (n < 0)
                return log_error_errno(n, "Failed to read notify datagram: %m");

        if (sender.pid <= 0) {
                log_warning("Received notify datagram without valid sender PID, ignoring.");
                return 0;
        }

        home = hashmap_get(m->homes_by_worker_pid, PID_TO_PTR(sender.pid));
        if (!home) {
                log_warning("Received notify datagram of unknown process, ignoring.");
                return 0;
        }

        lines = strv_split(datagram, "\n");
        if (!lines)
                return log_oom();

        /* The fd is handed over to the callee, hence take it out of our cleanup variable */
        home_process_notify(home, lines, TAKE_FD(passed_fd));
        return 0;
}
1154 
/* Creates the AF_UNIX datagram socket that receives notification messages, binds it to
 * /run/systemd/home/notify (or "notify.<suffix>" if $SYSTEMD_HOME_DEBUG_SUFFIX is set), enables
 * SO_PASSCRED on it, and hooks it into the event loop as m->notify_socket_event_source with
 * on_notify_socket() as handler.
 *
 * On success the event source is made the owner of the fd, and the fd number is returned. On failure a
 * negative errno-style value is returned. */
static int manager_listen_notify(Manager *m) {
        union sockaddr_union sa = {
                .un.sun_family = AF_UNIX,
                .un.sun_path = "/run/systemd/home/notify",
        };
        _cleanup_close_ int fd = -1;
        const char *suffix;
        int r;

        assert(m);
        assert(!m->notify_socket_event_source);

        /* Replace the default socket path by a suffixed one, if requested */
        suffix = getenv("SYSTEMD_HOME_DEBUG_SUFFIX");
        if (suffix) {
                _cleanup_free_ char *unix_path = NULL;

                unix_path = strjoin("/run/systemd/home/notify.", suffix);
                if (!unix_path)
                        return log_oom();

                r = sockaddr_un_set_path(&sa.un, unix_path);
                if (r < 0)
                        return log_error_errno(r, "Socket path %s does not fit in sockaddr_un: %m", unix_path);
        }

        fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
        if (fd < 0)
                return log_error_errno(errno, "Failed to create listening socket: %m");

        /* Both best effort: create the parent directory, and remove any previous socket inode. If either
         * goes wrong bind() will tell us. */
        (void) mkdir_parents(sa.un.sun_path, 0755);
        (void) sockaddr_un_unlink(&sa.un);

        if (bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0)
                return log_error_errno(errno, "Failed to bind to socket: %m");

        /* Ask for sender credentials to be attached to every datagram */
        r = setsockopt_int(fd, SOL_SOCKET, SO_PASSCRED, true);
        if (r < 0)
                return r;

        r = sd_event_add_io(m->event, &m->notify_socket_event_source, fd, EPOLLIN, on_notify_socket, m);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate event source for notify socket: %m");

        (void) sd_event_source_set_description(m->notify_socket_event_source, "notify-socket");

        /* Notifications must be dispatched before SIGCHLD of any worker, so that the error number a
         * client reported is known by the time we see it exit. */
        r = sd_event_source_set_priority(m->notify_socket_event_source, SD_EVENT_PRIORITY_NORMAL - 5);
        if (r < 0)
                return log_error_errno(r, "Failed to alter priority of NOTIFY_SOCKET event source: %m");

        r = sd_event_source_set_io_fd_own(m->notify_socket_event_source, true);
        if (r < 0)
                return log_error_errno(r, "Failed to pass ownership of notify socket: %m");

        /* The event source owns the fd now, don't close it on return */
        return TAKE_FD(fd);
}
1211 
/* Checks whether the specified block device is a GPT partition of the user home type, and if so registers
 * it as LUKS home image, referenced through its /dev/disk/by-uuid/ node.
 *
 * The user name and realm are taken from the partition name (ID_PART_ENTRY_NAME), the device node is
 * derived from ID_FS_UUID. Returns 0 if the device is not relevant, the result of
 * manager_add_home_by_image() if it is, and a negative errno-style value (logged) on failure. */
static int manager_add_device(Manager *m, sd_device *d) {
        const char *sysfs, *table_type, *part_type, *part_name, *fs_uuid;
        _cleanup_free_ char *user_name = NULL, *realm = NULL, *node = NULL;
        sd_id128_t id;
        int r;

        assert(m);
        assert(d);

        r = sd_device_get_syspath(d, &sysfs);
        if (r < 0)
                return log_error_errno(r, "Failed to acquire sysfs path of device: %m");

        /* Filter #1: must be on a GPT partition table */
        r = sd_device_get_property_value(d, "ID_PART_TABLE_TYPE", &table_type);
        if (r == -ENOENT)
                return 0;
        if (r < 0)
                return log_error_errno(r, "Failed to acquire ID_PART_TABLE_TYPE device property, ignoring: %m");

        if (!streq(table_type, "gpt")) {
                log_debug("Found partition (%s) on non-GPT table, ignoring.", sysfs);
                return 0;
        }

        /* Filter #2: must carry the user home partition type */
        r = sd_device_get_property_value(d, "ID_PART_ENTRY_TYPE", &part_type);
        if (r == -ENOENT)
                return 0;
        if (r < 0)
                return log_error_errno(r, "Failed to acquire ID_PART_ENTRY_TYPE device property, ignoring: %m");

        if (id128_equal_string(part_type, GPT_USER_HOME) <= 0) {
                log_debug("Found partition (%s) we don't care about, ignoring.", sysfs);
                return 0;
        }

        /* From here on the partition is of the right type, hence missing or bad data is worth a warning */
        r = sd_device_get_property_value(d, "ID_PART_ENTRY_NAME", &part_name);
        if (r < 0)
                return log_warning_errno(r, "Failed to acquire ID_PART_ENTRY_NAME device property, ignoring: %m");

        r = split_user_name_realm(part_name, &user_name, &realm);
        if (r == -EINVAL)
                return log_warning_errno(r, "Found partition with correct partition type but a non-parsable partition name '%s', ignoring.", part_name);
        if (r < 0)
                return log_error_errno(r, "Failed to validate partition name '%s': %m", part_name);

        r = sd_device_get_property_value(d, "ID_FS_UUID", &fs_uuid);
        if (r < 0)
                return log_warning_errno(r, "Failed to acquire ID_FS_UUID device property, ignoring: %m");

        r = sd_id128_from_string(fs_uuid, &id);
        if (r < 0)
                return log_warning_errno(r, "Failed to parse ID_FS_UUID field '%s', ignoring: %m", fs_uuid);

        /* Reference the device by its by-uuid symlink, formatted from the parsed ID */
        if (asprintf(&node, "/dev/disk/by-uuid/" SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
                return log_oom();

        return manager_add_home_by_image(m, user_name, realm, node, sysfs, USER_LUKS, UID_INVALID);
}
1269 
/* Device monitor callback. For anything but removal events the device is passed to manager_add_device().
 * For removal events the home previously associated with the device's sysfs path is revalidated, or, if
 * there is none, a garbage collection run is enqueued.
 *
 * Always returns 0. Unless the sysfs path of a removed device cannot be determined, an auto-login change
 * signal is emitted at the end. */
static int manager_on_device(sd_device_monitor *monitor, sd_device *d, void *userdata) {
        Manager *m = userdata;
        int r;

        assert(m);
        assert(d);

        if (!device_for_action(d, SD_DEVICE_REMOVE))
                (void) manager_add_device(m, d);
        else {
                const char *sysfs;
                Home *home;

                r = sd_device_get_syspath(d, &sysfs);
                if (r < 0) {
                        log_warning_errno(r, "Failed to acquire sysfs path from device: %m");
                        return 0;
                }

                log_info("block device %s has been removed.", sysfs);

                /* If we synthesized a home record from this very device earlier, revalidating that one is
                 * sufficient. If not, revalidate all of them, but do so asynchronously. */
                home = hashmap_get(m->homes_by_sysfs, sysfs);
                if (home)
                        manager_revalidate_image(m, home);
                else
                        manager_enqueue_gc(m, NULL);
        }

        (void) bus_manager_emit_auto_login_changed(m);
        return 0;
}
1302 
/* Allocates m->device_monitor, restricts it to the "block" subsystem, attaches it to the event loop and
 * starts it with manager_on_device() as callback.
 *
 * Returns 0 on success, a negative errno-style value (logged) on failure. */
static int manager_watch_devices(Manager *m) {
        sd_device_monitor *monitor;
        int r;

        assert(m);
        assert(!m->device_monitor);

        r = sd_device_monitor_new(&m->device_monitor);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate device monitor: %m");

        /* Shortcut, the object remains owned by the manager */
        monitor = m->device_monitor;

        /* Any device type of the block subsystem is of interest */
        r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", NULL);
        if (r < 0)
                return log_error_errno(r, "Failed to configure device monitor match: %m");

        r = sd_device_monitor_attach_event(monitor, m->event);
        if (r < 0)
                return log_error_errno(r, "Failed to attach device monitor to event loop: %m");

        r = sd_device_monitor_start(monitor, manager_on_device, m);
        if (r < 0)
                return log_error_errno(r, "Failed to start device monitor: %m");

        return 0;
}
1327 
/* Iterates through all devices of the "block" subsystem and passes each one to manager_add_device().
 *
 * Failures for individual devices are ignored. Returns 0 on success, or the negative error of setting up
 * the enumerator (which is not logged here). */
static int manager_enumerate_devices(Manager *m) {
        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
        sd_device *device;
        int r;

        assert(m);

        r = sd_device_enumerator_new(&enumerator);
        if (r < 0)
                return r;

        r = sd_device_enumerator_add_match_subsystem(enumerator, "block", true);
        if (r < 0)
                return r;

        FOREACH_DEVICE(enumerator, device)
                (void) manager_add_device(m, device);

        return 0;
}
1348 
/* Loads the private key "local.private" from the first directory of KEY_PATHS_NULSTR that has it, and
 * stores it in m->private_key, after dropping any key loaded previously.
 *
 * The file must be a regular file, owned by root, and must not grant any access to group or others.
 *
 * Returns 1 if a key was loaded, 0 if no key file exists, and a negative errno-style value (logged) on
 * failure. */
static int manager_load_key_pair(Manager *m) {
        _cleanup_fclose_ FILE *key_file = NULL;
        struct stat st;
        int r;

        assert(m);

        /* Forget about the old key first (freeing NULL is a NOP) */
        EVP_PKEY_free(m->private_key);
        m->private_key = NULL;

        r = search_and_fopen_nulstr("local.private", "re", NULL, KEY_PATHS_NULSTR, &key_file, NULL);
        if (r == -ENOENT)
                return 0;
        if (r < 0)
                return log_error_errno(r, "Failed to read private key file: %m");

        if (fstat(fileno(key_file), &st) < 0)
                return log_error_errno(errno, "Failed to stat private key file: %m");

        r = stat_verify_regular(&st);
        if (r < 0)
                return log_error_errno(r, "Private key file is not regular: %m");

        /* Refuse keys that aren't root's or that have any group/other permission bit set */
        if (st.st_uid != 0 || (st.st_mode & 0077) != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Private key file is readable by more than the root user");

        m->private_key = PEM_read_PrivateKey(key_file, NULL, NULL, NULL);
        if (!m->private_key)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to load private key pair");

        log_info("Successfully loaded private key pair.");

        return 1;
}
1385 
1386 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY_CTX*, EVP_PKEY_CTX_free, NULL);
1387 
manager_generate_key_pair(Manager * m)1388 static int manager_generate_key_pair(Manager *m) {
1389         _cleanup_(EVP_PKEY_CTX_freep) EVP_PKEY_CTX *ctx = NULL;
1390         _cleanup_(unlink_and_freep) char *temp_public = NULL, *temp_private = NULL;
1391         _cleanup_fclose_ FILE *fpublic = NULL, *fprivate = NULL;
1392         int r;
1393 
1394         if (m->private_key) {
1395                 EVP_PKEY_free(m->private_key);
1396                 m->private_key = NULL;
1397         }
1398 
1399         ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_ED25519, NULL);
1400         if (!ctx)
1401                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to allocate Ed25519 key generation context.");
1402 
1403         if (EVP_PKEY_keygen_init(ctx) <= 0)
1404                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to initialize Ed25519 key generation context.");
1405 
1406         log_info("Generating key pair for signing local user identity records.");
1407 
1408         if (EVP_PKEY_keygen(ctx, &m->private_key) <= 0)
1409                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to generate Ed25519 key pair");
1410 
1411         log_info("Successfully created Ed25519 key pair.");
1412 
1413         (void) mkdir_p("/var/lib/systemd/home", 0755);
1414 
1415         /* Write out public key (note that we only do that as a help to the user, we don't make use of this ever */
1416         r = fopen_temporary("/var/lib/systemd/home/local.public", &fpublic, &temp_public);
1417         if (r < 0)
1418                 return log_error_errno(errno, "Failed to open key file for writing: %m");
1419 
1420         if (PEM_write_PUBKEY(fpublic, m->private_key) <= 0)
1421                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write public key.");
1422 
1423         r = fflush_sync_and_check(fpublic);
1424         if (r < 0)
1425                 return log_error_errno(r, "Failed to write private key: %m");
1426 
1427         fpublic = safe_fclose(fpublic);
1428 
1429         /* Write out the private key (this actually writes out both private and public, OpenSSL is confusing) */
1430         r = fopen_temporary("/var/lib/systemd/home/local.private", &fprivate, &temp_private);
1431         if (r < 0)
1432                 return log_error_errno(errno, "Failed to open key file for writing: %m");
1433 
1434         if (PEM_write_PrivateKey(fprivate, m->private_key, NULL, NULL, 0, NULL, 0) <= 0)
1435                 return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to write private key pair.");
1436 
1437         r = fflush_sync_and_check(fprivate);
1438         if (r < 0)
1439                 return log_error_errno(r, "Failed to write private key: %m");
1440 
1441         fprivate = safe_fclose(fprivate);
1442 
1443         /* Both are written now, move them into place */
1444 
1445         if (rename(temp_public, "/var/lib/systemd/home/local.public") < 0)
1446                 return log_error_errno(errno, "Failed to move public key file into place: %m");
1447         temp_public = mfree(temp_public);
1448 
1449         if (rename(temp_private, "/var/lib/systemd/home/local.private") < 0) {
1450                 (void) unlink_noerrno("/var/lib/systemd/home/local.public"); /* try to remove the file we already created */
1451                 return log_error_errno(errno, "Failed to move private key file into place: %m");
1452         }
1453         temp_private = mfree(temp_private);
1454 
1455         r = fsync_path_at(AT_FDCWD, "/var/lib/systemd/home/");
1456         if (r < 0)
1457                 log_warning_errno(r, "Failed to sync /var/lib/systemd/home/, ignoring: %m");
1458 
1459         return 1;
1460 }
1461 
/* Makes sure m->private_key is populated: uses the key already in memory if there is one, otherwise tries
 * to load it from disk, and if no key file exists generates a new pair.
 *
 * Returns 1 if a key is already loaded, otherwise whatever manager_load_key_pair() returns if that is
 * non-zero, otherwise the result of manager_generate_key_pair(). */
int manager_acquire_key_pair(Manager *m) {
        int r;

        assert(m);

        /* Nothing to do if we have the key in memory already */
        if (m->private_key)
                return 1;

        /* Loading from disk comes first; only if there's nothing to load (i.e. zero is returned) fall back
         * to generating a fresh pair. */
        r = manager_load_key_pair(m);
        if (r == 0)
                r = manager_generate_key_pair(m);

        return r;
}
1479 
manager_sign_user_record(Manager * m,UserRecord * u,UserRecord ** ret,sd_bus_error * error)1480 int manager_sign_user_record(Manager *m, UserRecord *u, UserRecord **ret, sd_bus_error *error) {
1481         int r;
1482 
1483         assert(m);
1484         assert(u);
1485         assert(ret);
1486 
1487         r = manager_acquire_key_pair(m);
1488         if (r < 0)
1489                 return r;
1490         if (r == 0)
1491                 return sd_bus_error_set(error, BUS_ERROR_NO_PRIVATE_KEY, "Can't sign without local key.");
1492 
1493         return user_record_sign(u, m->private_key, ret);
1494 }
1495 
1496 DEFINE_PRIVATE_HASH_OPS_FULL(public_key_hash_ops, char, string_hash_func, string_compare_func, free, EVP_PKEY, EVP_PKEY_free);
1497 DEFINE_TRIVIAL_CLEANUP_FUNC_FULL(EVP_PKEY*, EVP_PKEY_free, NULL);
1498 
/* Loads a single PEM public key file and registers it in m->public_keys under the file's base name.
 *
 * "local.public" is skipped, and so are files that do not exist (both return 0). The file must be a
 * regular file owned by root that is writable neither by group nor by others, otherwise it is refused.
 * Returns 0 on success or skip, a negative errno-style value otherwise. */
static int manager_load_public_key_one(Manager *m, const char *path) {
        _cleanup_(EVP_PKEY_freep) EVP_PKEY *key = NULL;
        _cleanup_fclose_ FILE *file = NULL;
        _cleanup_free_ char *name = NULL;
        const char *filename;
        struct stat st;
        int r;

        assert(m);

        filename = basename(path);

        /* The local public key is part of the private key we loaded already, no need to load it again */
        if (streq(filename, "local.public"))
                return 0;

        file = fopen(path, "re");
        if (!file && errno == ENOENT) /* Vanished in the meantime? Then there's nothing to load. */
                return 0;
        if (!file)
                return log_error_errno(errno, "Failed to open public key %s: %m", path);

        /* Validate the file we actually opened, not whatever the path might point to by now */
        if (fstat(fileno(file), &st) < 0)
                return log_error_errno(errno, "Failed to stat public key %s: %m", path);

        r = stat_verify_regular(&st);
        if (r < 0)
                return log_error_errno(r, "Public key file %s is not a regular file: %m", path);

        /* Only trust keys that nobody but root may modify */
        if (st.st_uid != 0 || (st.st_mode & (S_IWGRP|S_IWOTH)) != 0)
                return log_error_errno(SYNTHETIC_ERRNO(EPERM), "Public key file %s is writable by more than the root user, refusing.", path);

        r = hashmap_ensure_allocated(&m->public_keys, &public_key_hash_ops);
        if (r < 0)
                return log_oom();

        key = PEM_read_PUBKEY(file, &key, NULL, NULL);
        if (!key)
                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse public key file %s.", path);

        name = strdup(filename);
        if (!name)
                return log_oom();

        r = hashmap_put(m->public_keys, name, key);
        if (r < 0)
                return log_error_errno(r, "Failed to add public key to set: %m");

        /* The hashmap references both now, make sure the cleanup handlers leave them alone */
        TAKE_PTR(name);
        TAKE_PTR(key);

        return 0;
}
1550 
/* Drops all previously loaded public keys and reloads every "*.public" file found in the directories listed
 * in KEY_PATHS_NULSTR. Individual files that fail to load are skipped (their result is ignored); only a
 * failure to enumerate the files is returned as an error. */
static int manager_load_public_keys(Manager *m) {
        _cleanup_strv_free_ char **key_files = NULL;
        int r;

        assert(m);

        /* Start from scratch */
        m->public_keys = hashmap_free(m->public_keys);

        r = conf_files_list_nulstr(&key_files,
                                   ".public",
                                   NULL,
                                   CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED,
                                   KEY_PATHS_NULSTR);
        if (r < 0)
                return log_error_errno(r, "Failed to assemble list of public key directories: %m");

        STRV_FOREACH(key_file, key_files)
                (void) manager_load_public_key_one(m, *key_file);

        return 0;
}
1573 
/* Starts up the manager. The first five steps (notify socket, bus connection, Varlink binding, loading the
 * local key pair and the public keys) are mandatory: the first failure is returned right away and the
 * remaining steps are not run. Watching and enumerating records, images and devices as well as the
 * initial GC are best-effort, their results are ignored. Returns 0 on success. */
int manager_startup(Manager *m) {
        int r;

        assert(m);

        r = manager_listen_notify(m);
        if (r >= 0)
                r = manager_connect_bus(m);
        if (r >= 0)
                r = manager_bind_varlink(m);
        if (r >= 0)
                r = manager_load_key_pair(m); /* we merely load the key here, we do not generate one */
        if (r >= 0)
                r = manager_load_public_keys(m);
        if (r < 0)
                return r;

        manager_watch_home(m);
        (void) manager_watch_devices(m);

        (void) manager_enumerate_records(m);
        (void) manager_enumerate_images(m);
        (void) manager_enumerate_devices(m);

        /* Devices might have been removed while we were not running, clean up the affected home
         * directories */
        (void) manager_enqueue_gc(m, NULL);

        return 0;
}
1611 
/* Checks whether the image backing the home 'h' still exists and acts on it if it does not:
 *
 * - an unfixated home whose image is gone (or whose image state cannot be determined) is freed
 * - a home with a negative (i.e. not yet determined) state that is mounted while its image is absent gets
 *   its processes killed and a forced deactivation enqueued
 *
 * Homes that have an operation running or pending are left alone. Note that 'h' may have been freed when
 * this function returns. */
void manager_revalidate_image(Manager *m, Home *h) {
        int r;

        assert(m);
        assert(h);

        /* Don't interfere with anything that's going on */
        if (h->current_operation || !ordered_set_isempty(h->pending_operations))
                return;

        if (h->state == HOME_UNFIXATED) {
                r = user_record_test_image_path(h->record);
                if (r >= 0 && r != USER_TEST_ABSENT)
                        return; /* Image is still around, all good */

                if (r < 0)
                        log_warning_errno(r, "Can't determine if image of %s exists, freeing unfixated user: %m", h->user_name);
                else
                        log_info("Image for %s disappeared, freeing unfixated user.", h->user_name);

                home_free(h);
                return;
        }

        if (h->state >= 0)
                return;

        r = user_record_test_home_directory(h->record);
        if (r < 0) {
                log_warning_errno(r, "Unable to determine state of home directory, ignoring: %m");
                return;
        }
        if (r != USER_TEST_MOUNTED)
                return;

        r = user_record_test_image_path(h->record);
        if (r < 0) {
                log_warning_errno(r, "Unable to determine state of image path, ignoring: %m");
                return;
        }
        if (r != USER_TEST_ABSENT)
                return;

        /* The home directory is mounted, but its backing image is gone. Do something about it. */
        _cleanup_(operation_unrefp) Operation *deactivate = NULL;

        log_notice("Backing image disappeared while home directory %s was mounted, unmounting it forcibly.", h->user_name);

        r = home_killall(h);
        if (r < 0)
                log_warning_errno(r, "Failed to kill processes of user %s, ignoring: %m", h->user_name);

        /* We don't deactivate right away but enqueue an operation: the home directory might already be
         * executing something, and deactivation can only happen once that is complete. */
        deactivate = operation_new(OPERATION_DEACTIVATE_FORCE, NULL);
        if (!deactivate) {
                log_oom();
                return;
        }

        r = home_schedule_operation(h, deactivate, NULL);
        if (r < 0)
                log_warning_errno(r, "Failed to enqueue forced home directory %s deactivation, ignoring: %m", h->user_name);
}
1676 
/* Revalidates the images of either the single home GC is currently focused on (m->gc_focus, which is
 * reset in the process) or, if no focus is set, of all known homes. Always returns 0. */
int manager_gc_images(Manager *m) {
        Home *h;

        assert_se(m);

        /* Take the focus (if any) and clear it at the same time */
        h = TAKE_PTR(m->gc_focus);
        if (h) {
                manager_revalidate_image(m, h);
                return 0;
        }

        /* No focus, look at everything */
        HASHMAP_FOREACH(h, m->homes_by_name)
                manager_revalidate_image(m, h);

        return 0;
}
1696 
/* Event handler for the deferred rescan: releases the event source and re-enumerates devices and images.
 * Always returns 0. */
static int on_deferred_rescan(sd_event_source *s, void *userdata) {
        Manager *manager = userdata;

        assert(manager);

        /* Drop the event source before doing anything else; manager_enqueue_rescan() only enqueues a new
         * rescan if this field is unset. */
        manager->deferred_rescan_event_source = sd_event_source_disable_unref(manager->deferred_rescan_event_source);

        (void) manager_enumerate_devices(manager);
        (void) manager_enumerate_images(manager);

        return 0;
}
1708 
/* Enqueues a deferred rescan of devices and images. Returns 1 if a rescan was newly enqueued, 0 if nothing
 * was done (one is enqueued already, there's no event loop, or the event loop is exiting/finished), and a
 * negative errno-style value if the event source could not be allocated. */
int manager_enqueue_rescan(Manager *m) {
        int r;

        assert(m);

        /* Already enqueued, or nothing to enqueue on? */
        if (m->deferred_rescan_event_source || !m->event)
                return 0;

        /* No point in scheduling anything if the event loop is going down */
        if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
                return 0;

        r = sd_event_add_defer(m->event, &m->deferred_rescan_event_source, on_deferred_rescan, m);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate rescan event source: %m");

        r = sd_event_source_set_priority(m->deferred_rescan_event_source, SD_EVENT_PRIORITY_IDLE+1);
        if (r < 0)
                log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");

        (void) sd_event_source_set_description(m->deferred_rescan_event_source, "deferred-rescan");

        return 1;
}
1734 
/* Event handler for the deferred GC: releases the event source and runs the image GC. Always returns 0. */
static int on_deferred_gc(sd_event_source *s, void *userdata) {
        Manager *manager = userdata;

        assert(manager);

        /* Drop the event source first; manager_enqueue_gc() treats a set field as "GC already enqueued" */
        manager->deferred_gc_event_source = sd_event_source_disable_unref(manager->deferred_gc_event_source);

        (void) manager_gc_images(manager);

        return 0;
}
1745 
/* Enqueues a request to GC dead homes. If 'focus' is NULL all homes will be scanned, otherwise only the
 * specified one is checked.
 *
 * Returns 1 if a GC run was newly enqueued, 0 if nothing was enqueued (no event loop, event loop going
 * down, or a run is enqueued already), and a negative errno-style value if allocating the event source
 * failed. */
int manager_enqueue_gc(Manager *m, Home *focus) {
        int r;

        assert(m);

        if (!m->event)
                return 0;

        if (IN_SET(sd_event_get_state(m->event), SD_EVENT_FINISHED, SD_EVENT_EXITING))
                return 0;

        if (m->deferred_gc_event_source) {
                /* A run is enqueued already. If it was enqueued for a different focus than the one
                 * requested now, widen it so that it looks at all homes. */
                if (m->gc_focus != focus)
                        m->gc_focus = NULL;

                return 0;
        }

        /* Nothing enqueued so far, begin with the focus we were asked for (if any) */
        m->gc_focus = focus;

        r = sd_event_add_defer(m->event, &m->deferred_gc_event_source, on_deferred_gc, m);
        if (r < 0)
                return log_error_errno(r, "Failed to allocate GC event source: %m");

        r = sd_event_source_set_priority(m->deferred_gc_event_source, SD_EVENT_PRIORITY_IDLE);
        if (r < 0)
                log_warning_errno(r, "Failed to tweak priority of event source, ignoring: %m");

        (void) sd_event_source_set_description(m->deferred_gc_event_source, "deferred-gc");

        return 1;
}
1782 
/* Returns true if rebalancing is called for: either a rebalance is pending or in progress already, or at
 * least one home reports (via home_shall_rebalance()) that it wants to be rebalanced. */
static bool manager_shall_rebalance(Manager *m) {
        Home *h;

        assert(m);

        if (m->rebalance_state == REBALANCE_PENDING ||
            m->rebalance_state == REBALANCE_SHRINKING ||
            m->rebalance_state == REBALANCE_GROWING)
                return true;

        HASHMAP_FOREACH(h, m->homes_by_name) {
                if (!home_shall_rebalance(h))
                        continue;

                return true;
        }

        return false;
}
1797 
/* Comparison function for sorting arrays of Home pointers: primarily by rebalance weight (ascending),
 * secondarily by user name so that the order is stable.
 *
 * The homes with the highest weight come last on purpose: space is distributed from the beginning and
 * rounded down, hence later entries tend to get slightly more than earlier ones. */
static int home_cmp(Home *const*a, Home *const*b) {
        Home *x, *y;
        int by_weight;

        assert(a);
        assert(*a);
        assert(b);
        assert(*b);

        x = *a;
        y = *b;

        by_weight = CMP(user_record_rebalance_weight(x->record), user_record_rebalance_weight(y->record));

        return by_weight != 0 ? by_weight : strcmp(x->user_name, y->user_name);
}
1816 
/* Calculates, for every home that takes part in rebalancing, the size it shall be resized to.
 *
 * The free space of the underlying pool directory and the free space inside all candidate homes is summed
 * up and then handed out proportionally to the per-home rebalance weights (the pool directory itself takes
 * part with the fixed weight REBALANCE_WEIGHT_BACKING). The result is stored in each home's rebalance_goal
 * field, and rebalance_pending is set for the homes whose size change matches the current phase
 * (m->rebalance_state being REBALANCE_SHRINKING or REBALANCE_GROWING; in any other state all candidates
 * are marked). As a side effect m->rebalance_interval_usec is updated, based on the smallest amount of free
 * space assigned to any home.
 *
 * Returns -EBUSY if one of the candidate homes is busy, another negative errno-style value on failure, 0 if
 * there are no candidates or all size changes are below 5%, and the number of candidates (> 0)
 * otherwise. */
static int manager_rebalance_calculate(Manager *m) {
        uint64_t weight_sum, free_sum, usage_sum = 0, min_free = UINT64_MAX;
        _cleanup_free_ Home **array = NULL;
        bool relevant = false;
        const char *home_root;
        struct statfs sfs;
        int c = 0, r;
        Home *h;

        assert(m);

        /* Query the path once, so that the error message names the directory we actually looked at */
        home_root = get_home_root();
        if (statfs(home_root, &sfs) < 0)
                return log_error_errno(errno, "Failed to statfs() %s: %m", home_root);

        free_sum = (uint64_t) sfs.f_bsize * sfs.f_bavail; /* This much free space is available on the
                                                           * underlying pool directory */

        weight_sum = REBALANCE_WEIGHT_BACKING; /* Grant the underlying pool directory a fixed weight of 20
                                                * (home dirs get 100 by default, i.e. 5x more). This weight
                                                * is not configurable, the per-home weights are. */

        HASHMAP_FOREACH(h, m->homes_by_name) {
                statfs_f_type_t fstype;
                h->rebalance_pending = false; /* First, reset the flag, we only want it to be true for the
                                               * homes that qualify for rebalancing */

                if (!home_shall_rebalance(h)) /* Only look at actual candidates */
                        continue;

                if (home_is_busy(h))
                        return -EBUSY; /* Let's not rebalance if there's a busy home directory. */

                r = home_get_disk_status(
                                h,
                                &h->rebalance_size,
                                &h->rebalance_usage,
                                &h->rebalance_free,
                                NULL,
                                NULL,
                                &fstype,
                                NULL);
                if (r < 0) {
                        log_warning_errno(r, "Failed to get free space of home '%s', ignoring: %m", h->user_name);
                        continue;
                }

                /* Accumulate the sums, and be careful with overflows */
                if (h->rebalance_free > UINT64_MAX - free_sum)
                        return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance free overflow");
                free_sum += h->rebalance_free;

                if (h->rebalance_usage > UINT64_MAX - usage_sum)
                        return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance usage overflow");
                usage_sum += h->rebalance_usage;

                h->rebalance_weight = user_record_rebalance_weight(h->record);
                if (h->rebalance_weight > UINT64_MAX - weight_sum)
                        return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Rebalance weight overflow");
                weight_sum += h->rebalance_weight;

                h->rebalance_min = minimal_size_by_fs_magic(fstype);

                if (!GREEDY_REALLOC(array, c+1))
                        return log_oom();

                array[c++] = h;
        }

        if (c == 0) {
                log_debug("No homes to rebalance.");
                return 0;
        }

        assert(weight_sum > 0);

        log_debug("Disk space usage by all home directories to rebalance: %s — available disk space: %s",
                  FORMAT_BYTES(usage_sum), FORMAT_BYTES(free_sum));

        /* Bring the home directories in a well-defined order, so that we distribute space in a reproducible
         * way for the same parameters. */
        typesafe_qsort(array, c, home_cmp);

        for (int i = 0; i < c; i++) {
                uint64_t new_free;
                double d;

                h = array[i];

                assert(h->rebalance_free <= free_sum);
                assert(h->rebalance_usage <= usage_sum);
                assert(h->rebalance_weight <= weight_sum);

                d = ((double) (free_sum / 4096) * (double) h->rebalance_weight) / (double) weight_sum; /* Calculate new space for this home in units of 4K */

                /* Convert from units of 4K back to bytes */
                if (d >= (double) (UINT64_MAX/4096))
                        new_free = UINT64_MAX;
                else
                        new_free = (uint64_t) d * 4096;

                /* Subtract the weight and assigned space from the sums now, to distribute the rounding noise
                 * to the remaining home dirs */
                free_sum = LESS_BY(free_sum, new_free);
                weight_sum = LESS_BY(weight_sum, h->rebalance_weight);

                /* Keep track of the least amount of free space assigned to any home directory: we want to
                 * schedule the next rebalance more quickly if this is low. (This has to track the free
                 * space we compare against, not the total size of the home.) */
                if (new_free < min_free)
                        min_free = new_free;

                if (new_free > UINT64_MAX - h->rebalance_usage)
                        h->rebalance_goal = UINT64_MAX-1; /* maximum size */
                else {
                        h->rebalance_goal = h->rebalance_usage + new_free;

                        /* Never go below the minimal size of the file system, if there is one */
                        if (h->rebalance_min != UINT64_MAX && h->rebalance_goal < h->rebalance_min)
                                h->rebalance_goal = h->rebalance_min;
                }

                /* Skip over this home if the state doesn't match the operation */
                if ((m->rebalance_state == REBALANCE_SHRINKING && h->rebalance_goal > h->rebalance_size) ||
                    (m->rebalance_state == REBALANCE_GROWING && h->rebalance_goal < h->rebalance_size))
                        h->rebalance_pending = false;
                else {
                        log_debug("Rebalancing home directory '%s' %s → %s.", h->user_name,
                                  FORMAT_BYTES(h->rebalance_size), FORMAT_BYTES(h->rebalance_goal));
                        h->rebalance_pending = true;
                }

                /* A change of 5% or more of the current size makes the whole run worthwhile */
                if ((fabs((double) h->rebalance_size - (double) h->rebalance_goal) * 100 / (double) h->rebalance_size) >= 5.0)
                        relevant = true;
        }

        /* Scale next rebalancing interval based on the least amount of free space of any of the home
         * directories. We pick a time in the range 1min … 15min, scaled by log2(min_free), so that:
         * 10M → ~0.7min, 100M → ~2.7min, 1G → ~4.6min, 10G → ~6.5min, 100G ~8.4 */
        m->rebalance_interval_usec = (usec_t) CLAMP((LESS_BY(log2(min_free), 22)*15*USEC_PER_MINUTE)/26,
                                                    1 * USEC_PER_MINUTE,
                                                    15 * USEC_PER_MINUTE);

        log_debug("Rebalancing interval set to %s.", FORMAT_TIMESPAN(m->rebalance_interval_usec, USEC_PER_MSEC));

        /* Let's suppress small resizes, growing/shrinking file systems isn't free after all */
        if (!relevant) {
                log_debug("Skipping rebalancing, since all calculated size changes are below ±5%%.");
                return 0;
        }

        return c;
}
1966 
/* Initiates a resize to rebalance_goal for every home that has rebalance_pending set (the flag is reset in
 * the process). Failures of individual homes are logged and ignored. Returns the number of homes for which
 * home_resize() succeeded. */
static int manager_rebalance_apply(Manager *m) {
        int n_resizing = 0;
        Home *h;

        assert(m);

        HASHMAP_FOREACH(h, m->homes_by_name) {
                _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
                int r;

                if (!h->rebalance_pending)
                        continue;

                /* We are acting on it now, regardless of how this turns out */
                h->rebalance_pending = false;

                r = home_resize(h, h->rebalance_goal, /* secret= */ NULL, /* automatic= */ true, &error);
                if (r >= 0) {
                        n_resizing++;
                        continue;
                }

                log_warning_errno(r, "Failed to resize home '%s' for rebalancing, ignoring: %s",
                                  h->user_name, bus_error_message(&error, r));
        }

        return n_resizing;
}
1991 
/* Sends an empty method reply to each message in m->rebalance_pending_method_calls, removing the messages
 * from the set and dropping our reference to them. Failures to reply are logged at debug level and
 * otherwise ignored. */
static void manager_rebalance_reply_messages(Manager *m) {
        sd_bus_message *call;

        assert(m);

        while ((call = set_steal_first(m->rebalance_pending_method_calls))) {
                int r;

                r = sd_bus_reply_method_return(call, NULL);
                if (r < 0)
                        log_debug_errno(r, "Failed to reply to rebalance method call, ignoring: %m");

                /* The set gave up its reference when we stole the entry, hence it's on us to release it */
                sd_bus_message_unref(call);
        }
}
2009 
/* Advances the rebalancing state machine: first a shrinking phase, then a growing phase (so that the homes
 * to grow can take up the space freed by the ones that were shrunk), then completion.
 *
 * Phases in which nothing needs to be calculated or applied are skipped right away. The function returns
 * as soon as at least one resize was initiated (0), if a home is busy (-EBUSY, with the state reverted so
 * that the phase can be retried), or when the cycle ended, successfully or not. In the latter case the
 * state is reset to REBALANCE_IDLE, pending method calls are replied to and the next rebalance is
 * scheduled. */
static int manager_rebalance_now(Manager *m) {
        RebalanceState revert_state; /* where to go back to if a phase can't be entered because a home is busy */
        int r;

        assert(m);

        log_debug("Rebalancing now...");

        for (;;) {
                /* Step 1: pick the next phase, based on the phase we are coming from */
                switch (m->rebalance_state) {

                case REBALANCE_IDLE:
                case REBALANCE_PENDING:
                case REBALANCE_WAITING:
                        /* A new cycle begins, and it begins with shrinking the large home dirs */
                        m->rebalance_state = REBALANCE_SHRINKING;
                        revert_state = REBALANCE_PENDING;

                        /* The method calls queued so far become the pending ones of this cycle. Anything
                         * still pending from before (there shouldn't be anything at this point) is
                         * flushed out. */
                        set_clear(m->rebalance_pending_method_calls);
                        SWAP_TWO(m->rebalance_pending_method_calls, m->rebalance_queued_method_calls);

                        log_debug("Shrinking phase..");
                        break;

                case REBALANCE_SHRINKING:
                        /* Shrinking is done, next grow the small home dirs */
                        m->rebalance_state = REBALANCE_GROWING;
                        revert_state = REBALANCE_SHRINKING;
                        log_debug("Growing phase..");
                        break;

                case REBALANCE_GROWING:
                        /* Growing is done too, which concludes the cycle */
                        log_info("Rebalancing complete.");
                        m->rebalance_state = REBALANCE_IDLE;
                        r = 0;
                        goto finish;

                case REBALANCE_OFF:
                default:
                        assert_not_reached();
                }

                /* Step 2: figure out what to do in this phase */
                r = manager_rebalance_calculate(m);
                if (r == -EBUSY) {
                        /* A home directory is busy right now, hence we can't calculate anything. Go back
                         * to a state from which this phase will be entered again. */
                        log_debug("Can't enter phase, busy.");
                        m->rebalance_state = revert_state;
                        return r;
                }
                if (r < 0)
                        goto finish;
                if (r == 0)
                        continue; /* nothing to do in this phase, proceed to the next one right away */

                /* Step 3: do it */
                r = manager_rebalance_apply(m);
                if (r < 0)
                        goto finish;
                if (r > 0)
                        return 0; /* At least one resize operation is pending now, we are done for now */

                /* Nothing was applied, proceed to the next phase right away */
        }

finish:
        /* Reset state and schedule next rebalance */
        m->rebalance_state = REBALANCE_IDLE;
        manager_rebalance_reply_messages(m);
        (void) manager_schedule_rebalance(m, /* immediately= */ false);
        return r;
}
2091 
/* Timer event handler that runs the next step of rebalancing. The result of manager_rebalance_now() is
 * deliberately not propagated to the event loop; this handler always returns 0. */
static int on_rebalance_timer(sd_event_source *s, usec_t t, void *userdata) {
        Manager *manager = userdata;

        assert(s);
        assert(manager);
        assert(IN_SET(manager->rebalance_state, REBALANCE_WAITING, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING));

        (void) manager_rebalance_now(manager);

        return 0;
}
2102 
/* Schedules the next rebalancing run.
 *
 * With immediately=true the rebalance timer is set to elapse right away, and a rebalance is marked as
 * pending unless one is pending or running already. With immediately=false the timer is armed to elapse
 * after m->rebalance_interval_usec, but only if the state is REBALANCE_OFF or REBALANCE_IDLE; in any other
 * state the existing schedule is left untouched.
 *
 * Returns 1 if a rebalance is scheduled (newly or from before), 0 if no rebalancing is needed, and a
 * negative errno-style value on failure. In the latter two cases rebalancing is turned off: the timer
 * event source is released, the state is set to REBALANCE_OFF and pending method calls are replied to. */
int manager_schedule_rebalance(Manager *m, bool immediately) {
        int r;

        assert(m);

        /* Check if there are any records where rebalancing is requested */
        if (!manager_shall_rebalance(m)) {
                log_debug("Not scheduling rebalancing, not needed.");
                r = 0; /* report that we didn't schedule anything because nothing needed it */
                goto turn_off;
        }

        if (immediately) {
                /* If we are told to rebalance immediately, then mark a rebalance as pending (even if we are
                 * already running one) */

                if (m->rebalance_event_source) {
                        /* Reuse the existing timer: make it elapse right away and (re-)enable it */
                        r = sd_event_source_set_time(m->rebalance_event_source, 0);
                        if (r < 0) {
                                log_error_errno(r, "Failed to schedule immediate rebalancing: %m");
                                goto turn_off;
                        }

                        r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
                        if (r < 0) {
                                log_error_errno(r, "Failed to enable rebalancing event source: %m");
                                goto turn_off;
                        }
                } else {
                        r = sd_event_add_time(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, 0, USEC_PER_SEC, on_rebalance_timer, m);
                        if (r < 0) {
                                log_error_errno(r, "Failed to allocate rebalance event source: %m");
                                goto turn_off;
                        }

                        r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
                        if (r < 0) {
                                log_error_errno(r, "Failed to set rebalance event source priority: %m");
                                goto turn_off;
                        }

                        (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");

                }

                /* Don't disturb a rebalance that is pending or in progress already */
                if (!IN_SET(m->rebalance_state, REBALANCE_PENDING, REBALANCE_SHRINKING, REBALANCE_GROWING))
                        m->rebalance_state = REBALANCE_PENDING;

                log_debug("Scheduled immediate rebalancing...");
                return 1; /* report that we scheduled something */
        }

        /* If we are told to schedule a rebalancing eventually, then do so only if we are not executing
         * anything yet. Also if we have something scheduled already, leave it in place */
        if (!IN_SET(m->rebalance_state, REBALANCE_OFF, REBALANCE_IDLE))
                return 1; /* report that there's already something scheduled */

        if (m->rebalance_event_source) {
                /* Reuse the existing timer: make it elapse after the rebalance interval and (re-)enable it */
                r = sd_event_source_set_time_relative(m->rebalance_event_source, m->rebalance_interval_usec);
                if (r < 0) {
                        log_error_errno(r, "Failed to schedule rebalancing: %m");
                        goto turn_off;
                }

                r = sd_event_source_set_enabled(m->rebalance_event_source, SD_EVENT_ONESHOT);
                if (r < 0) {
                        log_error_errno(r, "Failed to enable rebalancing event source: %m");
                        goto turn_off;
                }
        } else {
                r = sd_event_add_time_relative(m->event, &m->rebalance_event_source, CLOCK_MONOTONIC, m->rebalance_interval_usec, USEC_PER_SEC, on_rebalance_timer, m);
                if (r < 0) {
                        log_error_errno(r, "Failed to allocate rebalance event source: %m");
                        goto turn_off;
                }

                r = sd_event_source_set_priority(m->rebalance_event_source, SD_EVENT_PRIORITY_IDLE + 10);
                if (r < 0) {
                        log_error_errno(r, "Failed to set rebalance event source priority: %m");
                        goto turn_off;
                }

                (void) sd_event_source_set_description(m->rebalance_event_source, "rebalance");
        }

        m->rebalance_state = REBALANCE_WAITING; /* We managed to enqueue a timer event, we now wait until it fires */
        log_debug("Scheduled rebalancing in %s...", FORMAT_TIMESPAN(m->rebalance_interval_usec, 0));
        return 1; /* report that we scheduled something */

turn_off:
        /* Either nothing needs rebalancing or we failed: shut rebalancing down, and don't leave anyone
         * waiting for a reply */
        m->rebalance_event_source = sd_event_source_disable_unref(m->rebalance_event_source);
        m->rebalance_state = REBALANCE_OFF;
        manager_rebalance_reply_messages(m);
        return r;
}
2198 
/* If a rebalance is pending or in progress, reschedules it so that it is executed immediately. Returns 0
 * if there was nothing to reschedule, 1 if the rebalance was rescheduled, and a negative errno-style value
 * if scheduling failed. */
int manager_reschedule_rebalance(Manager *m) {
        int r;

        assert(m);

        switch (m->rebalance_state) {

        case REBALANCE_PENDING:
        case REBALANCE_SHRINKING:
        case REBALANCE_GROWING:
                break;

        default:
                return 0; /* nothing pending, nothing to do */
        }

        r = manager_schedule_rebalance(m, /* immediately= */ true);

        return r < 0 ? r : 1;
}
2215