1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <linux/fs.h>
6 
7 #include "alloc-util.h"
8 #include "fd-util.h"
9 #include "format-util.h"
10 #include "home-util.h"
11 #include "homework-mount.h"
12 #include "homework.h"
13 #include "missing_mount.h"
14 #include "missing_syscall.h"
15 #include "mkdir.h"
16 #include "mount-util.h"
17 #include "namespace-util.h"
18 #include "path-util.h"
19 #include "string-util.h"
20 #include "user-util.h"
21 
/* Returns the default mount option string for the file system type 'fstype'.
 *
 * An environment variable named "SYSTEMD_HOME_MOUNT_OPTIONS_" + fstype (the whole name passed through
 * ascii_strupper()) takes precedence over the built-in table below. If neither the environment nor the
 * table has an entry for the file system type, NULL is returned.
 *
 * The result is either the value returned by getenv() or a string literal, hence the caller must not
 * free it. */
static const char *mount_options_for_fstype(const char *fstype) {
        static const struct {
                const char *fstype;
                const char *options;
        } builtin_defaults[] = {
                { "ext4",  "noquota,user_xattr"    },
                { "xfs",   "noquota"               },
                { "btrfs", "noacl,compress=zstd:1" },
        };
        const char *override;
        char *envvar;

        assert(fstype);

        /* The environment gets the first say, so that the built-in defaults can be overridden */
        envvar = strjoina("SYSTEMD_HOME_MOUNT_OPTIONS_", fstype);
        override = getenv(ascii_strupper(envvar));
        if (override)
                return override;

        /* Otherwise consult our own table */
        for (size_t i = 0; i < sizeof(builtin_defaults) / sizeof(builtin_defaults[0]); i++)
                if (streq(fstype, builtin_defaults[i].fstype))
                        return builtin_defaults[i].options;

        return NULL;
}
42 
home_mount_node(const char * node,const char * fstype,bool discard,unsigned long flags,const char * extra_mount_options)43 int home_mount_node(
44                 const char *node,
45                 const char *fstype,
46                 bool discard,
47                 unsigned long flags,
48                 const char *extra_mount_options) {
49 
50         _cleanup_free_ char *joined = NULL;
51         const char *default_options;
52         int r;
53 
54         assert(node);
55         assert(fstype);
56 
57         default_options = mount_options_for_fstype(fstype);
58         if (default_options) {
59                 if (!strextend_with_separator(&joined, ",", default_options))
60                         return log_oom();
61         }
62 
63         if (!strextend_with_separator(&joined, ",", discard ? "discard" : "nodiscard"))
64                 return log_oom();
65 
66         if (extra_mount_options) {
67                 if (!strextend_with_separator(&joined, ",", extra_mount_options))
68                         return log_oom();
69         }
70 
71         r = mount_nofollow_verbose(LOG_ERR, node, HOME_RUNTIME_WORK_DIR, fstype, flags|MS_RELATIME, joined);
72         if (r < 0)
73                 return r;
74 
75         log_info("Mounting file system completed.");
76         return 0;
77 }
78 
home_unshare_and_mkdir(void)79 int home_unshare_and_mkdir(void) {
80         int r;
81 
82         if (unshare(CLONE_NEWNS) < 0)
83                 return log_error_errno(errno, "Couldn't unshare file system namespace: %m");
84 
85         assert(path_startswith(HOME_RUNTIME_WORK_DIR, "/run"));
86 
87         r = mount_nofollow_verbose(LOG_ERR, "/run", "/run", NULL, MS_SLAVE|MS_REC, NULL); /* Mark /run as MS_SLAVE in our new namespace */
88         if (r < 0)
89                 return r;
90 
91         (void) mkdir_p(HOME_RUNTIME_WORK_DIR, 0700);
92         return 0;
93 }
94 
home_unshare_and_mount(const char * node,const char * fstype,bool discard,unsigned long flags,const char * extra_mount_options)95 int home_unshare_and_mount(
96                 const char *node,
97                 const char *fstype,
98                 bool discard,
99                 unsigned long flags,
100                 const char *extra_mount_options) {
101 
102         int r;
103 
104         assert(node);
105         assert(fstype);
106 
107         r = home_unshare_and_mkdir();
108         if (r < 0)
109                 return r;
110 
111         r = home_mount_node(node, fstype, discard, flags, extra_mount_options);
112         if (r < 0)
113                 return r;
114 
115         r = mount_nofollow_verbose(LOG_ERR, NULL, HOME_RUNTIME_WORK_DIR, NULL, MS_PRIVATE, NULL);
116         if (r < 0) {
117                 (void) umount_verbose(LOG_ERR, HOME_RUNTIME_WORK_DIR, UMOUNT_NOFOLLOW);
118                 return r;
119         }
120 
121         return 0;
122 }
123 
home_move_mount(const char * mount_suffix,const char * target)124 int home_move_mount(const char *mount_suffix, const char *target) {
125         _cleanup_free_ char *subdir = NULL;
126         const char *d;
127         int r;
128 
129         assert(target);
130 
131         /* If 'mount_suffix' is set, then we'll mount a subdir of the source mount into the host. If it's
132          * NULL we'll move the mount itself */
133         if (mount_suffix) {
134                 subdir = path_join(HOME_RUNTIME_WORK_DIR, mount_suffix);
135                 if (!subdir)
136                         return log_oom();
137 
138                 d = subdir;
139         } else
140                 d = HOME_RUNTIME_WORK_DIR;
141 
142         (void) mkdir_p(target, 0700);
143 
144         r = mount_nofollow_verbose(LOG_ERR, d, target, NULL, MS_BIND, NULL);
145         if (r < 0)
146                 return r;
147 
148         r = umount_recursive(HOME_RUNTIME_WORK_DIR, 0);
149         if (r < 0)
150                 return log_error_errno(r, "Failed to unmount %s: %m", HOME_RUNTIME_WORK_DIR);
151 
152         log_info("Moving to final mount point %s completed.", target);
153         return 0;
154 }
155 
append_identity_range(char ** text,uid_t start,uid_t next_start,uid_t exclude)156 static int append_identity_range(char **text, uid_t start, uid_t next_start, uid_t exclude) {
157         /* Creates an identity range ranging from 'start' to 'next_start-1'. Excludes the UID specified by 'exclude' if
158          * it is in that range. */
159 
160         assert(text);
161 
162         if (next_start <= start) /* Empty range? */
163                 return 0;
164 
165         if (exclude < start || exclude >= next_start) /* UID to exclude it outside of the range? */
166                 return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start, start, next_start - start);
167 
168         if (start == exclude && next_start == exclude + 1) /* The only UID in the range is the one to exclude? */
169                 return 0;
170 
171         if (exclude == start) /* UID to exclude at beginning of range? */
172                 return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start+1, start+1, next_start - start - 1);
173 
174         if (exclude == next_start - 1) /* UID to exclude at end of range? */
175                 return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start, start, next_start - start - 1);
176 
177         return strextendf(text,
178                           UID_FMT " " UID_FMT " " UID_FMT "\n"
179                           UID_FMT " " UID_FMT " " UID_FMT "\n",
180                           start, start, exclude - start,
181                           exclude + 1, exclude + 1, next_start - exclude - 1);
182 }
183 
make_userns(uid_t stored_uid,uid_t exposed_uid)184 static int make_userns(uid_t stored_uid, uid_t exposed_uid) {
185         _cleanup_free_ char *text = NULL;
186         _cleanup_close_ int userns_fd = -1;
187         int r;
188 
189         assert(uid_is_valid(stored_uid));
190         assert(uid_is_valid(exposed_uid));
191 
192         assert_cc(HOME_UID_MIN <= HOME_UID_MAX);
193         assert_cc(HOME_UID_MAX < UID_NOBODY);
194 
195         /* Map everything below the homed UID range to itself (except for the UID we actually care about if
196          * it is inside this range) */
197         r = append_identity_range(&text, 0, HOME_UID_MIN, stored_uid);
198         if (r < 0)
199                 return log_oom();
200 
201         /* Now map the UID we are doing this for to the target UID. */
202         r = strextendf(&text, UID_FMT " " UID_FMT " " UID_FMT "\n", stored_uid, exposed_uid, 1);
203         if (r < 0)
204                 return log_oom();
205 
206         /* Map everything above the homed UID range to itself (again, excluding the UID we actually care
207          * about if it is in that range). Also we leave "nobody" itself excluded) */
208         r = append_identity_range(&text, HOME_UID_MAX, UID_NOBODY, stored_uid);
209         if (r < 0)
210                 return log_oom();
211 
212         /* Also map the container range. People can use that to place containers owned by high UIDs in their
213          * home directories if they really want. We won't manage this UID range for them but pass it through
214          * 1:1, and it will lose its meaning once migrated between hosts. */
215         r = append_identity_range(&text, CONTAINER_UID_BASE_MIN, CONTAINER_UID_BASE_MAX+1, stored_uid);
216         if (r < 0)
217                 return log_oom();
218 
219         /* Map nspawn's mapped root UID as identity mapping so that people can run nspawn uidmap mounted
220          * containers off $HOME, if they want. */
221         r = strextendf(&text, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_MAPPED_ROOT, UID_MAPPED_ROOT, 1);
222         if (r < 0)
223                 return log_oom();
224 
225         /* Leave everything else unmapped, starting from UID_NOBODY itself. Specifically, this means the
226          * whole space outside of 16bit remains unmapped */
227 
228         log_debug("Creating userns with mapping:\n%s", text);
229 
230         userns_fd = userns_acquire(text, text); /* same uid + gid mapping */
231         if (userns_fd < 0)
232                 return log_error_errno(userns_fd, "Failed to allocate user namespace: %m");
233 
234         return TAKE_FD(userns_fd);
235 }
236 
home_shift_uid(int dir_fd,const char * target,uid_t stored_uid,uid_t exposed_uid,int * ret_mount_fd)237 int home_shift_uid(int dir_fd, const char *target, uid_t stored_uid, uid_t exposed_uid, int *ret_mount_fd) {
238         _cleanup_close_ int mount_fd = -1, userns_fd = -1;
239         int r;
240 
241         assert(dir_fd >= 0);
242         assert(uid_is_valid(stored_uid));
243         assert(uid_is_valid(exposed_uid));
244 
245         /* Let's try to set up a UID mapping for this directory. This is called when first creating a home
246          * directory or when activating it again. We do this as optimization only, to avoid having to
247          * recursively chown() things on each activation. If the kernel or file system doesn't support this
248          * scheme we'll handle this gracefully, and not do anything, so that the later recursive chown()ing
249          * then fixes up things for us. Note that the chown()ing is smart enough to skip things if they look
250          * alright already.
251          *
252          * Note that this always creates a new mount (i.e. we use OPEN_TREE_CLONE), since applying idmaps is
253          * not allowed once the mount is put in place. */
254 
255         mount_fd = open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
256         if (mount_fd < 0) {
257                 if (ERRNO_IS_NOT_SUPPORTED(errno)) {
258                         log_debug_errno(errno, "The open_tree() syscall is not supported, not setting up UID shift mount: %m");
259 
260                         if (ret_mount_fd)
261                                 *ret_mount_fd = -1;
262 
263                         return 0;
264                 }
265 
266                 return log_error_errno(errno, "Failed to open tree of home directory: %m");
267         }
268 
269         userns_fd = make_userns(stored_uid, exposed_uid);
270         if (userns_fd < 0)
271                 return userns_fd;
272 
273         /* Set the user namespace mapping attribute on the cloned mount point */
274         if (mount_setattr(mount_fd, "", AT_EMPTY_PATH,
275                           &(struct mount_attr) {
276                                   .attr_set = MOUNT_ATTR_IDMAP,
277                                   .userns_fd = userns_fd,
278                           }, MOUNT_ATTR_SIZE_VER0) < 0) {
279 
280                 if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EINVAL) { /* EINVAL is documented in mount_attr() as fs doesn't support idmapping */
281                         log_debug_errno(errno, "UID/GID mapping for shifted mount not available, not setting it up: %m");
282 
283                         if (ret_mount_fd)
284                                 *ret_mount_fd = -1;
285 
286                         return 0;
287                 }
288 
289                 return log_error_errno(errno, "Failed to apply UID/GID mapping: %m");
290         }
291 
292         if (target)
293                 r = move_mount(mount_fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH);
294         else
295                 r = move_mount(mount_fd, "", dir_fd, "", MOVE_MOUNT_F_EMPTY_PATH|MOVE_MOUNT_T_EMPTY_PATH);
296         if (r < 0)
297                 return log_error_errno(errno, "Failed to apply UID/GID map: %m");
298 
299         log_debug("Applied uidmap mount to %s. Mapping is " UID_FMT " → " UID_FMT ".", strna(target), stored_uid, exposed_uid);
300 
301         if (ret_mount_fd)
302                 *ret_mount_fd = TAKE_FD(mount_fd);
303 
304         return 1;
305 }
306