1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <linux/fs.h>
6 #include <linux/loop.h>
7 #include <linux/magic.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <sys/file.h>
11 #include <sys/ioctl.h>
12 #include <sys/stat.h>
13 #include <unistd.h>
14 
15 #include "alloc-util.h"
16 #include "btrfs-util.h"
17 #include "chase-symlinks.h"
18 #include "chattr-util.h"
19 #include "copy.h"
20 #include "dirent-util.h"
21 #include "discover-image.h"
22 #include "dissect-image.h"
23 #include "env-file.h"
24 #include "env-util.h"
25 #include "fd-util.h"
26 #include "fs-util.h"
27 #include "hashmap.h"
28 #include "hostname-setup.h"
29 #include "id128-util.h"
30 #include "lockfile-util.h"
31 #include "log.h"
32 #include "loop-util.h"
33 #include "macro.h"
34 #include "mkdir.h"
35 #include "nulstr-util.h"
36 #include "os-util.h"
37 #include "path-util.h"
38 #include "rm-rf.h"
39 #include "stat-util.h"
40 #include "string-table.h"
41 #include "string-util.h"
42 #include "strv.h"
43 #include "time-util.h"
44 #include "utf8.h"
45 #include "xattr-util.h"
46 
47 static const char* const image_search_path[_IMAGE_CLASS_MAX] = {
48         [IMAGE_MACHINE] =   "/etc/machines\0"              /* only place symlinks here */
49                             "/run/machines\0"              /* and here too */
50                             "/var/lib/machines\0"          /* the main place for images */
51                             "/var/lib/container\0"         /* legacy */
52                             "/usr/local/lib/machines\0"
53                             "/usr/lib/machines\0",
54 
55         [IMAGE_PORTABLE] =  "/etc/portables\0"             /* only place symlinks here */
56                             "/run/portables\0"             /* and here too */
57                             "/var/lib/portables\0"         /* the main place for images */
58                             "/usr/local/lib/portables\0"
59                             "/usr/lib/portables\0",
60 
61         [IMAGE_EXTENSION] = "/etc/extensions\0"             /* only place symlinks here */
62                             "/run/extensions\0"             /* and here too */
63                             "/var/lib/extensions\0"         /* the main place for images */
64                             "/usr/local/lib/extensions\0"
65                             "/usr/lib/extensions\0",
66 };
67 
/* Destroys an Image object: releases every string and string list it owns, then the object itself.
 * Hands back whatever mfree() returns (presumably NULL, so that callers can reset their pointer —
 * confirm against mfree()'s definition). */
static Image *image_free(Image *i) {
        assert(i);

        /* String lists with cached metadata */
        strv_free(i->extension_release);
        strv_free(i->os_release);
        strv_free(i->machine_info);

        /* Plain strings */
        free(i->hostname);
        free(i->path);
        free(i->name);

        return mfree(i);
}
81 
82 DEFINE_TRIVIAL_REF_UNREF_FUNC(Image, image, image_free);
83 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(image_hash_ops, char, string_hash_func, string_compare_func,
84                                       Image, image_unref);
85 
/* Builds the list of candidate "<name>.nspawn" settings file paths for an image: one below
 * /etc/systemd/nspawn, one below /run/systemd/nspawn, and one in the directory the image itself lives
 * in. Returns a newly allocated, NULL-terminated string array, or NULL on failure. */
static char **image_settings_path(Image *image) {
        _cleanup_strv_free_ char **paths = NULL;
        const char *settings_name;

        assert(image);

        /* Three entries plus the terminating NULL. Zero-initialized, so that a partially filled array
         * is still a valid string list when the cleanup handler frees it. */
        paths = new0(char*, 4);
        if (!paths)
                return NULL;

        settings_name = strjoina(image->name, ".nspawn");

        paths[0] = path_join("/etc/systemd/nspawn", settings_name);
        if (!paths[0])
                return NULL;

        paths[1] = path_join("/run/systemd/nspawn", settings_name);
        if (!paths[1])
                return NULL;

        paths[2] = file_in_same_dir(image->path, settings_name);
        if (!paths[2])
                return NULL;

        return TAKE_PTR(paths);
}
113 
/* Returns the newly allocated path of the "<name>.roothash" file located in the same directory as the
 * image, or NULL if file_in_same_dir() fails. */
static char *image_roothash_path(Image *image) {
        const char *roothash_name;

        assert(image);

        roothash_name = strjoina(image->name, ".roothash");
        return file_in_same_dir(image->path, roothash_name);
}
123 
/* Allocates a new Image object with a reference count of 1.
 *
 * 'pretty' becomes the image name; the image path is 'path' joined with 'filename' and then simplified.
 * All usage/limit counters start out as UINT64_MAX. On success the object is stored in *ret and 0 is
 * returned; -ENOMEM is returned if any allocation fails. */
static int image_new(
                ImageType t,
                const char *pretty,
                const char *path,
                const char *filename,
                bool read_only,
                usec_t crtime,
                usec_t mtime,
                Image **ret) {

        _cleanup_free_ char *name_copy = NULL, *full_path = NULL;
        Image *img;

        assert(t >= 0);
        assert(t < _IMAGE_TYPE_MAX);
        assert(pretty);
        assert(filename);
        assert(ret);

        /* Prepare the two owned strings first, so that nothing can fail once the object exists */
        name_copy = strdup(pretty);
        if (!name_copy)
                return -ENOMEM;

        full_path = path_join(path, filename);
        if (!full_path)
                return -ENOMEM;

        path_simplify(full_path);

        img = new(Image, 1);
        if (!img)
                return -ENOMEM;

        *img = (Image) {
                .n_ref = 1,
                .type = t,
                .name = TAKE_PTR(name_copy),
                .path = TAKE_PTR(full_path),
                .read_only = read_only,
                .crtime = crtime,
                .mtime = mtime,
                .usage = UINT64_MAX,
                .usage_exclusive = UINT64_MAX,
                .limit = UINT64_MAX,
                .limit_exclusive = UINT64_MAX,
        };

        *ret = img;
        return 0;
}
172 
extract_pretty(const char * path,const char * suffix,char ** ret)173 static int extract_pretty(const char *path, const char *suffix, char **ret) {
174         _cleanup_free_ char *name = NULL;
175         const char *p;
176         size_t n;
177 
178         assert(path);
179         assert(ret);
180 
181         p = last_path_component(path);
182         n = strcspn(p, "/");
183 
184         name = strndup(p, n);
185         if (!name)
186                 return -ENOMEM;
187 
188         if (suffix) {
189                 char *e;
190 
191                 e = endswith(name, suffix);
192                 if (!e)
193                         return -EINVAL;
194 
195                 *e = 0;
196         }
197 
198         if (!image_name_is_valid(name))
199                 return -EINVAL;
200 
201         *ret = TAKE_PTR(name);
202         return 0;
203 }
204 
/* Inspects the file system object 'filename' (looked up relative to 'dfd') and, if it is a directory, a
 * btrfs subvolume, a regular file ending in ".raw" or a block device, creates the matching Image
 * object.
 *
 * pretty:   image name to use; if NULL it is derived from 'filename' via extract_pretty().
 * path:     directory the image lives in, used to build the image path; may only be NULL if dfd is
 *           AT_FDCWD.
 * st:       stat data of the object if the caller already has it, otherwise NULL.
 * ret:      where to store the new object; if NULL only the type check is done and 0 is returned
 *           for recognized types.
 *
 * Symlinks are deliberately followed here, since we want to allow symlinking trees, raw files and
 * block devices into /var/lib/machines/, and treat them normally.
 *
 * Returns 0 on success, -ENOENT if we can't find the image after all, -EMEDIUMTYPE if it's not a file
 * we recognize, or another negative errno-style error. */
static int image_make(
                const char *pretty,
                int dfd,
                const char *path,
                const char *filename,
                const struct stat *st,
                Image **ret) {

        _cleanup_free_ char *derived_name = NULL, *parent_dir = NULL;
        struct stat fresh_st;
        bool ro;
        int rc;

        assert(dfd >= 0 || dfd == AT_FDCWD);
        assert(path || dfd == AT_FDCWD);
        assert(filename);

        if (!st) {
                if (fstatat(dfd, filename, &fresh_st, 0) < 0)
                        return -errno;

                st = &fresh_st;
        }

        /* Without an explicit path, look up the parent directory ourselves. It is only used in the
         * log messages below, hence failures are ignored. */
        if (!path) {
                if (dfd == AT_FDCWD)
                        (void) safe_getcwd(&parent_dir);
                else
                        (void) fd_get_path(dfd, &parent_dir);
        }

        /* Anything below /usr, or on a read-only file system, is considered read-only */
        ro = path && path_startswith(path, "/usr");
        if (!ro)
                ro = faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS;

        if (S_ISDIR(st->st_mode)) {
                _cleanup_close_ int dir_fd = -1;
                unsigned attrs = 0;
                usec_t birth = 0;

                if (!ret)
                        return 0;

                if (!pretty) {
                        rc = extract_pretty(filename, NULL, &derived_name);
                        if (rc < 0)
                                return rc;

                        pretty = derived_name;
                }

                dir_fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
                if (dir_fd < 0)
                        return -errno;

                if (btrfs_might_be_subvol(st)) {

                        rc = fd_is_fs_type(dir_fd, BTRFS_SUPER_MAGIC);
                        if (rc < 0)
                                return rc;
                        if (rc > 0) {
                                BtrfsSubvolInfo info;
                                BtrfsQuotaInfo quota;

                                /* It's a btrfs subvolume */

                                rc = btrfs_subvol_get_info_fd(dir_fd, 0, &info);
                                if (rc < 0)
                                        return rc;

                                rc = image_new(IMAGE_SUBVOLUME,
                                               pretty,
                                               path,
                                               filename,
                                               info.read_only || ro,
                                               info.otime,
                                               0,
                                               ret);
                                if (rc < 0)
                                        return rc;

                                /* Quota data is optional: skip it unless no quota scan is reported as
                                 * ongoing and the quota query succeeds. */
                                if (btrfs_quota_scan_ongoing(dir_fd) != 0)
                                        return 0;

                                if (btrfs_subvol_get_subtree_quota_fd(dir_fd, 0, &quota) < 0)
                                        return 0;

                                (*ret)->usage = quota.referenced;
                                (*ret)->usage_exclusive = quota.exclusive;

                                (*ret)->limit = quota.referenced_max;
                                (*ret)->limit_exclusive = quota.exclusive_max;

                                return 0;
                        }
                }

                /* Get directory creation time (not available everywhere, but that's OK) */
                (void) fd_getcrtime(dir_fd, &birth);

                /* If the IMMUTABLE bit is set, we consider the directory read-only. Since the ioctl is
                 * not supported everywhere we ignore failures. */
                (void) read_attr_fd(dir_fd, &attrs);

                /* It's just a normal directory. We don't use the mtime of stat() here, since it's not
                 * the time of last change of the tree, but only of the top-level dir. */
                return image_new(IMAGE_DIRECTORY,
                                 pretty,
                                 path,
                                 filename,
                                 ro || (attrs & FS_IMMUTABLE_FL),
                                 birth,
                                 0,
                                 ret);
        }

        if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
                usec_t birth = 0;

                /* It's a RAW disk image */

                if (!ret)
                        return 0;

                (void) fd_getcrtime_at(dfd, filename, AT_SYMLINK_FOLLOW, &birth);

                if (!pretty) {
                        rc = extract_pretty(filename, ".raw", &derived_name);
                        if (rc < 0)
                                return rc;

                        pretty = derived_name;
                }

                /* A file without any write permission bit counts as read-only, too */
                rc = image_new(IMAGE_RAW,
                               pretty,
                               path,
                               filename,
                               !(st->st_mode & 0222) || ro,
                               birth,
                               timespec_load(&st->st_mtim),
                               ret);
                if (rc < 0)
                        return rc;

                /* Usage is derived from st_blocks (multiplied by 512), the limit is the file size */
                (*ret)->usage = (*ret)->usage_exclusive = st->st_blocks * 512;
                (*ret)->limit = (*ret)->limit_exclusive = st->st_size;

                return 0;
        }

        if (S_ISBLK(st->st_mode)) {
                _cleanup_close_ int dev_fd = -1;
                uint64_t dev_size = UINT64_MAX;

                /* A block device */

                if (!ret)
                        return 0;

                if (!pretty) {
                        rc = extract_pretty(filename, NULL, &derived_name);
                        if (rc < 0)
                                return rc;

                        pretty = derived_name;
                }

                dev_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
                if (dev_fd >= 0) {
                        /* Refresh stat data after opening the node */
                        if (fstat(dev_fd, &fresh_st) < 0)
                                return -errno;
                        st = &fresh_st;

                        if (!S_ISBLK(st->st_mode)) /* Verify that what we opened is actually what we think it is */
                                return -ENOTTY;

                        if (!ro) {
                                int state = 0;

                                if (ioctl(dev_fd, BLKROGET, &state) < 0)
                                        log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path ?: strnull(parent_dir), filename);
                                else if (state)
                                        ro = true;
                        }

                        if (ioctl(dev_fd, BLKGETSIZE64, &dev_size) < 0)
                                log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path ?: strnull(parent_dir), filename);

                        dev_fd = safe_close(dev_fd);
                } else
                        /* Not fatal: we simply go on without the read-only state and size of the device */
                        log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path ?: strnull(parent_dir), filename);

                rc = image_new(IMAGE_BLOCK,
                               pretty,
                               path,
                               filename,
                               !(st->st_mode & 0222) || ro,
                               0,
                               0,
                               ret);
                if (rc < 0)
                        return rc;

                /* Only record the size if we actually learned a meaningful one */
                if (!IN_SET(dev_size, 0, UINT64_MAX))
                        (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = dev_size;

                return 0;
        }

        return -EMEDIUMTYPE;
}
426 
/* Looks for an image called 'name' in the search path of the given image class, optionally relative
 * to the root directory 'root'.
 *
 * In each search directory an entry named exactly 'name' is accepted if it is a directory or block
 * device; otherwise "<name>.raw" is accepted if it is a regular file. If nothing matches and the
 * class is IMAGE_MACHINE, the special name ".host" resolves to the root directory itself.
 *
 * ret may be NULL, in which case only the existence of the image is checked.
 *
 * Returns 1 if the image was found in the search path, the result of image_make() (0 on success) for
 * the ".host" pseudo-image, -ENOENT if there is no such image, and another negative errno-style value
 * on failure. */
int image_find(ImageClass class,
               const char *name,
               const char *root,
               Image **ret) {

        const char *path;
        int r;

        assert(class >= 0);
        assert(class < _IMAGE_CLASS_MAX);
        assert(name);

        /* There are no images with invalid names */
        if (!image_name_is_valid(name))
                return -ENOENT;

        NULSTR_FOREACH(path, image_search_path[class]) {
                _cleanup_free_ char *resolved = NULL;
                _cleanup_closedir_ DIR *d = NULL;
                struct stat st;
                int flags;

                r = chase_symlinks_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
                if (r == -ENOENT)
                        continue;
                if (r < 0)
                        return r;

                /* As mentioned in image_make(), we follow symlinks on this fstatat(), because we want
                 * to permit people to symlink block devices into the search path. (For now, we disable
                 * that when operating relative to some root directory.) */
                flags = root ? AT_SYMLINK_NOFOLLOW : 0;
                if (fstatat(dirfd(d), name, &st, flags) < 0) {
                        _cleanup_free_ char *raw = NULL;

                        if (errno != ENOENT)
                                return -errno;

                        /* No entry of that exact name, let's see if there's a raw image file instead */
                        raw = strjoin(name, ".raw");
                        if (!raw)
                                return -ENOMEM;

                        if (fstatat(dirfd(d), raw, &st, flags) < 0) {
                                if (errno == ENOENT)
                                        continue;

                                return -errno;
                        }

                        if (!S_ISREG(st.st_mode))
                                continue;

                        r = image_make(name, dirfd(d), resolved, raw, &st, ret);

                } else {
                        if (!S_ISDIR(st.st_mode) && !S_ISBLK(st.st_mode))
                                continue;

                        r = image_make(name, dirfd(d), resolved, name, &st, ret);
                }
                /* Vanished or unrecognized in the meantime? Then try the next search directory. */
                if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
                        continue;
                if (r < 0)
                        return r;

                if (ret)
                        (*ret)->discoverable = true;

                return 1;
        }

        if (class == IMAGE_MACHINE && streq(name, ".host")) {
                r = image_make(".host", AT_FDCWD, NULL, empty_to_root(root), NULL, ret);
                if (r < 0)
                        return r;

                if (ret)
                        (*ret)->discoverable = true;

                return r;
        }

        return -ENOENT;
}
511 
/* Creates an Image object for an explicit file system path, passing through the result of
 * image_make(). The root directory "/" is mapped to the special ".host" image; for everything else the
 * name is derived from the path. */
int image_from_path(const char *path, Image **ret) {
        const char *pretty = NULL, *target = path;

        /* Note that we don't set the 'discoverable' field of the returned object, because we don't check here whether
         * the image is in the image search path. And if it is we don't know if the path we used is actually not
         * overridden by another, different image earlier in the search path */

        if (path_equal(path, "/")) {
                pretty = ".host";
                target = "/";
        }

        return image_make(pretty, AT_FDCWD, NULL, target, NULL, ret);
}
523 
/* Resolves an image given either by name or by path: anything that is a valid image name is looked
 * up in the search path via image_find(), everything else is treated as a file system path and handed
 * to image_from_path(). */
int image_find_harder(ImageClass class, const char *name_or_path, const char *root, Image **ret) {
        if (!image_name_is_valid(name_or_path))
                return image_from_path(name_or_path, ret);

        return image_find(class, name_or_path, root, ret);
}
530 
/* Enumerates all images of the given class found in the class's search path (optionally relative to
 * the root directory 'root') and adds them to the hashmap 'h', keyed by image name.
 *
 * Regular files are only considered if they end in ".raw" (the suffix is dropped from the name);
 * directories and block devices are taken under their own name. Entries with invalid names, and names
 * already present in 'h', are skipped, so the first search directory providing a name wins. For
 * IMAGE_MACHINE a ".host" image referring to the root directory is added unless already present.
 *
 * Ownership of each added Image passes to the hashmap. Returns 0 on success, or a negative
 * errno-style value on failure (images added before the failure stay in 'h'). */
int image_discover(
                ImageClass class,
                const char *root,
                Hashmap *h) {

        const char *path;
        int r;

        assert(class >= 0);
        assert(class < _IMAGE_CLASS_MAX);
        assert(h);

        NULSTR_FOREACH(path, image_search_path[class]) {
                _cleanup_free_ char *resolved = NULL;
                _cleanup_closedir_ DIR *d = NULL;

                r = chase_symlinks_and_opendir(path, root, CHASE_PREFIX_ROOT, &resolved, &d);
                if (r == -ENOENT)
                        continue;
                if (r < 0)
                        return r;

                FOREACH_DIRENT_ALL(de, d, return -errno) {
                        _cleanup_(image_unrefp) Image *image = NULL;
                        _cleanup_free_ char *truncated = NULL;
                        const char *pretty;
                        struct stat st;
                        int flags;

                        if (dot_or_dot_dot(de->d_name))
                                continue;

                        /* As mentioned in image_make(), we follow symlinks on this fstatat(), because
                         * we want to permit people to symlink block devices into the search path. (This
                         * is disabled when operating relative to some root directory.) */
                        flags = root ? AT_SYMLINK_NOFOLLOW : 0;
                        if (fstatat(dirfd(d), de->d_name, &st, flags) < 0) {
                                if (errno == ENOENT)
                                        continue;

                                return -errno;
                        }

                        if (S_ISREG(st.st_mode)) {
                                const char *e;

                                e = endswith(de->d_name, ".raw");
                                if (!e)
                                        continue;

                                /* The image name is the file name without the ".raw" suffix */
                                truncated = strndup(de->d_name, e - de->d_name);
                                if (!truncated)
                                        return -ENOMEM;

                                pretty = truncated;
                        } else if (S_ISDIR(st.st_mode) || S_ISBLK(st.st_mode))
                                pretty = de->d_name;
                        else
                                continue;

                        if (!image_name_is_valid(pretty))
                                continue;

                        /* Earlier search directories take precedence */
                        if (hashmap_contains(h, pretty))
                                continue;

                        r = image_make(pretty, dirfd(d), resolved, de->d_name, &st, &image);
                        if (IN_SET(r, -ENOENT, -EMEDIUMTYPE))
                                continue;
                        if (r < 0)
                                return r;

                        image->discoverable = true;

                        r = hashmap_put(h, image->name, image);
                        if (r < 0)
                                return r;

                        /* The hashmap owns the object now, disarm the cleanup handler */
                        image = NULL;
                }
        }

        if (class == IMAGE_MACHINE && !hashmap_contains(h, ".host")) {
                _cleanup_(image_unrefp) Image *image = NULL;

                /* Honour the root directory we were asked to operate on, matching what image_find()
                 * does for ".host". */
                r = image_make(".host", AT_FDCWD, NULL, empty_to_root(root), NULL, &image);
                if (r < 0)
                        return r;

                image->discoverable = true;

                r = hashmap_put(h, image->name, image);
                if (r < 0)
                        return r;

                image = NULL;
        }

        return 0;
}
630 
/* Deletes an image from disk, together with its ".nspawn" settings files and its ".roothash" file.
 *
 * Vendor and host images are refused with -EROFS; image types other than subvolume, directory, block
 * and raw are refused with -EOPNOTSUPP. The image is locked exclusively (non-blocking) while we work
 * on it. Failures to remove the auxiliary files are only logged. Returns 0 on success, a negative
 * errno-style value otherwise. */
int image_remove(Image *i) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        _cleanup_strv_free_ char **settings_files = NULL;
        _cleanup_free_ char *roothash_file = NULL;
        int r;

        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        /* Determine the auxiliary file paths up front, before anything is removed */
        settings_files = image_settings_path(i);
        if (!settings_files)
                return -ENOMEM;

        roothash_file = image_roothash_path(i);
        if (!roothash_file)
                return -ENOMEM;

        /* Make sure we don't interfere with a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
                /* Let's unlink first, maybe it is a symlink? If that works we are happy. */
                if (unlink(i->path) >= 0)
                        break;

                /* Otherwise, let's get out the big guns */
                r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
                if (r < 0)
                        return r;

                break;

        case IMAGE_DIRECTORY:
                /* Allow deletion of read-only directories */
                (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);

                r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
                if (r < 0)
                        return r;

                break;

        case IMAGE_BLOCK:
        case IMAGE_RAW:
                /* If a block device is inside of /dev, then it's a real block device, hence let's not
                 * touch the device node itself (but let's remove the stuff stored alongside it). If
                 * it's anywhere else, let's try to unlink the thing (it's most likely a symlink after
                 * all). Raw image files are always unlinked. */
                if (i->type == IMAGE_BLOCK && path_startswith(i->path, "/dev"))
                        break;

                if (unlink(i->path) < 0)
                        return -errno;

                break;

        default:
                return -EOPNOTSUPP;
        }

        /* The image itself is gone, now clean up what was stored alongside it (best effort) */
        STRV_FOREACH(j, settings_files)
                if (unlink(*j) < 0 && errno != ENOENT)
                        log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);

        if (unlink(roothash_file) < 0 && errno != ENOENT)
                log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash_file);

        return 0;
}
706 
rename_auxiliary_file(const char * path,const char * new_name,const char * suffix)707 static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
708         _cleanup_free_ char *rs = NULL;
709         const char *fn;
710 
711         fn = strjoina(new_name, suffix);
712 
713         rs = file_in_same_dir(path, fn);
714         if (!rs)
715                 return -ENOMEM;
716 
717         return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
718 }
719 
/* Renames an image to 'new_name', keeping it in its current directory, and renames the ".nspawn"
 * settings files and the ".roothash" file along with it. On success i->name and i->path are updated.
 *
 * Returns 0 on success, otherwise a negative errno-style value, in particular:
 *   -EINVAL      new_name is not a valid image name
 *   -EROFS       the image is a vendor or host image, or a block device below /dev
 *   -EEXIST      an image of the new name already exists in the IMAGE_MACHINE search path
 *   -EOPNOTSUPP  the image type cannot be renamed
 *   -ENOMEM      allocation failure
 *
 * Failures to rename the auxiliary files are only logged. */
int image_rename(Image *i, const char *new_name) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
        _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
        _cleanup_strv_free_ char **settings = NULL;
        unsigned file_attr = 0;
        int r;

        assert(i);

        if (!image_name_is_valid(new_name))
                return -EINVAL;

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        /* Determine the auxiliary file paths while i->name and i->path still refer to the old name */
        settings = image_settings_path(i);
        if (!settings)
                return -ENOMEM;

        roothash = image_roothash_path(i);
        if (!roothash)
                return -ENOMEM;

        /* Make sure we don't interfere with a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        /* Make sure nobody takes the new name, between the time we
         * checked it is currently unused in all search paths, and the
         * time we take possession of it */
        r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
        if (r < 0)
                return r;

        r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
        if (r >= 0)
                return -EEXIST;
        if (r != -ENOENT)
                return r;

        switch (i->type) {

        case IMAGE_DIRECTORY:
        case IMAGE_SUBVOLUME:
                new_path = file_in_same_dir(i->path, new_name);
                break;

        case IMAGE_BLOCK:

                /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
                if (path_startswith(i->path, "/dev"))
                        return -EROFS;

                new_path = file_in_same_dir(i->path, new_name);
                break;

        case IMAGE_RAW: {
                const char *fn;

                fn = strjoina(new_name, ".raw");
                new_path = file_in_same_dir(i->path, fn);
                break;
        }

        default:
                return -EOPNOTSUPP;
        }

        if (!new_path)
                return -ENOMEM;

        nn = strdup(new_name);
        if (!nn)
                return -ENOMEM;

        /* Turn off the immutable bit while we rename the image, so that we can rename it. We do this
         * only after all allocations succeeded, so that from here on the rename itself is the only
         * step that can fail while the bit is off. */
        if (i->type == IMAGE_DIRECTORY) {
                (void) read_attr_path(i->path, &file_attr);

                if (file_attr & FS_IMMUTABLE_FL)
                        (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL, NULL);
        }

        r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
        if (r < 0) {
                /* The image stays where it was, hence put the immutable bit back if we dropped it */
                if (file_attr & FS_IMMUTABLE_FL)
                        (void) chattr_path(i->path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);

                return r;
        }

        /* Restore the immutable bit, if it was set before */
        if (file_attr & FS_IMMUTABLE_FL)
                (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);

        free_and_replace(i->path, new_path);
        free_and_replace(i->name, nn);

        STRV_FOREACH(j, settings) {
                r = rename_auxiliary_file(*j, new_name, ".nspawn");
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
        }

        r = rename_auxiliary_file(roothash, new_name, ".roothash");
        if (r < 0 && r != -ENOENT)
                log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);

        return 0;
}
826 
clone_auxiliary_file(const char * path,const char * new_name,const char * suffix)827 static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
828         _cleanup_free_ char *rs = NULL;
829         const char *fn;
830 
831         fn = strjoina(new_name, suffix);
832 
833         rs = file_in_same_dir(path, fn);
834         if (!rs)
835                 return -ENOMEM;
836 
837         return copy_file_atomic(path, rs, 0664, 0, 0, COPY_REFLINK);
838 }
839 
/* Clones image 'i' under the name 'new_name' below /var/lib/machines/.
 *
 * Directory and subvolume images are snapshotted (with copy fallbacks), raw images are copied as
 * "<new_name>.raw". The image's settings and root hash files are cloned on a best-effort basis: failures
 * there are only logged at debug level.
 *
 * Returns 0 on success, -EINVAL for an invalid name, -EEXIST if an image of that name is already found,
 * -EOPNOTSUPP for image types that cannot be cloned, or another negative error code. */
int image_clone(Image *i, const char *new_name, bool read_only) {
        _cleanup_(release_lock_file) LockFile lock = LOCK_FILE_INIT;
        _cleanup_strv_free_ char **nspawn_files = NULL;
        _cleanup_free_ char *roothash_file = NULL;
        const char *target;
        int r;

        assert(i);

        if (!image_name_is_valid(new_name))
                return -EINVAL;

        nspawn_files = image_settings_path(i);
        if (!nspawn_files)
                return -ENOMEM;

        roothash_file = image_roothash_path(i);
        if (!roothash_file)
                return -ENOMEM;

        /* Hold the name lock from before the "is it unused?" check until we are done, so that nobody can
         * grab the new name in between. */
        r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &lock);
        if (r < 0)
                return r;

        /* Anything but "not found" ends here: either the name is taken, or the lookup itself failed. */
        r = image_find(IMAGE_MACHINE, new_name, NULL, NULL);
        if (r != -ENOENT)
                return r >= 0 ? -EEXIST : r;

        if (i->type == IMAGE_SUBVOLUME || i->type == IMAGE_DIRECTORY) {
                /* Whenever possible the copy becomes a btrfs subvolume, even when the source is just a
                 * plain directory. */
                target = strjoina("/var/lib/machines/", new_name);

                r = btrfs_subvol_snapshot(i->path, target,
                                          (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
                                          BTRFS_SNAPSHOT_FALLBACK_COPY |
                                          BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
                                          BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
                                          BTRFS_SNAPSHOT_RECURSIVE |
                                          BTRFS_SNAPSHOT_QUOTA);
                if (r < 0)
                        return r;

                /* Turn on "subtree" quotas for the copy, in case no quota was copied from the source. */
                (void) btrfs_subvol_auto_qgroup(target, 0, true);

        } else if (i->type == IMAGE_RAW) {
                target = strjoina("/var/lib/machines/", new_name, ".raw");

                r = copy_file_atomic(i->path, target, read_only ? 0444 : 0644, FS_NOCOW_FL, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME);
                if (r < 0)
                        return r;

        } else
                /* Block devices and anything unknown cannot be cloned. */
                return -EOPNOTSUPP;

        STRV_FOREACH(s, nspawn_files) {
                r = clone_auxiliary_file(*s, new_name, ".nspawn");
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *s);
        }

        r = clone_auxiliary_file(roothash_file, new_name, ".roothash");
        if (r < 0 && r != -ENOENT)
                log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash_file);

        return 0;
}
921 
/* Marks image 'i' read-only (b == true) or writable (b == false), using whatever mechanism fits the
 * image type. Vendor and host images are refused with -EROFS. The image path is locked exclusively
 * (non-blocking) while the change is made. Returns 0 on success, -EOPNOTSUPP for unknown image types,
 * or another negative error code. */
int image_read_only(Image *i, bool b) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        int r;

        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        /* Take the lock so that we do not get in the way of a running nspawn */
        r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
                /* Only the top-level subvolume of the image gets the flag. */
                r = btrfs_subvol_set_read_only(i->path, b);
                return r < 0 ? r : 0;

        case IMAGE_DIRECTORY:
                /* The access mode of the top-level directory is off limits for plain directory trees,
                 * because it affects the container itself. The "immutable" file attribute at least makes
                 * the top-level directory read-only, though. That is weaker than a read-only subvolume,
                 * but better than nothing, and the value can be read back later. */
                r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL, NULL);
                return r < 0 ? r : 0;

        case IMAGE_RAW: {
                struct stat sb;
                mode_t mode;

                if (stat(i->path, &sb) < 0)
                        return -errno;

                /* Keep only the existing read bits, and add owner write access for writable images. */
                mode = sb.st_mode & 0444;
                if (!b)
                        mode |= 0200;

                if (chmod(i->path, mode) < 0)
                        return -errno;

                /* A freshly read-only image is a good candidate for defragmentation, as no further
                 * writes are going to fragment it again. */
                if (b)
                        (void) btrfs_defrag(i->path);

                return 0;
        }

        case IMAGE_BLOCK: {
                _cleanup_close_ int blk = -1;
                struct stat sb;
                int ro = b;

                blk = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
                if (blk < 0)
                        return -errno;

                if (fstat(blk, &sb) < 0)
                        return -errno;

                /* Only issue the block ioctl on something that really is a block device. */
                if (!S_ISBLK(sb.st_mode))
                        return -ENOTTY;

                if (ioctl(blk, BLKROSET, &ro) < 0)
                        return -errno;

                return 0;
        }

        default:
                return -EOPNOTSUPP;
        }
}
1007 
/* Locks an image path. Two locks are involved: a "local" one that lives next to the image path itself
 * (and might hence be shared via NFS), and a "global" one below /run that is keyed by device/inode
 * number, which means even a tree that is a mount point can be locked correctly.
 *
 * 'operation' must contain exactly one of LOCK_SH or LOCK_EX. On success 0 is returned and both *global
 * and *local are initialized (possibly to LOCK_FILE_INIT if the respective lock was not taken). Returns
 * -EINVAL for relative paths or bad operations, -EBUSY for exclusive locks on "/", -ENOTTY for
 * unsupported file types, or another negative error code. */
int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
        _cleanup_free_ char *global_path = NULL;
        LockFile local_lock = LOCK_FILE_INIT;
        struct stat sb;
        bool exclusive;
        int mode, r;

        assert(path);
        assert(global);
        assert(local);

        if (!path_is_absolute(path))
                return -EINVAL;

        mode = operation & (LOCK_SH|LOCK_EX);
        if (mode == LOCK_EX)
                exclusive = true;
        else if (mode == LOCK_SH)
                exclusive = false;
        else
                return -EINVAL;

        /* Locking is skipped entirely when $SYSTEMD_NSPAWN_LOCK parses as false. */
        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
                *global = (LockFile) LOCK_FILE_INIT;
                *local = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        /* Exclusive locks on the host image are refused: we are running off it ourselves, and no image
         * should get to manipulate the host image. Shared locks are tolerated, but as NOPs, since they
         * are pointless when exclusive locks cannot exist. Strictly speaking that is questionable too,
         * as changes made on the host may then show up in the container while they happen (whereas a
         * shared lock kinda promises that nothing changes while it is held). Read-only containers off
         * the host root are too useful to give up though, hence we permit this and trust the user to do
         * the right thing. */
        if (path_equal(path, "/")) {
                if (exclusive)
                        return -EBUSY;

                *global = (LockFile) LOCK_FILE_INIT;
                *local = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        /* If the path can be stat()ed, derive the name of the global lock file from its identity. */
        if (stat(path, &sb) >= 0) {
                if (!S_ISBLK(sb.st_mode) && !S_ISDIR(sb.st_mode) && !S_ISREG(sb.st_mode))
                        return -ENOTTY;

                if (S_ISBLK(sb.st_mode))
                        r = asprintf(&global_path, "/run/systemd/nspawn/locks/block-%u:%u", major(sb.st_rdev), minor(sb.st_rdev));
                else
                        r = asprintf(&global_path, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) sb.st_dev, (unsigned long) sb.st_ino);
                if (r < 0)
                        return -ENOMEM;
        }

        /* Block devices are host local anyway, hence the major/minor based lock is sufficient for them
         * and the "local" lock is skipped. */
        if (!path_startswith(path, "/dev/")) {
                r = make_lock_file_for(path, operation, &local_lock);
                if (r < 0) {
                        /* Only a shared lock that fails with -EROFS is tolerated. */
                        if (exclusive || r != -EROFS)
                                return r;

                        log_debug_errno(r, "Failed to create shared lock for '%s', ignoring: %m", path);
                }
        }

        if (!global_path)
                *global = (LockFile) LOCK_FILE_INIT;
        else {
                (void) mkdir_p("/run/systemd/nspawn/locks", 0700);

                r = make_lock_file(global_path, operation, global);
                if (r < 0) {
                        /* Do not leave the local lock behind if the global one cannot be taken. */
                        release_lock_file(&local_lock);
                        return r;
                }
        }

        *local = local_lock;
        return 0;
}
1096 
/* Sets the quota limit 'referenced_max' on image 'i'. Only subvolume images are supported
 * (-EOPNOTSUPP otherwise); vendor and host images are refused with -EROFS. The return value is that of
 * the subtree quota call; the two preceding calls are best-effort. */
int image_set_limit(Image *i, uint64_t referenced_max) {
        const char *subvolume;
        int r;

        assert(i);

        if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
                return -EROFS;

        if (i->type != IMAGE_SUBVOLUME)
                return -EOPNOTSUPP;

        subvolume = i->path;

        /* The limit is applied to the subvolume itself and to the subtree. Touching the subvolume
         * quota is mostly there for historical reasons: back when there was no concept of subtree
         * quota, the subvolume quota was the only thing modified. */
        (void) btrfs_qgroup_set_limit(subvolume, 0, referenced_max);
        (void) btrfs_subvol_auto_qgroup(subvolume, 0, true);

        r = btrfs_subvol_set_subtree_quota_limit(subvolume, 0, referenced_max);
        return r;
}
1115 
/* Reads descriptive metadata (hostname, machine ID, machine-info, os-release and extension-release
 * data) of image 'i' and stores it in the Image object.
 *
 * A shared, non-blocking lock is held on the image path for the duration of the call. For directory and
 * subvolume images the individual files are resolved below the image root, and a failure to read any
 * single one is only logged at debug level. For raw and block images a loop device is set up via
 * loop_device_make_by_path() and the image is dissected; failures there are returned to the caller.
 *
 * Returns 0 on success (and sets i->metadata_valid), -EOPNOTSUPP for unsupported image types, or
 * another negative error code from locking, loop device setup or dissection. */
int image_read_metadata(Image *i) {
        _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
        int r;

        assert(i);

        r = image_path_lock(i->path, LOCK_SH|LOCK_NB, &global_lock, &local_lock);
        if (r < 0)
                return r;

        switch (i->type) {

        case IMAGE_SUBVOLUME:
        case IMAGE_DIRECTORY: {
                _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **extension_release = NULL;
                sd_id128_t machine_id = SD_ID128_NULL;
                _cleanup_free_ char *hostname = NULL;
                _cleanup_free_ char *path = NULL;

                r = chase_symlinks("/etc/hostname", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/hostname in image %s: %m", i->name);
                else if (r >= 0) {
                        r = read_etc_hostname(path, &hostname);
                        if (r < 0)
                                /* Log the returned code, like every other call here does; errno is not
                                 * necessarily related to this failure. */
                                log_debug_errno(r, "Failed to read /etc/hostname of image %s: %m", i->name);
                }

                /* 'path' is reused for each lookup, hence release the previous result first. */
                path = mfree(path);

                r = chase_symlinks("/etc/machine-id", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/machine-id in image %s: %m", i->name);
                else if (r >= 0) {
                        _cleanup_close_ int fd = -1;

                        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY);
                        if (fd < 0)
                                log_debug_errno(errno, "Failed to open %s: %m", path);
                        else {
                                r = id128_read_fd(fd, ID128_PLAIN, &machine_id);
                                if (r < 0)
                                        log_debug_errno(r, "Image %s contains invalid machine ID.", i->name);
                        }
                }

                path = mfree(path);

                r = chase_symlinks("/etc/machine-info", i->path, CHASE_PREFIX_ROOT|CHASE_TRAIL_SLASH, &path, NULL);
                if (r < 0 && r != -ENOENT)
                        log_debug_errno(r, "Failed to chase /etc/machine-info in image %s: %m", i->name);
                else if (r >= 0) {
                        r = load_env_file_pairs(NULL, path, &machine_info);
                        if (r < 0)
                                log_debug_errno(r, "Failed to parse machine-info data of %s: %m", i->name);
                }

                r = load_os_release_pairs(i->path, &os_release);
                if (r < 0)
                        log_debug_errno(r, "Failed to read os-release in image, ignoring: %m");

                r = load_extension_release_pairs(i->path, i->name, &extension_release);
                if (r < 0)
                        log_debug_errno(r, "Failed to read extension-release in image, ignoring: %m");

                /* Install whatever we managed to read; fields that could not be read are reset to
                 * their empty defaults. */
                free_and_replace(i->hostname, hostname);
                i->machine_id = machine_id;
                strv_free_and_replace(i->machine_info, machine_info);
                strv_free_and_replace(i->os_release, os_release);
                strv_free_and_replace(i->extension_release, extension_release);

                break;
        }

        case IMAGE_RAW:
        case IMAGE_BLOCK: {
                _cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
                _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;

                r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
                if (r < 0)
                        return r;

                /* Make sure udevd doesn't issue BLKRRPART in the background which might make our partitions
                 * disappear temporarily. */
                r = loop_device_flock(d, LOCK_SH);
                if (r < 0)
                        return r;

                r = dissect_image(
                                d->fd,
                                NULL, NULL,
                                d->diskseq,
                                d->uevent_seqnum_not_before,
                                d->timestamp_not_before,
                                DISSECT_IMAGE_GENERIC_ROOT |
                                DISSECT_IMAGE_REQUIRE_ROOT |
                                DISSECT_IMAGE_RELAX_VAR_CHECK |
                                DISSECT_IMAGE_READ_ONLY |
                                DISSECT_IMAGE_USR_NO_ROOT,
                                &m);
                if (r < 0)
                        return r;

                r = dissected_image_acquire_metadata(m,
                                                     DISSECT_IMAGE_VALIDATE_OS |
                                                     DISSECT_IMAGE_VALIDATE_OS_EXT);
                if (r < 0)
                        return r;

                /* Take over the metadata fields from the dissected image object. */
                free_and_replace(i->hostname, m->hostname);
                i->machine_id = m->machine_id;
                strv_free_and_replace(i->machine_info, m->machine_info);
                strv_free_and_replace(i->os_release, m->os_release);
                strv_free_and_replace(i->extension_release, m->extension_release);

                break;
        }

        default:
                return -EOPNOTSUPP;
        }

        i->metadata_valid = true;

        return 0;
}
1243 
/* Locks an image by name, independently of the precise path it is stored at. The lock file lives below
 * /run/systemd/nspawn/locks/. Returns -EBUSY for ".host", -EINVAL for invalid names, and otherwise the
 * result of make_lock_file(). If $SYSTEMD_NSPAWN_LOCK parses as false no lock is taken, *ret is set to
 * LOCK_FILE_INIT and 0 is returned. */
int image_name_lock(const char *name, int operation, LockFile *ret) {
        const char *lock_path;

        assert(name);
        assert(ret);

        if (streq(name, ".host"))
                return -EBUSY;

        if (!image_name_is_valid(name))
                return -EINVAL;

        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
                *ret = (LockFile) LOCK_FILE_INIT;
                return 0;
        }

        lock_path = strjoina("/run/systemd/nspawn/locks/name-", name);

        /* Best-effort: if the directory cannot be created, taking the lock below reports the error. */
        (void) mkdir_p("/run/systemd/nspawn/locks", 0700);

        return make_lock_file(lock_path, operation, ret);
}
1268 
/* Checks whether the path 'image' names an entry located directly inside one of the search path
 * directories of image class 'class'.
 *
 * If 'root' is set (i.e. empty_or_root() is false for it), 'image' must be located below 'root', and
 * the remainder after that prefix is what gets compared against the search path. Returns true only if
 * exactly one non-empty path component follows the search directory, optionally with trailing slashes;
 * returns false otherwise. */
bool image_in_search_path(
                ImageClass class,
                const char *root,
                const char *image) {

        const char *path;

        assert(image);

        NULSTR_FOREACH(path, image_search_path[class]) {
                const char *p, *q;
                size_t k;

                /* Strip the root prefix off the image path (not off the search path entry), so that
                 * what remains can be matched against the search path. */
                if (!empty_or_root(root)) {
                        q = path_startswith(image, root);
                        if (!q)
                                continue;
                } else
                        q = image;

                p = path_startswith(q, path);
                if (!p)
                        continue;

                /* Make sure there's a filename following */
                k = strcspn(p, "/");
                if (k == 0)
                        continue;

                p += k;

                /* Accept trailing slashes */
                if (p[strspn(p, "/")] == 0)
                        return true;

        }

        return false;
}
1308 
1309 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
1310         [IMAGE_DIRECTORY] = "directory",
1311         [IMAGE_SUBVOLUME] = "subvolume",
1312         [IMAGE_RAW] = "raw",
1313         [IMAGE_BLOCK] = "block",
1314 };
1315 
1316 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);
1317