1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <fcntl.h>
4 #include <linux/loop.h>
5 #include <pthread.h>
6 
7 #include "alloc-util.h"
8 #include "dissect-image.h"
9 #include "fd-util.h"
10 #include "fileio.h"
11 #include "fs-util.h"
12 #include "gpt.h"
13 #include "main-func.h"
14 #include "missing_loop.h"
15 #include "mkfs-util.h"
16 #include "mount-util.h"
17 #include "namespace-util.h"
18 #include "parse-util.h"
19 #include "string-util.h"
20 #include "strv.h"
21 #include "tests.h"
22 #include "tmpfile-util.h"
23 #include "user-util.h"
24 #include "virt.h"
25 
/* Number of worker threads to run in parallel; may be overridden by the first command line argument. */
static unsigned arg_n_threads = 5;
/* Maximum number of attach/dissect/mount/detach cycles each worker performs; may be overridden by the
 * second command line argument. */
static unsigned arg_n_iterations = 3;
/* Time budget for the threaded phase; may be overridden by the third command line argument. Zero means
 * "not set", in which case run() picks a default. */
static usec_t arg_timeout = 0;

#if HAVE_BLKID
/* CLOCK_MONOTONIC deadline computed by run() before the workers are started; a worker stops iterating
 * once this point in time has been reached. */
static usec_t end = 0;
32 
thread_func(void * ptr)33 static void* thread_func(void *ptr) {
34         int fd = PTR_TO_FD(ptr);
35         int r;
36 
37         for (unsigned i = 0; i < arg_n_iterations; i++) {
38                 _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
39                 _cleanup_(umount_and_rmdir_and_freep) char *mounted = NULL;
40                 _cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL;
41 
42                 if (now(CLOCK_MONOTONIC) >= end) {
43                         log_notice("Time's up, exiting thread's loop");
44                         break;
45                 }
46 
47                 log_notice("> Thread iteration #%u.", i);
48 
49                 assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
50 
51                 r = loop_device_make(fd, O_RDONLY, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop);
52                 if (r < 0)
53                         log_error_errno(r, "Failed to allocate loopback device: %m");
54                 assert_se(r >= 0);
55 
56                 log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
57 
58                 /* Let's make sure udev doesn't call BLKRRPART in the background, while we try to mount the device. */
59                 assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
60 
61                 r = dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, DISSECT_IMAGE_READ_ONLY, &dissected);
62                 if (r < 0)
63                         log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
64                 assert_se(r >= 0);
65 
66                 log_info("Dissected loop device %s", loop->node);
67 
68                 for (PartitionDesignator d = 0; d < _PARTITION_DESIGNATOR_MAX; d++) {
69                         if (!dissected->partitions[d].found)
70                                 continue;
71 
72                         log_notice("Found node %s fstype %s designator %s",
73                                    dissected->partitions[d].node,
74                                    dissected->partitions[d].fstype,
75                                    partition_designator_to_string(d));
76                 }
77 
78                 assert_se(dissected->partitions[PARTITION_ESP].found);
79                 assert_se(dissected->partitions[PARTITION_ESP].node);
80                 assert_se(dissected->partitions[PARTITION_XBOOTLDR].found);
81                 assert_se(dissected->partitions[PARTITION_XBOOTLDR].node);
82                 assert_se(dissected->partitions[PARTITION_ROOT].found);
83                 assert_se(dissected->partitions[PARTITION_ROOT].node);
84                 assert_se(dissected->partitions[PARTITION_HOME].found);
85                 assert_se(dissected->partitions[PARTITION_HOME].node);
86 
87                 r = dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
88                 log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
89                 assert_se(r >= 0);
90 
91                 /* Now the block device is mounted, we don't need no manual lock anymore, the devices are now
92                  * pinned by the mounts. */
93                 assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
94 
95                 log_notice("Unmounting %s", mounted);
96                 mounted = umount_and_rmdir_and_free(mounted);
97 
98                 log_notice("Unmounted.");
99 
100                 dissected = dissected_image_unref(dissected);
101 
102                 log_notice("Detaching loop device %s", loop->node);
103                 loop = loop_device_unref(loop);
104                 log_notice("Detached loop device.");
105         }
106 
107         log_notice("Leaving thread");
108 
109         return NULL;
110 }
111 #endif
112 
/* Tells whether the GPT_ROOT_NATIVE macro is defined at compile time. The caller skips the test when
 * this returns false, since there is then no root partition type to put into the test image. */
static bool have_root_gpt_type(void) {
#ifndef GPT_ROOT_NATIVE
        return false;
#else
        return true;
#endif
}
120 
run(int argc,char * argv[])121 static int run(int argc, char *argv[]) {
122         _cleanup_free_ char *p = NULL, *cmd = NULL;
123         _cleanup_(pclosep) FILE *sfdisk = NULL;
124         _cleanup_(loop_device_unrefp) LoopDevice *loop = NULL;
125         _cleanup_close_ int fd = -1;
126         int r;
127 
128         test_setup_logging(LOG_DEBUG);
129         log_show_tid(true);
130         log_show_time(true);
131         log_show_color(true);
132 
133         if (argc >= 2) {
134                 r = safe_atou(argv[1], &arg_n_threads);
135                 if (r < 0)
136                         return log_error_errno(r, "Failed to parse first argument (number of threads): %s", argv[1]);
137                 if (arg_n_threads <= 0)
138                         return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Number of threads must be at least 1, refusing.");
139         }
140 
141         if (argc >= 3) {
142                 r = safe_atou(argv[2], &arg_n_iterations);
143                 if (r < 0)
144                         return log_error_errno(r, "Failed to parse second argument (number of iterations): %s", argv[2]);
145                 if (arg_n_iterations <= 0)
146                         return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Number of iterations must be at least 1, refusing.");
147         }
148 
149         if (argc >= 4) {
150                 r = parse_sec(argv[3], &arg_timeout);
151                 if (r < 0)
152                         return log_error_errno(r, "Failed to parse third argument (timeout): %s", argv[3]);
153         }
154 
155         if (argc >= 5)
156                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Too many arguments (expected 3 at max).");
157 
158         if (!have_root_gpt_type()) {
159                 log_tests_skipped("No root partition GPT defined for this architecture, exiting.");
160                 return EXIT_TEST_SKIP;
161         }
162 
163         if (detect_container() > 0) {
164                 log_tests_skipped("Test not supported in a container, requires udev/uevent notifications.");
165                 return EXIT_TEST_SKIP;
166         }
167 
168         /* This is a test for the loopback block device setup code and it's use by the image dissection
169          * logic: since the kernel APIs are hard use and prone to races, let's test this in a heavy duty
170          * test: we open a bunch of threads and repeatedly allocate and deallocate loopback block devices in
171          * them in parallel, with an image file with a number of partitions. */
172 
173         r = detach_mount_namespace();
174         if (ERRNO_IS_PRIVILEGE(r)) {
175                 log_tests_skipped("Lacking privileges");
176                 return EXIT_TEST_SKIP;
177         }
178 
179         FOREACH_STRING(fs, "vfat", "ext4") {
180                 r = mkfs_exists(fs);
181                 assert_se(r >= 0);
182                 if (!r) {
183                         log_tests_skipped("mkfs.{vfat|ext4} not installed");
184                         return EXIT_TEST_SKIP;
185                 }
186         }
187 
188         assert_se(r >= 0);
189 
190         assert_se(tempfn_random_child("/var/tmp", "sfdisk", &p) >= 0);
191         fd = open(p, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC|O_NOFOLLOW, 0666);
192         assert_se(fd >= 0);
193         assert_se(ftruncate(fd, 256*1024*1024) >= 0);
194 
195         assert_se(cmd = strjoin("sfdisk ", p));
196         assert_se(sfdisk = popen(cmd, "we"));
197 
198         /* A reasonably complex partition table that fits on a 64K disk */
199         fputs("label: gpt\n"
200               "size=32M, type=C12A7328-F81F-11D2-BA4B-00A0C93EC93B\n"
201               "size=32M, type=BC13C2FF-59E6-4262-A352-B275FD6F7172\n"
202               "size=32M, type=0657FD6D-A4AB-43C4-84E5-0933C84B4F4F\n"
203               "size=32M, type=", sfdisk);
204 
205 #ifdef GPT_ROOT_NATIVE
206         fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(GPT_ROOT_NATIVE));
207 #else
208         fprintf(sfdisk, SD_ID128_UUID_FORMAT_STR, SD_ID128_FORMAT_VAL(GPT_ROOT_X86_64));
209 #endif
210 
211         fputs("\n"
212               "size=32M, type=933AC7E1-2EB4-4F13-B844-0E14E2AEF915\n", sfdisk);
213 
214         assert_se(pclose(sfdisk) == 0);
215         sfdisk = NULL;
216 
217         assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop) >= 0);
218 
219 #if HAVE_BLKID
220         _cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL;
221         _cleanup_(umount_and_rmdir_and_freep) char *mounted = NULL;
222         pthread_t threads[arg_n_threads];
223         sd_id128_t id;
224 
225         /* Take an explicit lock while we format the file systems, in accordance with
226          * https://systemd.io/BLOCK_DEVICE_LOCKING/. We don't want udev to interfere and probe while we write
227          * or even issue BLKRRPART or similar while we are working on this. */
228         assert_se(loop_device_flock(loop, LOCK_EX) >= 0);
229 
230         assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
231 
232         assert_se(dissected->partitions[PARTITION_ESP].found);
233         assert_se(dissected->partitions[PARTITION_ESP].node);
234         assert_se(dissected->partitions[PARTITION_XBOOTLDR].found);
235         assert_se(dissected->partitions[PARTITION_XBOOTLDR].node);
236         assert_se(dissected->partitions[PARTITION_ROOT].found);
237         assert_se(dissected->partitions[PARTITION_ROOT].node);
238         assert_se(dissected->partitions[PARTITION_HOME].found);
239         assert_se(dissected->partitions[PARTITION_HOME].node);
240 
241         assert_se(sd_id128_randomize(&id) >= 0);
242         assert_se(make_filesystem(dissected->partitions[PARTITION_ESP].node, "vfat", "EFI", id, true) >= 0);
243 
244         assert_se(sd_id128_randomize(&id) >= 0);
245         assert_se(make_filesystem(dissected->partitions[PARTITION_XBOOTLDR].node, "vfat", "xbootldr", id, true) >= 0);
246 
247         assert_se(sd_id128_randomize(&id) >= 0);
248         assert_se(make_filesystem(dissected->partitions[PARTITION_ROOT].node, "ext4", "root", id, true) >= 0);
249 
250         assert_se(sd_id128_randomize(&id) >= 0);
251         assert_se(make_filesystem(dissected->partitions[PARTITION_HOME].node, "ext4", "home", id, true) >= 0);
252 
253         dissected = dissected_image_unref(dissected);
254         assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
255 
256         assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
257 
258         /* We are particularly correct here, and now downgrade LOCK → LOCK_SH. That's because we are done
259          * with formatting the file systems, so we don't need the exclusive lock anymore. From now on a
260          * shared one is fine. This way udev can now probe the device if it wants, but still won't call
261          * BLKRRPART on it, and that's good, because that would destroy our partition table while we are at
262          * it. */
263         assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
264 
265         /* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
266         assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
267 
268         /* Now we mounted everything, the partitions are pinned. Now it's fine to release the lock
269          * fully. This means udev could now issue BLKRRPART again, but that's OK given this will fail because
270          * we now mounted the device. */
271         assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
272 
273         assert_se(umount_recursive(mounted, 0) >= 0);
274         loop = loop_device_unref(loop);
275 
276         log_notice("Threads are being started now");
277 
278         /* zero timeout means pick default: let's make sure we run for 10s on slow systems at max */
279         if (arg_timeout == 0)
280                 arg_timeout = slow_tests_enabled() ? 5 * USEC_PER_SEC : 1 * USEC_PER_SEC;
281 
282         end = usec_add(now(CLOCK_MONOTONIC), arg_timeout);
283 
284         if (arg_n_threads > 1)
285                 for (unsigned i = 0; i < arg_n_threads; i++)
286                         assert_se(pthread_create(threads + i, NULL, thread_func, FD_TO_PTR(fd)) == 0);
287 
288         log_notice("All threads started now.");
289 
290         if (arg_n_threads == 1)
291                 assert_se(thread_func(FD_TO_PTR(fd)) == NULL);
292         else
293                 for (unsigned i = 0; i < arg_n_threads; i++) {
294                         log_notice("Joining thread #%u.", i);
295 
296                         void *k;
297                         assert_se(pthread_join(threads[i], &k) == 0);
298                         assert_se(k == NULL);
299 
300                         log_notice("Joined thread #%u.", i);
301                 }
302 
303         log_notice("Threads are all terminated now.");
304 #else
305         log_notice("Cutting test short, since we do not have libblkid.");
306 #endif
307 
308         return 0;
309 }
310 
/* Hooks run() up as the program's entry point. NOTE(review): presumably this macro (from main-func.h)
 * generates main() and passes positive return values of run(), such as EXIT_TEST_SKIP, through as the
 * exit status — confirm against main-func.h. */
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);
312