1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <pthread.h>
4 #include <unistd.h>
5 
6 #include "chattr-util.h"
7 #include "copy.h"
8 #include "errno-util.h"
9 #include "fd-util.h"
10 #include "format-util.h"
11 #include "journal-authenticate.h"
12 #include "managed-journal-file.h"
13 #include "path-util.h"
14 #include "random-util.h"
15 #include "set.h"
16 #include "stat-util.h"
17 #include "sync-util.h"
18 
/* Byte budget (16 KiB) for the on-stack HashItem buffer that managed_journal_file_punch_holes() uses to
 * read the data hash table in batches. */
#define PAYLOAD_BUFFER_SIZE (16U * 1024U)
/* Smallest unused entry array tail, in bytes (512 KiB), that managed_journal_file_entry_array_punch_hole()
 * considers worth releasing; anything smaller is left alone. */
#define MINIMUM_HOLE_SIZE (1U * 1024U * 1024U / 2U)
21 
/* Cuts a journal file down to the end of its tail object.
 *
 * The end offset is obtained from journal_file_tail_end_by_pread(). The header's arena_size is rewritten
 * to match that offset before the file itself is shortened, and journal_file_fstat() is called last.
 *
 * Returns a negative errno-style value on failure, otherwise whatever journal_file_fstat() returns. */
static int managed_journal_file_truncate(JournalFile *f) {
        uint64_t tail_end;
        int r = journal_file_tail_end_by_pread(f, &tail_end);

        if (r < 0)
                return log_debug_errno(r, "Failed to determine end of tail object: %m");

        /* The arena must never be recorded as larger than the file, hence fix up arena_size first and
         * only then shrink the file. */
        f->header->arena_size = htole64(tail_end - le64toh(f->header->header_size));

        r = ftruncate(f->fd, tail_end);
        if (r < 0)
                return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);

        return journal_file_fstat(f);
}
39 
/* Releases the unused tail of the last entry array in a chain of entry array objects.
 *
 * f          journal file to operate on
 * p          file offset of the first entry array object of the chain (0: there is no chain)
 * n_entries  number of items of the chain that are actually in use
 *
 * The chain is walked to its last array while the capacity of all arrays is summed up. The difference
 * between that capacity and n_entries is the number of unused items, which are expected to sit at the
 * end of the last array. If that unused region is at least MINIMUM_HOLE_SIZE bytes it is released:
 *
 *   - if the last array is the tail object of the file and the file is not sealed, the object is shrunk
 *     and the file is truncated right behind it;
 *   - otherwise a hole is punched into the file with fallocate().
 *
 * Returns 0 on success or if there was nothing (worthwhile) to release, -EBADMSG if the chain is
 * inconsistent with n_entries, -EOPNOTSUPP if the file system cannot punch holes, or another negative
 * errno-style value on I/O failure. */
static int managed_journal_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) {
        Object o;
        uint64_t offset, sz, n_items = 0, n_unused, n_last;
        int r;

        if (n_entries == 0)
                return 0;

        /* Walk the chain. When the loop ends, p is the offset of the last array and o holds its header. */
        for (uint64_t q = p; q != 0; q = le64toh(o.entry_array.next_entry_array_offset)) {
                r = journal_file_read_object_header(f, OBJECT_ENTRY_ARRAY, q, &o);
                if (r < 0)
                        return r;

                n_items += journal_file_entry_array_n_items(&o);
                p = q;
        }

        /* No chain at all, o was never filled in. */
        if (p == 0)
                return 0;

        /* More entries claimed than the chain has room for. */
        if (n_entries > n_items)
                return -EBADMSG;

        /* Amount of unused items in the final entry array. */
        n_unused = n_items - n_entries;

        if (n_unused == 0)
                return 0;

        /* The unused items must all fit into the final array. If they don't, the chain is malformed, and
         * the unsigned subtraction below would wrap around and place the offset in front of the object,
         * so that we'd end up truncating or punching data that does not belong to this array. */
        n_last = journal_file_entry_array_n_items(&o);
        if (n_unused > n_last)
                return -EBADMSG;

        /* First unused byte of the final array, and the number of bytes from there to the object's end. */
        offset = p + offsetof(Object, entry_array.items) +
                (n_last - n_unused) * sizeof(le64_t);
        sz = p + le64toh(o.object.size) - offset;

        if (sz < MINIMUM_HOLE_SIZE)
                return 0;

        if (p == le64toh(f->header->tail_object_offset) && !JOURNAL_HEADER_SEALED(f->header)) {
                ssize_t n;

                /* The array is the very last object of an unsealed file: rather than punching a hole,
                 * shrink the object to its used part and cut the file off behind it. */
                o.object.size = htole64(offset - p);

                n = pwrite(f->fd, &o, sizeof(EntryArrayObject), p);
                if (n < 0)
                        return log_debug_errno(errno, "Failed to modify entry array object size: %m");
                if ((size_t) n != sizeof(EntryArrayObject))
                        return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short pwrite() while modifying entry array object size.");

                /* As when truncating archives: update arena_size before shortening the file. */
                f->header->arena_size = htole64(ALIGN64(offset) - le64toh(f->header->header_size));

                if (ftruncate(f->fd, ALIGN64(offset)) < 0)
                        return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);

                return 0;
        }

        /* Deallocate the unused range but keep the file size as it is. */
        if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0) {
                if (ERRNO_IS_NOT_SUPPORTED(errno)) {
                        log_debug("Hole punching not supported by backing file system, skipping.");
                        return -EOPNOTSUPP; /* Make recognizable */
                }

                return log_debug_errno(errno, "Failed to punch hole in entry array of %s: %m", f->path);
        }

        return 0;
}
106 
/* Releases unused entry array space all over a journal file.
 *
 * First the global entry array chain referenced by the header is handled; a failure there is returned
 * as is. Then the data hash table is read in batches, and for every data object reachable from it the
 * object's own entry array chain is handled. In this second phase only -EOPNOTSUPP aborts the run (there
 * is no point in retrying on a file system that cannot punch holes); unreadable data objects end the
 * walk of their hash chain and all other errors are ignored.
 *
 * Returns 0 on success, or a negative errno-style value. */
static int managed_journal_file_punch_holes(JournalFile *f) {
        HashItem items[PAYLOAD_BUFFER_SIZE / sizeof(HashItem)];
        uint64_t table_end, pos;
        int r;

        r = managed_journal_file_entry_array_punch_hole(
                        f,
                        le64toh(f->header->entry_array_offset),
                        le64toh(f->header->n_entries));
        if (r < 0)
                return r;

        pos = le64toh(f->header->data_hash_table_offset);
        table_end = pos + le64toh(f->header->data_hash_table_size);

        while (pos < table_end) {
                size_t want = MIN(sizeof(items), table_end - pos), n_batch;
                ssize_t n;

                n = pread(f->fd, items, want, pos);
                if (n < 0)
                        return log_debug_errno(errno, "Failed to read hash table items: %m");

                /* Only complete hash items are of interest, drop a trailing partial one. */
                n -= n % sizeof(HashItem);
                if (n == 0)
                        break; /* Nothing usable was read, we cannot make progress. */

                n_batch = (size_t) n / sizeof(HashItem);

                for (size_t j = 0; j < n_batch; j++) {
                        uint64_t q = le64toh(items[j].head_hash_offset);
                        Object o;

                        /* Follow the chain of data objects hanging off this hash bucket. */
                        while (q != 0) {
                                r = journal_file_read_object_header(f, OBJECT_DATA, q, &o);
                                if (r < 0) {
                                        log_debug_errno(r, "Invalid data object: %m, ignoring");
                                        break;
                                }

                                if (le64toh(o.data.n_entries) != 0) {
                                        /* One entry less than n_entries is expected in the chain. */
                                        r = managed_journal_file_entry_array_punch_hole(
                                                        f,
                                                        le64toh(o.data.entry_array_offset),
                                                        le64toh(o.data.n_entries) - 1);
                                        if (r == -EOPNOTSUPP)
                                                return -EOPNOTSUPP;

                                        /* Any other failure is deliberately not fatal. */
                                }

                                q = le64toh(o.data.next_hash_offset);
                        }
                }

                pos += n;
        }

        return 0;
}
157 
/* Drives the offline state machine of a journal file until it reaches a terminal state.
 *
 * This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
 * As a result we use atomic operations on f->file->offline_state for inter-thread communications with
 * managed_journal_file_set_offline() and journal_file_set_online().
 *
 * Every loop iteration re-reads offline_state and acts on it. State transitions are done with
 * compare-and-swap; if a swap fails because the state was changed concurrently, the loop starts over
 * and acts on the new state instead. The function returns once the state is OFFLINE_DONE (or if it
 * finds the unexpected OFFLINE_JOINED). */
static void managed_journal_file_set_offline_internal(ManagedJournalFile *f) {
        int r;

        assert(f);
        assert(f->file->fd >= 0);
        assert(f->file->header);

        for (;;) {
                switch (f->file->offline_state) {
                case OFFLINE_CANCEL:
                        /* Cancellation requested: acknowledge it by moving to OFFLINE_DONE and stop
                         * without touching the file. */
                        if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
                                continue;
                        return;

                case OFFLINE_AGAIN_FROM_SYNCING:
                        /* A restart was requested while syncing: begin again at OFFLINE_SYNCING. */
                        if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
                                continue;
                        break;

                case OFFLINE_AGAIN_FROM_OFFLINING:
                        /* A restart was requested while offlining: likewise begin again at OFFLINE_SYNCING. */
                        if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
                                continue;
                        break;

                case OFFLINE_SYNCING:
                        /* Archived files are trimmed first. Both steps are best effort, their results
                         * are intentionally discarded. */
                        if (f->file->archive) {
                                (void) managed_journal_file_truncate(f->file);
                                (void) managed_journal_file_punch_holes(f->file);
                        }

                        /* First sync: done before the header state is changed below. */
                        (void) fsync(f->file->fd);

                        /* If the state is no longer OFFLINE_SYNCING, somebody intervened while we were
                         * busy above; re-evaluate instead of marking the file offline. */
                        if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
                                continue;

                        /* Record the final state in the header, followed by a second sync. */
                        f->file->header->state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE;
                        (void) fsync(f->file->fd);

                        /* If we've archived the journal file, first try to re-enable COW on the file. If the
                         * FS_NOCOW_FL flag was never set or we successfully removed it, continue. If we fail
                         * to remove the flag on the archived file, rewrite the file without the NOCOW flag.
                         * We need this fallback because on some filesystems (BTRFS), the NOCOW flag cannot
                         * be removed after data has been written to a file. The only way to remove it is to
                         * copy all data to a new file without the NOCOW flag set. */

                        if (f->file->archive) {
                                r = chattr_fd(f->file->fd, 0, FS_NOCOW_FL, NULL);
                                if (r >= 0)
                                        continue; /* Back to the top of the loop, which handles OFFLINE_OFFLINING next. */

                                log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path);

                                /* Fallback: copy the file, read through its /proc fd path, onto its own
                                 * path, replacing the original. */
                                r = copy_file_atomic(FORMAT_PROC_FD_PATH(f->file->fd), f->file->path, f->file->mode,
                                                     0,
                                                     FS_NOCOW_FL,
                                                     COPY_REPLACE | COPY_FSYNC | COPY_HOLES | COPY_ALL_XATTRS);
                                if (r < 0) {
                                        log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path);
                                        continue; /* Not fatal, the loop carries on with the current state. */
                                }
                        }

                        /* Leaves the switch only; the next loop iteration picks up the current state. */
                        break;

                case OFFLINE_OFFLINING:
                        /* All work is done: publish OFFLINE_DONE, then return via the case below. */
                        if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
                                continue;
                        _fallthrough_;
                case OFFLINE_DONE:
                        return;

                case OFFLINE_JOINED:
                        /* Not a state this function expects to be entered or resumed with; log and bail out. */
                        log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
                        return;
                }
        }
}
238 
managed_journal_file_set_offline_thread(void * arg)239 static void * managed_journal_file_set_offline_thread(void *arg) {
240         ManagedJournalFile *f = arg;
241 
242         (void) pthread_setname_np(pthread_self(), "journal-offline");
243 
244         managed_journal_file_set_offline_internal(f);
245 
246         return NULL;
247 }
248 
249 /* Trigger a restart if the offline thread is mid-flight in a restartable state. */
managed_journal_file_set_offline_try_restart(ManagedJournalFile * f)250 static bool managed_journal_file_set_offline_try_restart(ManagedJournalFile *f) {
251         for (;;) {
252                 switch (f->file->offline_state) {
253                 case OFFLINE_AGAIN_FROM_SYNCING:
254                 case OFFLINE_AGAIN_FROM_OFFLINING:
255                         return true;
256 
257                 case OFFLINE_CANCEL:
258                         if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
259                                 continue;
260                         return true;
261 
262                 case OFFLINE_SYNCING:
263                         if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
264                                 continue;
265                         return true;
266 
267                 case OFFLINE_OFFLINING:
268                         if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
269                                 continue;
270                         return true;
271 
272                 default:
273                         return false;
274                 }
275         }
276 }
277 
278 /* Sets a journal offline.
279  *
280  * If wait is false then an offline is dispatched in a separate thread for a
281  * subsequent journal_file_set_offline() or journal_file_set_online() of the
282  * same journal to synchronize with.
283  *
284  * If wait is true, then either an existing offline thread will be restarted
285  * and joined, or if none exists the offline is simply performed in this
286  * context without involving another thread.
287  */
managed_journal_file_set_offline(ManagedJournalFile * f,bool wait)288 int managed_journal_file_set_offline(ManagedJournalFile *f, bool wait) {
289         int target_state;
290         bool restarted;
291         int r;
292 
293         assert(f);
294 
295         if (!journal_file_writable(f->file))
296                 return -EPERM;
297 
298         if (f->file->fd < 0 || !f->file->header)
299                 return -EINVAL;
300 
301         target_state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE;
302 
303         /* An offlining journal is implicitly online and may modify f->header->state,
304          * we must also join any potentially lingering offline thread when already in
305          * the desired offline state.
306          */
307         if (!managed_journal_file_is_offlining(f) && f->file->header->state == target_state)
308                 return journal_file_set_offline_thread_join(f->file);
309 
310         /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
311         restarted = managed_journal_file_set_offline_try_restart(f);
312         if ((restarted && wait) || !restarted) {
313                 r = journal_file_set_offline_thread_join(f->file);
314                 if (r < 0)
315                         return r;
316         }
317 
318         if (restarted)
319                 return 0;
320 
321         /* Initiate a new offline. */
322         f->file->offline_state = OFFLINE_SYNCING;
323 
324         if (wait) /* Without using a thread if waiting. */
325                 managed_journal_file_set_offline_internal(f);
326         else {
327                 sigset_t ss, saved_ss;
328                 int k;
329 
330                 assert_se(sigfillset(&ss) >= 0);
331                 /* Don't block SIGBUS since the offlining thread accesses a memory mapped file.
332                  * Asynchronous SIGBUS signals can safely be handled by either thread. */
333                 assert_se(sigdelset(&ss, SIGBUS) >= 0);
334 
335                 r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
336                 if (r > 0)
337                         return -r;
338 
339                 r = pthread_create(&f->file->offline_thread, NULL, managed_journal_file_set_offline_thread, f);
340 
341                 k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
342                 if (r > 0) {
343                         f->file->offline_state = OFFLINE_JOINED;
344                         return -r;
345                 }
346                 if (k > 0)
347                         return -k;
348         }
349 
350         return 0;
351 }
352 
/* Tells whether an offline operation is pending or in progress for the file, i.e. whether its offline
 * state is anything other than OFFLINE_DONE or OFFLINE_JOINED. */
bool managed_journal_file_is_offlining(ManagedJournalFile *f) {
        assert(f);

        /* Issue a full memory barrier before looking at the state. */
        __sync_synchronize();

        return !IN_SET(f->file->offline_state, OFFLINE_DONE, OFFLINE_JOINED);
}
363 
/* Takes a journal file offline synchronously, closes it and frees the ManagedJournalFile wrapper.
 *
 * Accepts NULL, in which case nothing happens. Always returns NULL, so that the result can be assigned
 * back to the pointer that was passed in. */
ManagedJournalFile* managed_journal_file_close(ManagedJournalFile *f) {
        JournalFile *jf;

        if (!f)
                return NULL;

        jf = f->file;

#if HAVE_GCRYPT
        /* Sealed, writable files get one last tag appended. Failing to do so is logged only. */
        if (JOURNAL_HEADER_SEALED(jf->header) && journal_file_writable(jf)) {
                int k = journal_file_append_tag(jf);

                if (k < 0)
                        log_error_errno(k, "Failed to append tag when closing journal: %m");
        }
#endif

        if (jf->post_change_timer) {
                /* If the timer is still enabled, run the post-change handling now, before the timer
                 * is dropped. */
                if (sd_event_source_get_enabled(jf->post_change_timer, NULL) > 0)
                        journal_file_post_change(jf);

                sd_event_source_disable_unref(jf->post_change_timer);
        }

        /* Wait for the offline to complete; its result is not acted upon. */
        (void) managed_journal_file_set_offline(f, true);

        journal_file_close(jf);

        return mfree(f);
}
392 
/* Opens a journal file and wraps it in a newly allocated ManagedJournalFile.
 *
 * Before anything else deferred_closes is emptied, with managed_journal_file_close() as destructor for
 * its entries. All other arguments are handed to journal_file_open() as they are, except for template,
 * of which the underlying JournalFile is passed on (or NULL if there is no template).
 *
 * On success 0 is returned and the new object is stored in *ret. On failure a negative errno-style
 * value is returned (-ENOMEM, or whatever journal_file_open() reported) and *ret is not touched. */
int managed_journal_file_open(
                int fd,
                const char *fname,
                int open_flags,
                JournalFileFlags file_flags,
                mode_t mode,
                uint64_t compress_threshold_bytes,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                Set *deferred_closes,
                ManagedJournalFile *template,
                ManagedJournalFile **ret) {

        _cleanup_free_ ManagedJournalFile *mf = NULL;
        JournalFile *template_file;
        int r;

        set_clear_with_destructor(deferred_closes, managed_journal_file_close);

        mf = new0(ManagedJournalFile, 1);
        if (!mf)
                return -ENOMEM;

        template_file = template ? template->file : NULL;

        r = journal_file_open(
                        fd,
                        fname,
                        open_flags,
                        file_flags,
                        mode,
                        compress_threshold_bytes,
                        metrics,
                        mmap_cache,
                        template_file,
                        &mf->file);
        if (r < 0)
                return r; /* The wrapper is released by its cleanup handler. */

        /* Pass ownership to the caller. */
        *ret = TAKE_PTR(mf);
        return 0;
}
423 
424 
/* Begins closing a journal file without necessarily waiting for it to go offline.
 *
 * If deferred_closes is given and the file can be added to it, an asynchronous offline is kicked off
 * and the file stays in the set. Otherwise the file is closed right away with
 * managed_journal_file_close().
 *
 * Always returns NULL. */
ManagedJournalFile* managed_journal_file_initiate_close(ManagedJournalFile *f, Set *deferred_closes) {
        int r;

        assert(f);

        /* No set to park the file in: close synchronously. */
        if (!deferred_closes)
                return managed_journal_file_close(f);

        r = set_put(deferred_closes, f);
        if (r < 0) {
                log_debug_errno(r, "Failed to add file to deferred close set, closing immediately.");
                return managed_journal_file_close(f);
        }

        /* The set now references the file; go offline in the background. */
        (void) managed_journal_file_set_offline(f, false);

        return NULL;
}
442 
/* Rotates a journal file: archives the current file, opens a new one and starts closing the old one.
 *
 * The current file *f is archived with journal_file_archive(); if that fails, its error is returned
 * and *f is left alone. Otherwise a new file is opened at the path journal_file_archive() handed
 * back, using the old file's open flags and mode, and the old file as template. After that the old
 * file is always handed to managed_journal_file_initiate_close() and *f is replaced by the new
 * file. Note that this happens even if opening failed, in which case *f ends up NULL.
 *
 * Returns the result of managed_journal_file_open(), or the error from journal_file_archive(). */
int managed_journal_file_rotate(
                ManagedJournalFile **f,
                MMapCache *mmap_cache,
                JournalFileFlags file_flags,
                uint64_t compress_threshold_bytes,
                Set *deferred_closes) {

        _cleanup_free_ char *open_path = NULL;
        ManagedJournalFile *old_file, *new_file = NULL;
        int r;

        assert(f);
        assert(*f);

        old_file = *f;

        r = journal_file_archive(old_file->file, &open_path);
        if (r < 0)
                return r;

        r = managed_journal_file_open(
                        -1,
                        open_path,
                        old_file->file->open_flags,
                        file_flags,
                        old_file->file->mode,
                        compress_threshold_bytes,
                        NULL,            /* metrics */
                        mmap_cache,
                        deferred_closes,
                        old_file,        /* template */
                        &new_file);

        /* Regardless of whether the open succeeded, the old file is done for. */
        managed_journal_file_initiate_close(old_file, deferred_closes);
        *f = new_file;

        return r;
}
479 
/* Opens a journal file by path, replacing it with a fresh one if it turns out to be unusable.
 *
 * A first attempt is made with managed_journal_file_open(). If it fails with one of the errors listed
 * below, and the file was to be opened writable with O_CREAT, and its name ends in ".journal", the
 * file is moved out of the way with journal_file_dispose() and the open is attempted exactly once
 * more. In every other case the result of the first attempt is returned as is.
 *
 * Returns 0 on success (with the file stored in *ret), or a negative errno-style value. */
int managed_journal_file_open_reliably(
                const char *fname,
                int open_flags,
                JournalFileFlags file_flags,
                mode_t mode,
                uint64_t compress_threshold_bytes,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                Set *deferred_closes,
                ManagedJournalFile *template,
                ManagedJournalFile **ret) {

        int r;

        r = managed_journal_file_open(
                        -1,
                        fname,
                        open_flags,
                        file_flags,
                        mode,
                        compress_threshold_bytes,
                        metrics,
                        mmap_cache,
                        deferred_closes,
                        template,
                        ret);

        /* Success, or a failure that replacing the file wouldn't cure: we are done. */
        if (!IN_SET(r,
                    -EBADMSG,           /* Corrupted */
                    -ENODATA,           /* Truncated */
                    -EHOSTDOWN,         /* Other machine */
                    -EPROTONOSUPPORT,   /* Incompatible feature */
                    -EBUSY,             /* Unclean shutdown */
                    -ESHUTDOWN,         /* Already archived */
                    -EIO,               /* IO error, including SIGBUS on mmap */
                    -EIDRM,             /* File has been deleted */
                    -ETXTBSY))          /* File is from the future */
                return r;

        /* Only replace files we were asked to write to and allowed to create, and only those that carry
         * the regular journal file suffix. */
        if ((open_flags & O_ACCMODE) == O_RDONLY ||
            !(open_flags & O_CREAT) ||
            !endswith(fname, ".journal"))
                return r;

        /* The file is unusable. Move it out of the way and retry, but only this one time. */
        log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);

        r = journal_file_dispose(AT_FDCWD, fname);
        if (r < 0)
                return r;

        return managed_journal_file_open(
                        -1,
                        fname,
                        open_flags,
                        file_flags,
                        mode,
                        compress_threshold_bytes,
                        metrics,
                        mmap_cache,
                        deferred_closes,
                        template,
                        ret);
}
527