1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <errno.h>
4 #include <stdlib.h>
5 #include <sys/mman.h>
6 
7 #include "alloc-util.h"
8 #include "errno-util.h"
9 #include "fd-util.h"
10 #include "hashmap.h"
11 #include "list.h"
12 #include "log.h"
13 #include "macro.h"
14 #include "memory-util.h"
15 #include "mmap-cache.h"
16 #include "sigbus.h"
17 
/* Forward declarations: Window holds a list of Context objects, and Context points back at a Window. */
typedef struct Window Window;
typedef struct Context Context;
20 
/* One memory-mapped region ("window") of a file tracked by the cache. */
struct Window {
        MMapCache *cache;               /* cache this window was added to (set in window_add()) */

        bool invalidated:1;             /* set by window_invalidate() once the mapping was replaced by anonymous pages */
        bool keep_always:1;             /* if set, the window is not moved to the unused list when its last context detaches */
        bool in_unused:1;               /* true while the window is linked into cache->unused */

        void *ptr;                      /* start address of the mapping */
        uint64_t offset;                /* file offset at which the mapping starts */
        size_t size;                    /* length of the mapping in bytes */

        MMapFileDescriptor *fd;         /* file object this window belongs to */

        LIST_FIELDS(Window, by_fd);     /* links for fd->windows */
        LIST_FIELDS(Window, unused);    /* links for cache->unused */

        LIST_HEAD(Context, contexts);   /* contexts currently attached to this window */
};
39 
/* A lookup context: remembers the window it was last attached to, so that try_context() can
 * serve a repeated access without searching the window list. */
struct Context {
        Window *window;                 /* window currently attached, or NULL */

        LIST_FIELDS(Context, by_window); /* links for window->contexts */
};
45 
/* Per-file state registered with a cache through mmap_cache_add_fd(). */
struct MMapFileDescriptor {
        MMapCache *cache;               /* cache this file is registered with; a reference is taken in mmap_cache_add_fd() */
        int fd;                         /* file descriptor that gets mapped; also used as the key in cache->fds */
        int prot;                       /* protection flags passed to mmap() for every window of this file */
        bool sigbus;                    /* set once a SIGBUS address was found inside one of this file's windows;
                                         * try_context() and find_mmap() then return -EIO */
        LIST_HEAD(Window, windows);     /* all windows of this file, linked via Window.by_fd */
};
53 
/* The cache object: reference-counted owner of the file table, the unused-window list and the
 * fixed set of lookup contexts. */
struct MMapCache {
        unsigned n_ref;                 /* reference counter, starts at 1 in mmap_cache_new() */
        unsigned n_windows;             /* number of Window objects currently allocated */

        /* Statistics, reported by mmap_cache_stats_log_debug() */
        unsigned n_context_cache_hit, n_window_list_hit, n_missed;

        Hashmap *fds;                   /* FD_TO_PTR(fd) → MMapFileDescriptor, allocated lazily in mmap_cache_add_fd() */

        LIST_HEAD(Window, unused);      /* windows without attached context; newly unused windows are prepended */
        Window *last_unused;            /* tail of the unused list, i.e. the window that has been unused the longest */

        Context contexts[MMAP_CACHE_MAX_CONTEXTS]; /* indexed by the context number passed to mmap_cache_fd_get() */
};
67 
/* As long as the cache holds no more than this many windows, window_add() always allocates a
 * fresh Window instead of recycling the longest-unused one. */
#define WINDOWS_MIN 64

#if ENABLE_DEBUG_MMAP_CACHE
/* Tiny windows increase mmap activity and the chance of exposing unsafe use. */
# define WINDOW_SIZE (page_size())
#else
/* Minimum size of a newly created mapping: add_mmap() widens smaller requests to this size
 * (before any clamping to the file size). */
# define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
#endif
76 
mmap_cache_new(void)77 MMapCache* mmap_cache_new(void) {
78         MMapCache *m;
79 
80         m = new0(MMapCache, 1);
81         if (!m)
82                 return NULL;
83 
84         m->n_ref = 1;
85         return m;
86 }
87 
/* Detaches a window from everything that refers to it: unmaps its memory, drops it from its
 * file's window list and from the cache's unused list, and resets the window pointer of every
 * context attached to it. The Window object itself is not freed, and members such as ptr and
 * in_unused are not reset here. */
static void window_unlink(Window *w) {

        assert(w);

        if (w->ptr)
                munmap(w->ptr, w->size);

        if (w->fd)
                LIST_REMOVE(by_fd, w->fd->windows, w);

        if (w->in_unused) {
                MMapCache *cache = w->cache;

                /* If we are the tail of the unused list, our predecessor becomes the new tail */
                if (cache->last_unused == w)
                        cache->last_unused = w->unused_prev;

                LIST_REMOVE(unused, cache->unused, w);
        }

        /* Make sure no context keeps a pointer to this window */
        LIST_FOREACH(by_window, c, w->contexts) {
                assert(c->window == w);
                c->window = NULL;
        }
}
110 
/* Swaps the file-backed mapping of a window for an anonymous private mapping at the very same
 * address and with the same size and protection. Does nothing if that already happened. */
static void window_invalidate(Window *w) {
        assert(w);
        assert(w->fd);

        if (!w->invalidated) {
                /* Once we got a SIGBUS for this file we want to be sure it cannot cause any
                 * further SIGBUS (which might overrun the sigbus queue), hence put anonymous
                 * pages in place of the file pages. */

                assert_se(mmap(w->ptr, w->size, w->fd->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr);
                w->invalidated = true;
        }
}
126 
/* Unlinks a window from all lists and contexts, updates the cache's window counter and
 * releases the Window object. */
static void window_free(Window *w) {
        MMapCache *cache;

        assert(w);

        cache = w->cache;

        window_unlink(w);
        cache->n_windows--;

        free(w);
}
134 
window_matches(Window * w,uint64_t offset,size_t size)135 _pure_ static bool window_matches(Window *w, uint64_t offset, size_t size) {
136         assert(w);
137         assert(size > 0);
138 
139         return
140                 offset >= w->offset &&
141                 offset + size <= w->offset + w->size;
142 }
143 
window_matches_fd(Window * w,MMapFileDescriptor * f,uint64_t offset,size_t size)144 _pure_ static bool window_matches_fd(Window *w, MMapFileDescriptor *f, uint64_t offset, size_t size) {
145         assert(w);
146         assert(f);
147 
148         return
149                 w->fd == f &&
150                 window_matches(w, offset, size);
151 }
152 
window_add(MMapCache * m,MMapFileDescriptor * f,bool keep_always,uint64_t offset,size_t size,void * ptr)153 static Window *window_add(MMapCache *m, MMapFileDescriptor *f, bool keep_always, uint64_t offset, size_t size, void *ptr) {
154         Window *w;
155 
156         assert(m);
157         assert(f);
158 
159         if (!m->last_unused || m->n_windows <= WINDOWS_MIN) {
160 
161                 /* Allocate a new window */
162                 w = new(Window, 1);
163                 if (!w)
164                         return NULL;
165                 m->n_windows++;
166         } else {
167 
168                 /* Reuse an existing one */
169                 w = m->last_unused;
170                 window_unlink(w);
171         }
172 
173         *w = (Window) {
174                 .cache = m,
175                 .fd = f,
176                 .keep_always = keep_always,
177                 .offset = offset,
178                 .size = size,
179                 .ptr = ptr,
180         };
181 
182         LIST_PREPEND(by_fd, f->windows, w);
183 
184         return w;
185 }
186 
/* Drops the association between context c and its current window, if it has one. If this was
 * the last context of the window and the window is not marked keep_always, the window becomes
 * unused: it is put at the head of the cache's unused list, or – in ENABLE_DEBUG_MMAP_CACHE
 * builds – freed right away. */
static void context_detach_window(MMapCache *m, Context *c) {
        Window *w;

        assert(m);
        assert(c);

        w = c->window;
        if (!w)
                return;

        c->window = NULL;
        LIST_REMOVE(by_window, w->contexts, c);

        /* Still referenced by some other context, or pinned? Then there is nothing more to do. */
        if (w->contexts || w->keep_always)
                return;

#if ENABLE_DEBUG_MMAP_CACHE
        /* Unmap unused windows immediately, so that any use-after-unmap shows up as SIGSEGV. */
        window_free(w);
#else
        LIST_PREPEND(unused, m->unused, w);

        /* The first window that enters an empty list is also its tail */
        if (!m->last_unused)
                m->last_unused = w;

        w->in_unused = true;
#endif
}
214 
/* Makes w the window of context c. Whatever window the context referenced before is detached
 * first, and w is taken off the unused list if it was on it. A no-op if c is attached to w
 * already. */
static void context_attach_window(MMapCache *m, Context *c, Window *w) {
        assert(m);
        assert(c);
        assert(w);

        if (c->window == w)
                return;

        context_detach_window(m, c);

        if (w->in_unused) {
                /* The window is in use again, hence take it off the unused list; if it was the
                 * tail, its predecessor becomes the new tail. */
                if (m->last_unused == w)
                        m->last_unused = w->unused_prev;

                LIST_REMOVE(unused, m->unused, w);
                w->in_unused = false;
        }

        c->window = w;
        LIST_PREPEND(by_window, w->contexts, c);
}
237 
/* Destroys the cache object: detaches all contexts from their windows, frees the file table,
 * frees every window left on the unused list and finally the object itself. Returns whatever
 * mfree() returns. */
static MMapCache *mmap_cache_free(MMapCache *m) {
        assert(m);

        for (Context *c = m->contexts; c < m->contexts + MMAP_CACHE_MAX_CONTEXTS; c++)
                context_detach_window(m, c);

        hashmap_free(m->fds);

        /* window_free() removes the window from the list, hence always look at the head again */
        for (Window *w = m->unused; w; w = m->unused)
                window_free(w);

        return mfree(m);
}

/* NOTE(review): presumably this generates mmap_cache_ref()/mmap_cache_unref(), with
 * mmap_cache_free() as destructor – confirm against the macro definition. */
DEFINE_TRIVIAL_REF_UNREF_FUNC(MMapCache, mmap_cache, mmap_cache_free);
253 
/* Tries to release resources by freeing the window that has been unused the longest.
 * Returns 1 if a window was freed, 0 if there is no unused window. */
static int make_room(MMapCache *m) {
        Window *victim;

        assert(m);

        victim = m->last_unused;
        if (victim) {
                window_free(victim);
                return 1;
        }

        return 0;
}
263 
/* Fast path of the lookup: checks whether the window currently attached to context c already
 * covers [offset, offset + size) of file f.
 *
 * Returns 1 and stores the address for offset in *ret on a hit, 0 if the context has no
 * (matching) window – a non-matching window is detached from the context –, and -EIO if the
 * window matches but its file is flagged with sigbus. */
static int try_context(
                MMapFileDescriptor *f,
                Context *c,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        Window *w;

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(c);
        assert(size > 0);
        assert(ret);

        w = c->window;
        if (!w)
                return 0;

        if (!window_matches_fd(w, f, offset, size)) {
                /* The context points to a window that is of no use to us anymore, let go of it */
                context_detach_window(f->cache, c);
                return 0;
        }

        if (w->fd->sigbus)
                return -EIO;

        /* keep_always is sticky: once requested it stays set */
        if (keep_always)
                w->keep_always = true;

        *ret = (uint8_t*) w->ptr + (offset - w->offset);
        f->cache->n_context_cache_hit++;

        return 1;
}
299 
/* Second stage of the lookup: walks the window list of file f looking for a window that covers
 * [offset, offset + size). The first match gets attached to context c.
 *
 * Returns 1 and stores the address for offset in *ret on a hit, 0 if no window matches, and
 * -EIO if the file is flagged with sigbus. */
static int find_mmap(
                MMapFileDescriptor *f,
                Context *c,
                bool keep_always,
                uint64_t offset,
                size_t size,
                void **ret) {

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(c);
        assert(size > 0);

        if (f->sigbus)
                return -EIO;

        LIST_FOREACH(by_fd, w, f->windows) {
                if (!window_matches(w, offset, size))
                        continue;

                /* Found one, remember it in the context and hand out the address */
                context_attach_window(f->cache, c, w);

                /* keep_always is sticky: once requested it stays set */
                if (keep_always)
                        w->keep_always = true;

                *ret = (uint8_t*) w->ptr + (offset - w->offset);
                f->cache->n_window_list_hit++;

                return 1;
        }

        return 0;
}
336 
/* Calls mmap() on the file of f with the given address hint, flags, offset and size. Whenever
 * mmap() fails with ENOMEM one unused window is freed via make_room() and the call is retried,
 * until it succeeds or nothing is left to free.
 *
 * Returns 0 and stores the mapping in *res on success, -ENOMEM if no more windows can be
 * released, or the negative errno of any other mmap() failure. */
static int mmap_try_harder(MMapFileDescriptor *f, void *addr, int flags, uint64_t offset, size_t size, void **res) {
        assert(f);
        assert(res);

        for (;;) {
                void *p;
                int r;

                p = mmap(addr, size, f->prot, flags, f->fd, offset);
                if (p != MAP_FAILED) {
                        *res = p;
                        return 0;
                }

                /* Only memory shortage is worth a retry */
                if (errno != ENOMEM)
                        return negative_errno();

                r = make_room(f->cache);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOMEM;
        }
}
362 
/* Last stage of the lookup: creates a new MAP_SHARED mapping that covers [offset, offset + size)
 * of file f, registers it as a window and attaches it to context c.
 *
 * The mapped range is page-aligned. If it is smaller than WINDOW_SIZE it is widened to
 * WINDOW_SIZE, with the extra space distributed around the requested range. If st is
 * non-NULL the range is additionally clamped to st->st_size.
 *
 * Returns 1 and stores the address for offset in *ret on success, -EADDRNOTAVAIL if the window
 * would start at or beyond the file size given in st, -ENOMEM if no Window could be allocated,
 * or the error returned by mmap_try_harder(). */
static int add_mmap(
                MMapFileDescriptor *f,
                Context *c,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        uint64_t woffset, wsize;
        Window *w;
        void *d;
        int r;

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(c);
        assert(size > 0);
        assert(ret);

        /* Begin with the smallest page-aligned range that contains the request */
        woffset = offset & ~((uint64_t) page_size() - 1ULL);
        wsize = PAGE_ALIGN(size + (offset - woffset));

        if (wsize < WINDOW_SIZE) {
                /* Too small for a window: grow to the full window size, moving the start
                 * backwards by about half of the extra space, but not below offset 0. */
                uint64_t delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);

                woffset = delta > offset ? 0 : woffset - delta;
                wsize = WINDOW_SIZE;
        }

        if (st) {
                /* Memory maps that are larger than the files underneath have undefined
                 * behavior. Hence, clamp things to the file size if we know it. */
                uint64_t file_size = (uint64_t) st->st_size;

                if (woffset >= file_size)
                        return -EADDRNOTAVAIL;

                if (woffset + wsize > file_size)
                        wsize = PAGE_ALIGN(st->st_size - woffset);
        }

        r = mmap_try_harder(f, NULL, MAP_SHARED, woffset, wsize, &d);
        if (r < 0)
                return r;

        w = window_add(f->cache, f, keep_always, woffset, wsize, d);
        if (!w) {
                /* No window object to track the mapping with, hence undo the mapping */
                (void) munmap(d, wsize);
                return -ENOMEM;
        }

        context_attach_window(f->cache, c, w);

        *ret = (uint8_t*) w->ptr + (offset - w->offset);

        return 1;
}
431 
/* Returns in *ret a pointer to the memory that maps [offset, offset + size) of file f.
 *
 * Three strategies are tried in order: the window currently attached to the given context, any
 * other existing window of the file, and finally a newly created mapping (which is counted as
 * a cache miss). context must be below MMAP_CACHE_MAX_CONTEXTS and size non-zero; st, if
 * non-NULL, is passed on to add_mmap() to clamp new mappings to the file size.
 *
 * Returns 1 on success, or a negative errno-style error from whichever stage failed. */
int mmap_cache_fd_get(
                MMapFileDescriptor *f,
                unsigned context,
                bool keep_always,
                uint64_t offset,
                size_t size,
                struct stat *st,
                void **ret) {

        Context *ctx;
        int r;

        assert(f);
        assert(f->cache);
        assert(f->cache->n_ref > 0);
        assert(size > 0);
        assert(ret);
        assert(context < MMAP_CACHE_MAX_CONTEXTS);

        ctx = f->cache->contexts + context;

        /* First, see if the context already points to a suitable window */
        r = try_context(f, ctx, keep_always, offset, size, ret);

        /* If not, look through the windows we already have for this file */
        if (r == 0)
                r = find_mmap(f, ctx, keep_always, offset, size, ret);

        /* Still nothing? Then we have to map the range anew */
        if (r == 0) {
                f->cache->n_missed++;
                r = add_mmap(f, ctx, keep_always, offset, size, st, ret);
        }

        return r;
}
468 
/* Writes the three lookup counters of the cache (context hits, window list hits, misses) to
 * the debug log. */
void mmap_cache_stats_log_debug(MMapCache *m) {
        assert(m);

        log_debug("mmap cache statistics: %u context cache hit, %u window list hit, %u miss",
                  m->n_context_cache_hit,
                  m->n_window_list_hit,
                  m->n_missed);
}
474 
/* Drains the queue of addresses that triggered SIGBUS (via sigbus_pop()), flags every file that
 * owns a window containing such an address with sigbus, and then replaces all windows of the
 * flagged files by anonymous memory through window_invalidate().
 *
 * Aborts the process if sigbus_pop() reports an error, or if an address does not belong to
 * any window of any file registered with this cache. */
static void mmap_cache_process_sigbus(MMapCache *m) {
        bool found = false;
        MMapFileDescriptor *f;
        int r;

        assert(m);

        /* Iterate through all triggered pages and mark their files as
         * invalidated */
        for (;;) {
                bool ours;
                void *addr;

                /* A return value of 0 ends the loop (no further address to process); a negative
                 * value is treated as fatal. */
                r = sigbus_pop(&addr);
                if (_likely_(r == 0))
                        break;
                if (r < 0) {
                        log_error_errno(r, "SIGBUS handling failed: %m");
                        abort();
                }

                /* Search all windows of all files for the one that contains the address */
                ours = false;
                HASHMAP_FOREACH(f, m->fds) {
                        LIST_FOREACH(by_fd, w, f->windows) {
                                if ((uint8_t*) addr >= (uint8_t*) w->ptr &&
                                    (uint8_t*) addr < (uint8_t*) w->ptr + w->size) {
                                        /* Flags the file, and records both "this address is ours"
                                         * and "at least one file was hit" */
                                        found = ours = f->sigbus = true;
                                        break;
                                }
                        }

                        /* The inner break only left the window loop, leave the file loop too */
                        if (ours)
                                break;
                }

                /* Didn't find a matching window, give up */
                if (!ours) {
                        log_error("Unknown SIGBUS page, aborting.");
                        abort();
                }
        }

        /* The list of triggered pages is now empty. Now, let's remap
         * all windows of the triggered file to anonymous maps, so
         * that no page of the file in question is triggered again, so
         * that we can be sure not to hit the queue size limit. */
        if (_likely_(!found))
                return;

        /* Note that this covers every file with the sigbus flag set, including those flagged by
         * an earlier call; window_invalidate() skips windows that are invalidated already. */
        HASHMAP_FOREACH(f, m->fds) {
                if (!f->sigbus)
                        continue;

                LIST_FOREACH(by_fd, w, f->windows)
                        window_invalidate(w);
        }
}
532 
/* Processes all pending SIGBUS events of the cache f belongs to, then reports whether f is
 * flagged as having triggered a SIGBUS. */
bool mmap_cache_fd_got_sigbus(MMapFileDescriptor *f) {
        MMapCache *cache;

        assert(f);

        cache = f->cache;
        mmap_cache_process_sigbus(cache);

        return f->sigbus;
}
540 
mmap_cache_add_fd(MMapCache * m,int fd,int prot)541 MMapFileDescriptor* mmap_cache_add_fd(MMapCache *m, int fd, int prot) {
542         MMapFileDescriptor *f;
543         int r;
544 
545         assert(m);
546         assert(fd >= 0);
547 
548         f = hashmap_get(m->fds, FD_TO_PTR(fd));
549         if (f)
550                 return f;
551 
552         r = hashmap_ensure_allocated(&m->fds, NULL);
553         if (r < 0)
554                 return NULL;
555 
556         f = new0(MMapFileDescriptor, 1);
557         if (!f)
558                 return NULL;
559 
560         r = hashmap_put(m->fds, FD_TO_PTR(fd), f);
561         if (r < 0)
562                 return mfree(f);
563 
564         f->cache = mmap_cache_ref(m);
565         f->fd = fd;
566         f->prot = prot;
567 
568         return f;
569 }
570 
/* Releases a file object: processes pending SIGBUS events, frees all windows of the file,
 * removes the file from the cache's file table, drops its reference on the cache and frees
 * the object. */
void mmap_cache_fd_free(MMapFileDescriptor *f) {
        assert(f);
        assert(f->cache);

        /* Dispatch queued SIGBUS events before the mappings go away; otherwise we might later
         * find an entry in the queue that we cannot attribute to any memory map anymore. */
        mmap_cache_process_sigbus(f->cache);

        /* window_free() takes the window off f->windows, hence always look at the head again */
        for (Window *w = f->windows; w; w = f->windows)
                window_free(w);

        if (f->cache) {
                assert_se(hashmap_remove(f->cache->fds, FD_TO_PTR(f->fd)));
                f->cache = mmap_cache_unref(f->cache);
        }

        free(f);
}
591 
mmap_cache_fd_cache(MMapFileDescriptor * f)592 MMapCache* mmap_cache_fd_cache(MMapFileDescriptor *f) {
593         assert(f);
594 
595         return f->cache;
596 }
597