/* Cache memory handling.
   Copyright (C) 2004-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
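/* With the uint8_t word size used here BITS is 8, ALLBITS is 0xff and
   HIGHBIT is 0x80.  */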


static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;
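  /* Set the bits in up to three steps: the remainder of the bitmap
     word containing START, then any completely covered words, and
     finally a trailing partial word.  */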

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
	{
	  /* All fits in the partial byte.  */
	  mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
	  return;
	}

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}


void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* Our caller, prune_cache, also uses a dynamically allocated array.
     If that array was too large for the stack it was malloc'ed, so it
     does not count against the stack budget here.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

  /* Create an array which can hold pointers to all the hash
     entries.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];
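  /* HE and HE_DATA share the one allocation: the first half is later
     sorted by the address of the hash entries themselves, the second
     half by the offset of the data they reference.  */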

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
	{
	  assert (cnt < db->head->nentries);
	  he[cnt] = (struct hashentry *) (db->data + run);

	  he[cnt]->prevp = prevp;
	  prevp = &he[cnt]->next;

	  /* This is the hash entry itself.  */
	  markrange (mark, run, sizeof (struct hashentry));

	  /* Add the information for the data itself.  We do this
	     only for the one special entry marked with FIRST.  */
	  if (he[cnt]->first)
	    {
	      struct datahead *dh
		= (struct datahead *) (db->data + he[cnt]->packet);
	      markrange (mark, he[cnt]->packet, dh->allocsize);
	    }

	  run = he[cnt]->next;

	  ++cnt;
	}
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  while ((mark[high - 1] & mask) == 0)
    mask >>= 1;
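  /* MASK now selects the highest marked bit in the last used bitmap
     word, i.e. the last block that is in use.  */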

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);
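  /* OFF_FREE is the byte offset of the first unused block; everything
     above it will be compacted downwards towards this point.  */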

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
	 && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
	 && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 1);
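  /* An odd GC_CYCLE value tells readers of the shared mapping that
     entries are currently being rearranged; the counter is bumped
     again once the data is consistent.  */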


  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;
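  /* MOVES points at the tail of a circular, singly-linked list;
     MOVES->next is the head, so the blocks are later replayed in the
     order they were appended, i.e. by ascending source address.  */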

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
	 entry in MARK, MASK is the bit, and CNT is the bit number.
	 OFF_FILLED is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
	{
	  /* No other bit set in the same element of MARK.  Search in the
	     following memory.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == 0);

	  if (byte == high)
	    /* That was it.  */
	    break;

	  mask = 1;
	  cnt = 0;
	}
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
	{
	  ++cnt;
	  mask <<= 1;
	}

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
	{
	  /* All other bits set.  Search the next bytes in MARK.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == ALLBITS);

	  mask = 1;
	  cnt = 0;
	}
      if (byte < high)
	{
	  /* Find the exact bit.  */
	  while ((mark[byte] & mask) != 0)
	    {
	      ++cnt;
	      mask <<= 1;
	    }
	}

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
	 OFF_ALLOCEND (not included) to the memory starting at
	 OFF_FREE.  First fix up all the entries for the
	 displacement.  */
      ref_t disp = off_alloc - off_free;
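      /* DISP is the number of bytes this block moves down; every
	 reference into the block must be reduced by it before the
	 memmove is performed.  */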

      struct moveinfo *new_move;
      if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
			    1))
	new_move = alloca_account (sizeof (*new_move), stack_used);
      else
	new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
	moves = new_move->next = new_move;
      else
	{
	  new_move->next = moves->next;
	  moves = moves->next = new_move;
	}

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
	{
	  /* Determine whether the next entry is for a hash entry or
	     the data.  */
	  if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
	    {
	      /* Just correct the forward reference.  */
	      *(*next_hash++)->prevp -= disp;

	      off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
			    & ~BLOCK_ALIGN_M1);
	    }
	  else
	    {
	      assert (next_data < &he_data[db->head->nentries]);
	      assert ((*next_data)->packet == off_alloc);

	      struct datahead *dh = (struct datahead *) (db->data + off_alloc);
	      do
		{
		  assert ((*next_data)->key >= (*next_data)->packet);
		  assert ((*next_data)->key + (*next_data)->len
			  <= (*next_data)->packet + dh->allocsize);

		  (*next_data)->packet -= disp;
		  (*next_data)->key -= disp;
		  ++next_data;
		}
	      while (next_data < &he_data[db->head->nentries]
		     && (*next_data)->packet == off_alloc);

	      off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
	    }
	}
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
	/* We are done, that was the last block.  */
	break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
	{
	  assert ((char *) runp->to >= db->data);
	  assert ((char *) runp->to + runp->size
		  <= db->data + db->head->first_free);
	  assert ((char *) runp->from >= db->data);
	  assert ((char *) runp->from + runp->size
		  <= db->data + db->head->first_free);

	  /* The regions may overlap.  */
	  memmove (runp->to, runp->from, runp->size);
	  runp = runp->next;
	}
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
	dbg_log (_("freed %zu bytes in %s cache"),
		 (size_t) (db->head->first_free
			   - ((char *) moves->to + moves->size - db->data)),
		 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
	 available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
	{
	  for (size_t idx = 0; idx < db->head->module; ++idx)
	    {
	      ref_t run = db->head->array[idx];
	      size_t cnt = 0;

	      while (run != ENDREF)
		{
		  if (run + sizeof (struct hashentry) > db->head->first_free)
		    {
		      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
			       "%" PRIu32 "+%zu > %zu\n",
			       cnt, idx, run, sizeof (struct hashentry),
			       (size_t) db->head->first_free);
		      break;
		    }

		  struct hashentry *he = (struct hashentry *) (db->data + run);

		  if (he->key + he->len > db->head->first_free)
		    dbg_log ("key of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->key, (size_t) he->len,
			     (size_t) db->head->first_free);

		  if (he->packet + sizeof (struct datahead)
		      > db->head->first_free)
		    dbg_log ("packet of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->packet, sizeof (struct datahead),
			     (size_t) db->head->first_free);
		  else
		    {
		      struct datahead *dh = (struct datahead *) (db->data
								 + he->packet);
		      if (he->packet + dh->allocsize
			  > db->head->first_free)
			dbg_log ("full key of entry %zu in hash bucket %zu "
				 "out of bounds: %" PRIu32 "+%zu > %zu",
				 cnt, idx, he->packet, (size_t) dh->allocsize,
				 (size_t) db->head->first_free);
		    }

		  run = he->next;
		  ++cnt;
		}
	    }
	}
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
	   MS_ASYNC);


  /* Now we are done modifying the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}


void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
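  /* E.g. if BLOCK_ALIGN is 8, a request for 13 bytes is rounded up to
     16 bytes.  */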

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
	{
	  /* Try to resize the database.  Grow the data area by at
	     least one eighth of its current size, and by at least
	     enough to satisfy this request.  */
	  size_t oldtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t),
					ALIGN)
			     + db->head->data_size);
	  size_t new_data_size = (db->head->data_size
				  + MAX (2 * len, db->head->data_size / 8));
	  size_t newtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t), ALIGN)
			     + new_data_size);
	  if (newtotal > db->max_db_size)
	    {
	      new_data_size -= newtotal - db->max_db_size;
	      newtotal = db->max_db_size;
	    }
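	  /* NEWTOTAL is clamped so that the whole mapping (header,
	     hash array and data area) never exceeds max_db_size.  */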

	  if (db->mmap_used && newtotal > oldtotal
	      /* We only have to adjust the file size.  The new pages
		 become magically available.  */
	      && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
							  newtotal
							  - oldtotal)) == 0)
	    {
	      db->head->data_size = new_data_size;
	      tried_resize = true;
	      goto retry;
	    }
	}

      if (data_alloc)
	pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
	{
	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

	  db->last_alloc_failed = true;
	}

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;

    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}