/* Copyright (c) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <atomic.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libintl.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <nss.h>

#include "nscd.h"
#include "dbg_log.h"


/* Wrapper functions with error checking for standard functions.  */
extern void *xcalloc (size_t n, size_t s);


/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;


static time_t (*const readdfcts[LASTREQ]) (struct database_dyn *,
					   struct hashentry *,
					   struct datahead *) =
{
  [GETPWBYNAME] = readdpwbyname,
  [GETPWBYUID] = readdpwbyuid,
  [GETGRBYNAME] = readdgrbyname,
  [GETGRBYGID] = readdgrbygid,
  [GETHOSTBYNAME] = readdhstbyname,
  [GETHOSTBYNAMEv6] = readdhstbynamev6,
  [GETHOSTBYADDR] = readdhstbyaddr,
  [GETHOSTBYADDRv6] = readdhstbyaddrv6,
  [GETAI] = readdhstai,
  [INITGROUPS] = readdinitgroups,
  [GETSERVBYNAME] = readdservbyname,
  [GETSERVBYPORT] = readdservbyport,
  [GETNETGRENT] = readdgetnetgrent,
  [INNETGR] = readdinnetgr
};
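
/* Request types without a reload function are left as NULL slots by the
   designated initializers above.  prune_cache relies on this and checks,
   before dispatching a reload, that a function is installed:

     assert (runp->type < LASTREQ && readdfcts[runp->type] != NULL);
     time_t timeout = readdfcts[runp->type] (table, runp, dh);

   (This restates the call site in prune_cache below; it is not
   additional code.)  */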


/* Search the cache for a matching entry and return it when found.
   Negative entries (lookups which previously failed) are returned as
   well; the caller can recognize them via the datahead's notfound
   flag.  If no usable entry exists, return NULL.

   This function must be called with the read-lock held.  */
struct datahead *
cache_search (request_type type, const void *key, size_t len,
	      struct database_dyn *table, uid_t owner)
{
  unsigned long int hash = __nss_hash (key, len) % table->head->module;

  unsigned long int nsearched = 0;
  struct datahead *result = NULL;

  ref_t work = table->head->array[hash];
  while (work != ENDREF)
    {
      ++nsearched;

      struct hashentry *here = (struct hashentry *) (table->data + work);

      if (type == here->type && len == here->len
	  && memcmp (key, table->data + here->key, len) == 0
	  && here->owner == owner)
	{
	  /* We found the entry.  Increment the appropriate counter.  */
	  struct datahead *dh
	    = (struct datahead *) (table->data + here->packet);

	  /* See whether we must ignore the entry.  */
	  if (dh->usable)
	    {
	      /* We do not synchronize the memory here.  The statistics
		 data is not crucial, we synchronize only once in a while
		 in the cleanup threads.  */
	      if (dh->notfound)
		++table->head->neghit;
	      else
		{
		  ++table->head->poshit;

		  if (dh->nreloads != 0)
		    dh->nreloads = 0;
		}

	      result = dh;
	      break;
	    }
	}

      work = here->next;
    }

  if (nsearched > table->head->maxnsearched)
    table->head->maxnsearched = nsearched;

  return result;
}
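
/* A minimal usage sketch (not part of nscd): a worker thread performing
   a lookup takes the read lock, searches, and uses the result before
   releasing the lock.  send_response and the surrounding variables are
   hypothetical.

     pthread_rwlock_rdlock (&table->lock);
     struct datahead *dh = cache_search (GETPWBYNAME, key, strlen (key) + 1,
					 table, owner);
     if (dh != NULL && !dh->notfound)
       send_response (fd, dh);
     pthread_rwlock_unlock (&table->lock);
*/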

/* Add a new entry to the cache.  The return value is zero if the
   function call was successful.

   This function must be called with the read-lock held.

   We modify the table even though we only acquire a read-lock.  This
   is OK because the operations we use would be safe even without any
   locking as long as `prune_cache' does not run concurrently -- and
   holding the read lock guarantees exactly that, since `prune_cache'
   takes the write lock before restructuring the table.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
	   bool first, struct database_dyn *table,
	   uid_t owner, bool prune_wakeup)
{
  if (__glibc_unlikely (debug_level >= 2))
    {
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
	str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			 key, buf, sizeof (buf));
      else
	str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
	       str, serv2str[type], dbnames[table - dbs],
	       first ? _(" (first)") : "");
    }

  unsigned long int hash = __nss_hash (key, len) % table->head->module;
  struct hashentry *newp;

  newp = mempool_alloc (table, sizeof (struct hashentry), 0);
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    {
      /* If necessary mark the entry as unusable so that lookups will
	 not use it.  */
      if (first)
	packet->usable = false;

      return -1;
    }

  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;
  assert ((newp->packet & BLOCK_ALIGN_M1) == 0);

  /* Put the new entry in the first position.  */
  /* TODO Review concurrency.  Use atomic_exchange_release.  */
  newp->next = atomic_load_relaxed (&table->head->array[hash]);
  while (!atomic_compare_exchange_weak_release (&table->head->array[hash],
						(ref_t *) &newp->next,
						(ref_t) ((char *) newp
							 - table->data)));
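
  /* Note: on failure, atomic_compare_exchange_weak_release stores the
     current head back into newp->next, so each retry re-links the new
     entry in front of the up-to-date chain before attempting the swap
     again.  */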

  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_increment (&table->head->nentries);

  /* It does not matter that we are not loading the just-incremented
     value; this is just for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
	   (char *) &table->head->array[hash] - (char *) table->head
	   + sizeof (ref_t), MS_ASYNC);

  /* We do not have to worry about the pruning thread if we are
     re-adding the data since this is done by the pruning thread.  We
     also do not have to do anything in case this is not the first
     time the data is entered since different data heads all have the
     same timeout.  */
  if (first && prune_wakeup)
    {
      /* Perhaps the prune thread for this table has not run in a long
	 time.  Wake it if necessary.  */
      pthread_mutex_lock (&table->prune_lock);
      time_t next_wakeup = table->wakeup_time;
      bool do_wakeup = false;
      if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL)
	{
	  table->wakeup_time = packet->timeout;
	  do_wakeup = true;
	}
      pthread_mutex_unlock (&table->prune_lock);
      if (do_wakeup)
	pthread_cond_signal (&table->prune_cond);
    }

  return 0;
}
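
/* A minimal usage sketch (not part of nscd): a writer allocates the
   datahead plus payload from the table's mempool, fills it in, marks it
   usable, and only then publishes it with cache_add.  The key handling
   and field setup shown are hypothetical and abbreviated.

     struct datahead *dh = mempool_alloc (table, total_size, 1);
     if (dh != NULL)
       {
	 ... fill in dh->allocsize, dh->timeout and the payload ...
	 dh->usable = true;
	 (void) cache_add (GETPWBYNAME, key_copy, key_len, dh,
			   true, table, owner, true);
       }
*/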

/* Walk through the table and remove all entries whose lifetime has
   ended.

   We have a problem here.  To actually remove the entries we must hold
   the write-lock.  But since we want to keep the time we hold the lock
   as short as possible, we cannot simply acquire the lock when we
   start looking for timed-out entries.

   Therefore we do it in two stages: first we look for entries which
   must be invalidated and remember them.  Then we get the lock and
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.  */
time_t
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
	{
	  /* Reply to the INVALIDATE initiator.  */
	  int32_t resp = 0;
	  writeall (fd, &resp, sizeof (resp));
	}

      /* No need to do this again anytime soon.  */
      return 24 * 60 * 60;
    }

  /* If we are supposed to check for modifications of the underlying
     file, invalidate all entries when the file has changed.  */
  if (table->check_file && now != LONG_MAX)
    {
      struct traced_file *runp = table->traced_files;

      while (runp != NULL)
	{
#ifdef HAVE_INOTIFY
	  if (runp->inotify_descr[TRACED_FILE] == -1)
#endif
	    {
	      struct stat64 st;

	      if (stat64 (runp->fname, &st) < 0)
		{
		  /* Print a diagnostic that the traced file was missing.
		     We must not disable tracing since the file might return
		     shortly and we want to reload it at the next pruning.
		     Disabling tracing here would go against the configuration
		     as specified by the user via check-files.  */
		  char buf[128];
		  dbg_log (_("checking for monitored file `%s': %s"),
			   runp->fname, strerror_r (errno, buf, sizeof (buf)));
		}
	      else
		{
		  /* This must be `!=' to catch cases where users turn the
		     clocks back and we still want to detect any time
		     difference in mtime.  */
		  if (st.st_mtime != runp->mtime)
		    {
		      dbg_log (_("monitored file `%s` changed (mtime)"),
			       runp->fname);
		      /* The file changed.  Invalidate all entries.  */
		      now = LONG_MAX;
		      runp->mtime = st.st_mtime;
#ifdef HAVE_INOTIFY
		      /* Attempt to install a watch on the file.  */
		      install_watches (runp);
#endif
		    }
		}
	    }

	  runp = runp->next;
	}
    }

  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
     we don't need to hold any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  bool *mark;
  size_t memory_needed = cnt * sizeof (bool);
  bool mark_use_alloca;
  if (__glibc_likely (memory_needed <= MAX_STACK_USE))
    {
      mark = alloca (memory_needed);
      memset (mark, '\0', memory_needed);
      mark_use_alloca = true;
    }
  else
    {
      mark = xcalloc (1, memory_needed);
      mark_use_alloca = false;
    }
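  /* mark[i] records whether hash bucket i contains at least one entry
     whose data head has become unusable; FIRST and LAST below bound the
     range of marked buckets so the second, locked pass only has to
     revisit that slice of the table.  */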
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__glibc_unlikely (debug_level > 2))
    dbg_log (_("pruning %s cache; time %ld"),
	     dbnames[table - dbs], (long int) now);

#define NO_TIMEOUT LONG_MAX
  time_t next_timeout = NO_TIMEOUT;
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
	{
	  struct hashentry *runp = (struct hashentry *) (data + run);
	  struct datahead *dh = (struct datahead *) (data + runp->packet);

	  /* Some debug support.  */
	  if (__glibc_unlikely (debug_level > 2))
	    {
	      char buf[INET6_ADDRSTRLEN];
	      const char *str;

	      if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
		{
		  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			     data + runp->key, buf, sizeof (buf));
		  str = buf;
		}
	      else
		str = data + runp->key;

	      dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
		       serv2str[runp->type], str, dh->timeout);
	    }

	  /* Check whether the entry timed out.  */
	  if (dh->timeout < now)
	    {
	      /* This hash bucket could contain entries which need to
		 be looked at.  */
	      mark[cnt] = true;

	      first = MIN (first, cnt);
	      last = MAX (last, cnt);

	      /* We only have to look at the data of the first entries
		 since the count information is kept in the data part
		 which is shared.  */
	      if (runp->first)
		{
		  /* At this point there are two choices: we reload the
		     value or we discard it.  Do not change NRELOADS if
		     we do not reload the record.  */
		  if ((reload_count != UINT_MAX
		       && __builtin_expect (dh->nreloads >= reload_count, 0))
		      /* We always remove negative entries.  */
		      || dh->notfound
		      /* Discard everything if the user explicitly
			 requests it.  */
		      || now == LONG_MAX)
		    {
		      /* Remove the value.  */
		      dh->usable = false;

		      /* We definitely have some garbage entries now.  */
		      any = true;
		    }
		  else
		    {
		      /* Reload the value.  We do this only for the
			 initially used key, not the additionally
			 added derived value.  */
		      assert (runp->type < LASTREQ
			      && readdfcts[runp->type] != NULL);

		      time_t timeout = readdfcts[runp->type] (table, runp, dh);
		      next_timeout = MIN (next_timeout, timeout);

		      /* If the entry has been replaced, we might need
			 cleanup.  */
		      any |= !dh->usable;
		    }
		}
	    }
	  else
	    {
	      assert (dh->usable);
	      next_timeout = MIN (next_timeout, dh->timeout);
	    }

	  run = runp->next;
	}
    }
  while (cnt > 0);

  if (__glibc_unlikely (fd != -1))
    {
      /* Reply to the INVALIDATE initiator that the cache has been
	 invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }

  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write lock since we are about to modify
	 the table.  */
      if (__glibc_unlikely (pthread_rwlock_trywrlock (&table->lock) != 0))
	{
	  ++table->head->wrlockdelayed;
	  pthread_rwlock_wrlock (&table->lock);
	}

      /* Now we start modifying the data.  Make sure all readers of the
	 data are aware of this and temporarily don't use the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 1);
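
      /* The gc_cycle counter implements a seqlock-style protocol for
	 clients reading the database through the shared mapping: an odd
	 value means a modification is in progress.  A reader samples the
	 counter, performs its lookup, and retries if the counter was odd
	 or changed in between, schematically:

	   uint32_t c1 = atomic_load (&head->gc_cycle);
	   ... perform the lookup in the mapped data ...
	   uint32_t c2 = atomic_load (&head->gc_cycle);
	   if ((c1 & 1) != 0 || c1 != c2)
	     ... retry, or fall back to asking the daemon ...

	 (Sketch only; the actual client-side checks live in libc's nscd
	 client code, not in this file.)  */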

      while (first <= last)
	{
	  if (mark[first])
	    {
	      ref_t *old = &table->head->array[first];
	      ref_t run = table->head->array[first];
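
	      /* OLD always points at the link slot which currently
		 refers to RUN: first the bucket head, afterwards the
		 preceding entry's next field.  Unlinking an entry is
		 therefore a single store through *OLD.  */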

	      assert (run != ENDREF);
	      do
		{
		  struct hashentry *runp = (struct hashentry *) (data + run);
		  struct datahead *dh
		    = (struct datahead *) (data + runp->packet);

		  if (! dh->usable)
		    {
		      /* We need this list only for debugging, but it is
			 more costly to avoid creating it than to build
			 it.  */
		      runp->dellist = head;
		      head = runp;

		      /* No need for an atomic operation, we have the
			 write lock.  */
		      --table->head->nentries;

		      run = *old = runp->next;
		    }
		  else
		    {
		      old = &runp->next;
		      run = runp->next;
		    }
		}
	      while (run != ENDREF);
	    }

	  ++first;
	}

      /* Now we are done modifying the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 0);

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

      /* Make sure the data is saved to disk.  */
      if (table->persistent)
	msync (table->head,
	       data + table->head->first_free - (char *) table->head,
	       MS_ASYNC);

      /* One extra pass if we do debugging.  */
      if (__glibc_unlikely (debug_level > 0))
	{
	  struct hashentry *runp = head;

	  while (runp != NULL)
	    {
	      char buf[INET6_ADDRSTRLEN];
	      const char *str;

	      if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
		{
		  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
			     data + runp->key, buf, sizeof (buf));
		  str = buf;
		}
	      else
		str = data + runp->key;

	      dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

	      runp = runp->dellist;
	    }
	}
    }

  if (__glibc_unlikely (! mark_use_alloca))
    free (mark);

  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  /* If there is no entry in the database and we therefore have no new
     timeout value, tell the caller to wake up in 24 hours.  */
  return next_timeout == NO_TIMEOUT ? 24 * 60 * 60 : next_timeout - now;
}
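
/* A minimal sketch of the caller's side (not part of this file): the
   per-database prune thread treats the return value as the number of
   seconds to sleep before the next pass, roughly

     for (;;)
       {
	 time_t wait = prune_cache (table, time (NULL), -1);
	 ... wait on table->prune_cond for at most WAIT seconds, or
	     until cache_add signals an earlier wakeup_time ...
       }

   The real loop, with its locking and wakeup_time bookkeeping, lives in
   nscd's connections code.  */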