1 /* Inner loops of cache daemon.
2    Copyright (C) 1998-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published
7    by the Free Software Foundation; version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17 
18 #include <alloca.h>
19 #include <assert.h>
20 #include <atomic.h>
21 #include <error.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <grp.h>
25 #include <ifaddrs.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <stdint.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_NETLINK
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
38 #endif
39 #ifdef HAVE_EPOLL
40 # include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_INOTIFY
43 # include <sys/inotify.h>
44 #endif
45 #include <sys/mman.h>
46 #include <sys/param.h>
47 #include <sys/poll.h>
48 #include <sys/socket.h>
49 #include <sys/stat.h>
50 #include <sys/un.h>
51 
52 #include "nscd.h"
53 #include "dbg_log.h"
54 #include "selinux.h"
55 #include <resolv/resolv.h>
56 
57 #include <kernel-features.h>
58 #include <libc-diag.h>
59 
60 
61 /* Support to run nscd as an unprivileged user */
62 const char *server_user;
63 static uid_t server_uid;
64 static gid_t server_gid;
65 const char *stat_user;
66 uid_t stat_uid;
67 static gid_t *server_groups;
68 #ifndef NGROUPS
69 # define NGROUPS 32
70 #endif
71 static int server_ngroups;
72 
73 static pthread_attr_t attr;
74 
75 static void begin_drop_privileges (void);
76 static void finish_drop_privileges (void);
77 
78 /* Map request type to a string.  */
79 const char *const serv2str[LASTREQ] =
80 {
81   [GETPWBYNAME] = "GETPWBYNAME",
82   [GETPWBYUID] = "GETPWBYUID",
83   [GETGRBYNAME] = "GETGRBYNAME",
84   [GETGRBYGID] = "GETGRBYGID",
85   [GETHOSTBYNAME] = "GETHOSTBYNAME",
86   [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87   [GETHOSTBYADDR] = "GETHOSTBYADDR",
88   [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89   [SHUTDOWN] = "SHUTDOWN",
90   [GETSTAT] = "GETSTAT",
91   [INVALIDATE] = "INVALIDATE",
92   [GETFDPW] = "GETFDPW",
93   [GETFDGR] = "GETFDGR",
94   [GETFDHST] = "GETFDHST",
95   [GETAI] = "GETAI",
96   [INITGROUPS] = "INITGROUPS",
97   [GETSERVBYNAME] = "GETSERVBYNAME",
98   [GETSERVBYPORT] = "GETSERVBYPORT",
99   [GETFDSERV] = "GETFDSERV",
100   [GETNETGRENT] = "GETNETGRENT",
101   [INNETGR] = "INNETGR",
102   [GETFDNETGR] = "GETFDNETGR"
103 };
104 
105 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
106 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107 #else
108 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
109 #endif
110 
111 /* The control data structures for the services.  */
112 struct database_dyn dbs[lastdb] =
113 {
114   [pwddb] = {
115     .lock = RWLOCK_INITIALIZER,
116     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
117     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
118     .enabled = 0,
119     .check_file = 1,
120     .persistent = 0,
121     .propagate = 1,
122     .shared = 0,
123     .max_db_size = DEFAULT_MAX_DB_SIZE,
124     .suggested_module = DEFAULT_SUGGESTED_MODULE,
125     .db_filename = _PATH_NSCD_PASSWD_DB,
126     .disabled_iov = &pwd_iov_disabled,
127     .postimeout = 3600,
128     .negtimeout = 20,
129     .wr_fd = -1,
130     .ro_fd = -1,
131     .mmap_used = false
132   },
133   [grpdb] = {
134     .lock = RWLOCK_INITIALIZER,
135     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
136     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
137     .enabled = 0,
138     .check_file = 1,
139     .persistent = 0,
140     .propagate = 1,
141     .shared = 0,
142     .max_db_size = DEFAULT_MAX_DB_SIZE,
143     .suggested_module = DEFAULT_SUGGESTED_MODULE,
144     .db_filename = _PATH_NSCD_GROUP_DB,
145     .disabled_iov = &grp_iov_disabled,
146     .postimeout = 3600,
147     .negtimeout = 60,
148     .wr_fd = -1,
149     .ro_fd = -1,
150     .mmap_used = false
151   },
152   [hstdb] = {
153     .lock = RWLOCK_INITIALIZER,
154     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
155     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
156     .enabled = 0,
157     .check_file = 1,
158     .persistent = 0,
159     .propagate = 0,		/* Not used.  */
160     .shared = 0,
161     .max_db_size = DEFAULT_MAX_DB_SIZE,
162     .suggested_module = DEFAULT_SUGGESTED_MODULE,
163     .db_filename = _PATH_NSCD_HOSTS_DB,
164     .disabled_iov = &hst_iov_disabled,
165     .postimeout = 3600,
166     .negtimeout = 20,
167     .wr_fd = -1,
168     .ro_fd = -1,
169     .mmap_used = false
170   },
171   [servdb] = {
172     .lock = RWLOCK_INITIALIZER,
173     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
174     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
175     .enabled = 0,
176     .check_file = 1,
177     .persistent = 0,
178     .propagate = 0,		/* Not used.  */
179     .shared = 0,
180     .max_db_size = DEFAULT_MAX_DB_SIZE,
181     .suggested_module = DEFAULT_SUGGESTED_MODULE,
182     .db_filename = _PATH_NSCD_SERVICES_DB,
183     .disabled_iov = &serv_iov_disabled,
184     .postimeout = 28800,
185     .negtimeout = 20,
186     .wr_fd = -1,
187     .ro_fd = -1,
188     .mmap_used = false
189   },
190   [netgrdb] = {
191     .lock = RWLOCK_INITIALIZER,
192     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
193     .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
194     .enabled = 0,
195     .check_file = 1,
196     .persistent = 0,
197     .propagate = 0,		/* Not used.  */
198     .shared = 0,
199     .max_db_size = DEFAULT_MAX_DB_SIZE,
200     .suggested_module = DEFAULT_SUGGESTED_MODULE,
201     .db_filename = _PATH_NSCD_NETGROUP_DB,
202     .disabled_iov = &netgroup_iov_disabled,
203     .postimeout = 28800,
204     .negtimeout = 20,
205     .wr_fd = -1,
206     .ro_fd = -1,
207     .mmap_used = false
208   }
209 };
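/* The postimeout and negtimeout fields above are the default
   time-to-live values, in seconds, for positive and negative cache
   entries; they and most of the other per-database settings are
   normally overridden from nscd.conf at startup.  */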
210 
211 
212 /* Mapping of request type to database.  */
213 static struct
214 {
215   bool data_request;
216   struct database_dyn *db;
217 } const reqinfo[LASTREQ] =
218 {
219   [GETPWBYNAME] = { true, &dbs[pwddb] },
220   [GETPWBYUID] = { true, &dbs[pwddb] },
221   [GETGRBYNAME] = { true, &dbs[grpdb] },
222   [GETGRBYGID] = { true, &dbs[grpdb] },
223   [GETHOSTBYNAME] = { true, &dbs[hstdb] },
224   [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
225   [GETHOSTBYADDR] = { true, &dbs[hstdb] },
226   [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
227   [SHUTDOWN] = { false, NULL },
228   [GETSTAT] = { false, NULL },
229   [GETFDPW] = { false, &dbs[pwddb] },
230   [GETFDGR] = { false, &dbs[grpdb] },
231   [GETFDHST] = { false, &dbs[hstdb] },
232   [GETAI] = { true, &dbs[hstdb] },
233   [INITGROUPS] = { true, &dbs[grpdb] },
234   [GETSERVBYNAME] = { true, &dbs[servdb] },
235   [GETSERVBYPORT] = { true, &dbs[servdb] },
236   [GETFDSERV] = { false, &dbs[servdb] },
237   [GETNETGRENT] = { true, &dbs[netgrdb] },
238   [INNETGR] = { true, &dbs[netgrdb] },
239   [GETFDNETGR] = { false, &dbs[netgrdb] }
240 };
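/* Entries with data_request == true are lookups whose results can be
   cached and answered straight from the mapped database.  The remaining
   request types (SHUTDOWN, GETSTAT, INVALIDATE and the GETFD* family)
   are control or descriptor-passing operations which handle_request
   below treats separately.  */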
241 
242 
243 /* Initial number of threads to use.  */
244 int nthreads = -1;
245 /* Maximum number of threads to use.  */
246 int max_nthreads = 32;
247 
248 /* Socket for incoming connections.  */
249 static int sock;
250 
251 #ifdef HAVE_INOTIFY
252 /* Inotify descriptor.  */
253 int inotify_fd = -1;
254 #endif
255 
256 #ifdef HAVE_NETLINK
257 /* Descriptor for netlink status updates.  */
258 static int nl_status_fd = -1;
259 #endif
260 
261 /* Number of times clients had to wait.  */
262 unsigned long int client_queued;
263 
264 
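/* Send LEN bytes from BUF to FD, retrying short writes.  Returns LEN on
   success, the number of bytes actually sent if the connection stopped
   accepting data, or a negative value (with errno set by send) on
   failure.  MSG_NOSIGNAL keeps a client that closed its socket from
   killing the daemon with SIGPIPE.  */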
265 ssize_t
266 writeall (int fd, const void *buf, size_t len)
267 {
268   size_t n = len;
269   ssize_t ret;
270   do
271     {
272       ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
273       if (ret <= 0)
274 	break;
275       buf = (const char *) buf + ret;
276       n -= ret;
277     }
278   while (n > 0);
279   return ret < 0 ? ret : len - n;
280 }
281 
282 
283 enum usekey
284   {
285     use_not = 0,
286     /* The following three are not really used; they are symbolic constants.  */
287     use_first = 16,
288     use_begin = 32,
289     use_end = 64,
290 
291     use_he = 1,
292     use_he_begin = use_he | use_begin,
293     use_he_end = use_he | use_end,
294     use_data = 3,
295     use_data_begin = use_data | use_begin,
296     use_data_end = use_data | use_end,
297     use_data_first = use_data_begin | use_first
298   };
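/* The verifier below builds a byte-per-offset map of the data area.
   Each byte records what the corresponding offset is used for: use_not
   (free), use_he (hash entry) or use_data (data record), with use_begin
   and use_end marking the first and last byte of an object and
   use_first noting that a data record is referenced by a hash entry
   with first == true.  */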
299 
300 
301 static int
302 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
303 	   enum usekey use, ref_t start, size_t len)
304 {
305   if (len < 2)
306     return 0;
307 
308   if (start > first_free || start + len > first_free
309       || (start & BLOCK_ALIGN_M1))
310     return 0;
311 
312   if (usemap[start] == use_not)
313     {
314       /* Add the start marker.  */
315       usemap[start] = use | use_begin;
316       use &= ~use_first;
317 
318       while (--len > 0)
319 	if (usemap[++start] != use_not)
320 	  return 0;
321 	else
322 	  usemap[start] = use;
323 
324       /* Add the end marker.  */
325       usemap[start] = use | use_end;
326     }
327   else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
328     {
329       /* Hash entries can't be shared.  */
330       if (use == use_he)
331 	return 0;
332 
333       usemap[start] |= (use & use_first);
334       use &= ~use_first;
335 
336       while (--len > 1)
337 	if (usemap[++start] != use)
338 	  return 0;
339 
340       if (usemap[++start] != (use | use_end))
341 	return 0;
342     }
343   else
344     /* Points to a wrong object or somewhere in the middle.  */
345     return 0;
346 
347   return 1;
348 }
349 
350 
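/* The check walks every hash chain, verifies via check_use that each
   hashentry and the datahead it references lie entirely within the used
   part of the data area, and detects corrupted circular chains with a
   tortoise-and-hare walk (the trail/tick variables).  It finally
   confirms that the number of entries matches head->nentries, that
   every data record was referenced by a "first" hash entry, and that
   the header did not change while the checks ran.  */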
351 /* Verify data in persistent database.  */
352 static int
353 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
354 {
355   assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
356 	  || dbnr == netgrdb);
357 
358   time_t now = time (NULL);
359 
360   struct database_pers_head *head = mem;
361   struct database_pers_head head_copy = *head;
362 
363   /* Check that the header that was read matches the head in the database.  */
364   if (memcmp (head, readhead, sizeof (*head)) != 0)
365     return 0;
366 
367   /* First some easy tests: make sure the database header is sane.  */
368   if (head->version != DB_VERSION
369       || head->header_size != sizeof (*head)
370       /* We allow a timestamp to be one hour ahead of the current time.
371 	 This should cover daylight saving time changes.  */
372       || head->timestamp > now + 60 * 60 + 60
373       || (head->gc_cycle & 1)
374       || head->module == 0
375       || (size_t) head->module > INT32_MAX / sizeof (ref_t)
376       || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
377       || head->first_free < 0
378       || head->first_free > head->data_size
379       || (head->first_free & BLOCK_ALIGN_M1) != 0
380       || head->maxnentries < 0
381       || head->maxnsearched < 0)
382     return 0;
383 
384   uint8_t *usemap = calloc (head->first_free, 1);
385   if (usemap == NULL)
386     return 0;
387 
388   const char *data = (char *) &head->array[roundup (head->module,
389 						    ALIGN / sizeof (ref_t))];
390 
391   nscd_ssize_t he_cnt = 0;
392   for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
393     {
394       ref_t trail = head->array[cnt];
395       ref_t work = trail;
396       int tick = 0;
397 
398       while (work != ENDREF)
399 	{
400 	  if (! check_use (data, head->first_free, usemap, use_he, work,
401 			   sizeof (struct hashentry)))
402 	    goto fail;
403 
404 	  /* Now we know we can dereference the record.  */
405 	  struct hashentry *here = (struct hashentry *) (data + work);
406 
407 	  ++he_cnt;
408 
409 	  /* Make sure the record is for this type of service.  */
410 	  if (here->type >= LASTREQ
411 	      || reqinfo[here->type].db != &dbs[dbnr])
412 	    goto fail;
413 
414 	  /* Validate boolean field value.  */
415 	  if (here->first != false && here->first != true)
416 	    goto fail;
417 
418 	  if (here->len < 0)
419 	    goto fail;
420 
421 	  /* Now the data.  */
422 	  if (here->packet < 0
423 	      || here->packet > head->first_free
424 	      || here->packet + sizeof (struct datahead) > head->first_free)
425 	    goto fail;
426 
427 	  struct datahead *dh = (struct datahead *) (data + here->packet);
428 
429 	  if (! check_use (data, head->first_free, usemap,
430 			   use_data | (here->first ? use_first : 0),
431 			   here->packet, dh->allocsize))
432 	    goto fail;
433 
434 	  if (dh->allocsize < sizeof (struct datahead)
435 	      || dh->recsize > dh->allocsize
436 	      || (dh->notfound != false && dh->notfound != true)
437 	      || (dh->usable != false && dh->usable != true))
438 	    goto fail;
439 
440 	  if (here->key < here->packet + sizeof (struct datahead)
441 	      || here->key > here->packet + dh->allocsize
442 	      || here->key + here->len > here->packet + dh->allocsize)
443 	    goto fail;
444 
445 	  work = here->next;
446 
447 	  if (work == trail)
448 	    /* A circular list; this must not happen.  */
449 	    goto fail;
450 	  if (tick)
451 	    trail = ((struct hashentry *) (data + trail))->next;
452 	  tick = 1 - tick;
453 	}
454     }
455 
456   if (he_cnt != head->nentries)
457     goto fail;
458 
459   /* See if all data and keys had at least one reference from
460      a hashentry with first == true.  */
461   for (ref_t idx = 0; idx < head->first_free; ++idx)
462     {
463       if (usemap[idx] == use_data_begin)
464 	goto fail;
465     }
466 
467   /* Finally, make sure the database hasn't changed since the first test.  */
468   if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
469     goto fail;
470 
471   free (usemap);
472   return 1;
473 
474 fail:
475   free (usemap);
476   return 0;
477 }
478 
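/* Layout of a persistent database file, as mapped by nscd_init below:

     struct database_pers_head   header (version, module, sizes, ...)
     ref_t array[module]         hash buckets, padded up to ALIGN,
                                 each an ENDREF-terminated chain
     char data[data_size]        hashentry and datahead records

   All ref_t values are byte offsets into the data area, which is why
   the verifier compares them against head->first_free.  */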
479 
480 /* Initialize database information structures.  */
481 void
482 nscd_init (void)
483 {
484   /* Look up unprivileged uid/gid/groups before we start listening on the
485      socket.  */
486   if (server_user != NULL)
487     begin_drop_privileges ();
488 
489   if (nthreads == -1)
490     /* No configuration for this value, assume a default.  */
491     nthreads = 4;
492 
493   for (size_t cnt = 0; cnt < lastdb; ++cnt)
494     if (dbs[cnt].enabled)
495       {
496 	pthread_rwlock_init (&dbs[cnt].lock, NULL);
497 	pthread_mutex_init (&dbs[cnt].memlock, NULL);
498 
499 	if (dbs[cnt].persistent)
500 	  {
501 	    /* Try to open the appropriate file on disk.  */
502 	    int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
503 	    if (fd != -1)
504 	      {
505 		char *msg = NULL;
506 		struct stat64 st;
507 		void *mem;
508 		size_t total;
509 		struct database_pers_head head;
510 		ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
511 						      sizeof (head)));
512 		if (n != sizeof (head) || fstat64 (fd, &st) != 0)
513 		  {
514 		  fail_db_errno:
515 		    /* The code is single-threaded at this point so
516 		       using strerror is just fine.  */
517 		    msg = strerror (errno);
518 		  fail_db:
519 		    dbg_log (_("invalid persistent database file \"%s\": %s"),
520 			     dbs[cnt].db_filename, msg);
521 		    unlink (dbs[cnt].db_filename);
522 		  }
523 		else if (head.module == 0 && head.data_size == 0)
524 		  {
525 		    /* The file has been created, but the head has not
526 		       been initialized yet.  */
527 		    msg = _("uninitialized header");
528 		    goto fail_db;
529 		  }
530 		else if (head.header_size != (int) sizeof (head))
531 		  {
532 		    msg = _("header size does not match");
533 		    goto fail_db;
534 		  }
535 		else if ((total = (sizeof (head)
536 				   + roundup (head.module * sizeof (ref_t),
537 					      ALIGN)
538 				   + head.data_size))
539 			 > st.st_size
540 			 || total < sizeof (head))
541 		  {
542 		    msg = _("file size does not match");
543 		    goto fail_db;
544 		  }
545 		/* Note we map with the maximum size allowed for the
546 		   database.  This is likely much larger than the
547 		   actual file size.  This is OK on most OSes since
548 		   extensions of the underlying file will
549 		   automatically make more pages available for
550 		   memory access.  */
551 		else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
552 				      PROT_READ | PROT_WRITE,
553 				      MAP_SHARED, fd, 0))
554 			 == MAP_FAILED)
555 		  goto fail_db_errno;
556 		else if (!verify_persistent_db (mem, &head, cnt))
557 		  {
558 		    munmap (mem, total);
559 		    msg = _("verification failed");
560 		    goto fail_db;
561 		  }
562 		else
563 		  {
564 		    /* Success.  We have the database.  */
565 		    dbs[cnt].head = mem;
566 		    dbs[cnt].memsize = total;
567 		    dbs[cnt].data = (char *)
568 		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
569 						     ALIGN / sizeof (ref_t))];
570 		    dbs[cnt].mmap_used = true;
571 
572 		    if (dbs[cnt].suggested_module > head.module)
573 		      dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
574 			       dbnames[cnt]);
575 
576 		    dbs[cnt].wr_fd = fd;
577 		    fd = -1;
578 		    /* We also need a read-only descriptor.  */
579 		    if (dbs[cnt].shared)
580 		      {
581 			dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
582 					       O_RDONLY | O_CLOEXEC);
583 			if (dbs[cnt].ro_fd == -1)
584 			  dbg_log (_("\
585 cannot create read-only descriptor for \"%s\"; no mmap"),
586 				   dbs[cnt].db_filename);
587 		      }
588 
589 		    // XXX Shall we test whether the descriptors actually
590 		    // XXX point to the same file?
591 		  }
592 
593 		/* Close the file descriptors in case something went
594 		   wrong, in which case the variables have not been
595 		   set to -1.  */
596 		if (fd != -1)
597 		  close (fd);
598 	      }
599 	    else if (errno == EACCES)
600 	      do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
601 		       dbs[cnt].db_filename);
602 	  }
603 
604 	if (dbs[cnt].head == NULL)
605 	  {
606 	    /* No database loaded.  Allocate the data structure,
607 	       possibly on disk.  */
608 	    struct database_pers_head head;
609 	    size_t total = (sizeof (head)
610 			    + roundup (dbs[cnt].suggested_module
611 				       * sizeof (ref_t), ALIGN)
612 			    + (dbs[cnt].suggested_module
613 			       * DEFAULT_DATASIZE_PER_BUCKET));
614 
615 	    /* Try to create the database.  If we do not need a
616 	       persistent database create a temporary file.  */
617 	    int fd;
618 	    int ro_fd = -1;
619 	    if (dbs[cnt].persistent)
620 	      {
621 		fd = open (dbs[cnt].db_filename,
622 			   O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
623 			   S_IRUSR | S_IWUSR);
624 		if (fd != -1 && dbs[cnt].shared)
625 		  ro_fd = open (dbs[cnt].db_filename,
626 				O_RDONLY | O_CLOEXEC);
627 	      }
628 	    else
629 	      {
630 		char fname[] = _PATH_NSCD_XYZ_DB_TMP;
631 		fd = mkostemp (fname, O_CLOEXEC);
632 
633 		/* We do not need the file name anymore after we
634 		   opened another file descriptor in read-only mode.  */
635 		if (fd != -1)
636 		  {
637 		    if (dbs[cnt].shared)
638 		      ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
639 
640 		    unlink (fname);
641 		  }
642 	      }
643 
644 	    if (fd == -1)
645 	      {
646 		if (errno == EEXIST)
647 		  {
648 		    dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
649 			     dbnames[cnt], dbs[cnt].db_filename);
650 		    do_exit (1, 0, NULL);
651 		  }
652 
653 		if  (dbs[cnt].persistent)
654 		  dbg_log (_("cannot create %s; no persistent database used"),
655 			   dbs[cnt].db_filename);
656 		else
657 		  dbg_log (_("cannot create %s; no sharing possible"),
658 			   dbs[cnt].db_filename);
659 
660 		dbs[cnt].persistent = 0;
661 		// XXX remember: no mmap
662 	      }
663 	    else
664 	      {
665 		/* Tell the user if we could not create the read-only
666 		   descriptor.  */
667 		if (ro_fd == -1 && dbs[cnt].shared)
668 		  dbg_log (_("\
669 cannot create read-only descriptor for \"%s\"; no mmap"),
670 			   dbs[cnt].db_filename);
671 
672 		/* Before we create the header, initialize the hash
673 		   table.  That way if we get interrupted while writing
674 		   the header we can recognize a partially initialized
675 		   database.  */
676 		size_t ps = sysconf (_SC_PAGESIZE);
677 		char tmpbuf[ps];
678 		assert (~ENDREF == 0);
679 		memset (tmpbuf, '\xff', ps);
680 
681 		size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
682 		off_t offset = sizeof (head);
683 
684 		size_t towrite;
685 		if (offset % ps != 0)
686 		  {
687 		    towrite = MIN (remaining, ps - (offset % ps));
688 		    if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
689 		      goto write_fail;
690 		    offset += towrite;
691 		    remaining -= towrite;
692 		  }
693 
694 		while (remaining > ps)
695 		  {
696 		    if (pwrite (fd, tmpbuf, ps, offset) == -1)
697 		      goto write_fail;
698 		    offset += ps;
699 		    remaining -= ps;
700 		  }
701 
702 		if (remaining > 0
703 		    && pwrite (fd, tmpbuf, remaining, offset) != remaining)
704 		  goto write_fail;
705 
706 		/* Create the header of the file.  */
707 		struct database_pers_head head =
708 		  {
709 		    .version = DB_VERSION,
710 		    .header_size = sizeof (head),
711 		    .module = dbs[cnt].suggested_module,
712 		    .data_size = (dbs[cnt].suggested_module
713 				  * DEFAULT_DATASIZE_PER_BUCKET),
714 		    .first_free = 0
715 		  };
716 		void *mem;
717 
718 		if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
719 		     != sizeof (head))
720 		    || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
721 			!= 0)
722 		    || (mem = mmap (NULL, dbs[cnt].max_db_size,
723 				    PROT_READ | PROT_WRITE,
724 				    MAP_SHARED, fd, 0)) == MAP_FAILED)
725 		  {
726 		  write_fail:
727 		    unlink (dbs[cnt].db_filename);
728 		    dbg_log (_("cannot write to database file %s: %s"),
729 			     dbs[cnt].db_filename, strerror (errno));
730 		    dbs[cnt].persistent = 0;
731 		  }
732 		else
733 		  {
734 		    /* Success.  */
735 		    dbs[cnt].head = mem;
736 		    dbs[cnt].data = (char *)
737 		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
738 						     ALIGN / sizeof (ref_t))];
739 		    dbs[cnt].memsize = total;
740 		    dbs[cnt].mmap_used = true;
741 
742 		    /* Remember the descriptors.  */
743 		    dbs[cnt].wr_fd = fd;
744 		    dbs[cnt].ro_fd = ro_fd;
745 		    fd = -1;
746 		    ro_fd = -1;
747 		  }
748 
749 		if (fd != -1)
750 		  close (fd);
751 		if (ro_fd != -1)
752 		  close (ro_fd);
753 	      }
754 	  }
755 
756 	if (dbs[cnt].head == NULL)
757 	  {
758 	    /* We do not use the persistent database.  Just
759 	       create an in-memory data structure.  */
760 	    assert (! dbs[cnt].persistent);
761 
762 	    dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
763 				     + (dbs[cnt].suggested_module
764 					* sizeof (ref_t)));
765 	    memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
766 	    assert (~ENDREF == 0);
767 	    memset (dbs[cnt].head->array, '\xff',
768 		    dbs[cnt].suggested_module * sizeof (ref_t));
769 	    dbs[cnt].head->module = dbs[cnt].suggested_module;
770 	    dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
771 					* dbs[cnt].head->module);
772 	    dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
773 	    dbs[cnt].head->first_free = 0;
774 
775 	    dbs[cnt].shared = 0;
776 	    assert (dbs[cnt].ro_fd == -1);
777 	  }
778       }
779 
780   /* Create the socket.  */
781   sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
782   if (sock < 0)
783     {
784       dbg_log (_("cannot open socket: %s"), strerror (errno));
785       do_exit (errno == EACCES ? 4 : 1, 0, NULL);
786     }
787   /* Bind a name to the socket.  */
788   struct sockaddr_un sock_addr;
789   sock_addr.sun_family = AF_UNIX;
790   strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
791   if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
792     {
793       dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
794       do_exit (errno == EACCES ? 4 : 1, 0, NULL);
795     }
796 
797   /* Set permissions for the socket.  */
798   chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
799 
800   /* Set the socket up to accept connections.  */
801   if (listen (sock, SOMAXCONN) < 0)
802     {
803       dbg_log (_("cannot enable socket to accept connections: %s"),
804 	       strerror (errno));
805       do_exit (1, 0, NULL);
806     }
807 
808 #ifdef HAVE_NETLINK
809   if (dbs[hstdb].enabled)
810     {
811       /* Try to open netlink socket to monitor network setting changes.  */
812       nl_status_fd = socket (AF_NETLINK,
813 			     SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
814 			     NETLINK_ROUTE);
815       if (nl_status_fd != -1)
816 	{
817 	  struct sockaddr_nl snl;
818 	  memset (&snl, '\0', sizeof (snl));
819 	  snl.nl_family = AF_NETLINK;
820 	  /* XXX Is this the best set to use?  */
821 	  snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
822 			   | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
823 			   | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
824 			   | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
825 			   | RTMGRP_IPV6_PREFIX);
826 
827 	  if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
828 	    {
829 	      close (nl_status_fd);
830 	      nl_status_fd = -1;
831 	    }
832 	  else
833 	    {
834 	      /* Start the timestamp process.  */
835 	      dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
836 		= __bump_nl_timestamp ();
837 	    }
838 	}
839     }
840 #endif
841 
842   /* Change to unprivileged uid/gid/groups if specified in config file */
843   if (server_user != NULL)
844     finish_drop_privileges ();
845 }
846 
847 #ifdef HAVE_INOTIFY
848 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
849 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
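/* Watch the file itself for deletion, moves and completed writes, and
   its parent directory for deletion, moves and for entries being
   created or moved in, so that a configuration file which is removed
   and later recreated is picked up again.  */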
850 void
851 install_watches (struct traced_file *finfo)
852 {
853   /* Use inotify support if we have it.  */
854   if (finfo->inotify_descr[TRACED_FILE] < 0)
855     finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
856 							   finfo->fname,
857 							   TRACED_FILE_MASK);
858   if (finfo->inotify_descr[TRACED_FILE] < 0)
859     {
860       dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
861 		 finfo->fname, strerror (errno));
862       return;
863     }
864   dbg_log (_("monitoring file `%s` (%d)"),
865 	   finfo->fname, finfo->inotify_descr[TRACED_FILE]);
866   /* Additionally listen for events in the file's parent directory.
867      We do this because the file to be watched might be deleted and
868      later added back, in which case we must re-add the watch.  We
869      must also cover IN_MOVED_TO to detect a file being moved into
870      the directory.  */
871   if (finfo->inotify_descr[TRACED_DIR] < 0)
872     finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
873 							  finfo->dname,
874 							  TRACED_DIR_MASK);
875   if (finfo->inotify_descr[TRACED_DIR] < 0)
876     {
877       dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
878 		 finfo->fname, strerror (errno));
879       return;
880     }
881   dbg_log (_("monitoring directory `%s` (%d)"),
882 	   finfo->dname, finfo->inotify_descr[TRACED_DIR]);
883 }
884 #endif
885 
886 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
887 
888    We support registering multiple files per database. Each call to
889    register_traced_file adds to the list of registered files.
890 
891    When we prune the database, either through timeout or a request to
892    invalidate, we will check to see if any of the registered files has changed.
893    When we accept new connections to handle a cache request we will also
894    check to see if any of the registered files has changed.
895 
896    If we have inotify support then we install an inotify fd to notify us of
897    file deletion or modification, both of which will require we invalidate
898    the cache for the database.  Without inotify support we stat the file and
899    store st_mtime to determine if the file has been modified.  */
900 void
901 register_traced_file (size_t dbidx, struct traced_file *finfo)
902 {
903   /* If the database is disabled or file checking is disabled
904      then ignore the registration.  */
905   if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
906     return;
907 
908   if (__glibc_unlikely (debug_level > 0))
909     dbg_log (_("monitoring file %s for database %s"),
910 	     finfo->fname, dbnames[dbidx]);
911 
912 #ifdef HAVE_INOTIFY
913   install_watches (finfo);
914 #endif
915   struct stat64 st;
916   if (stat64 (finfo->fname, &st) < 0)
917     {
918       /* We cannot stat() the file. Set mtime to zero and try again later.  */
919       dbg_log (_("stat failed for file `%s'; will try again later: %s"),
920 	       finfo->fname, strerror (errno));
921       finfo->mtime = 0;
922     }
923   else
924     finfo->mtime = st.st_mtime;
925 
926   /* Queue up the file name.  */
927   finfo->next = dbs[dbidx].traced_files;
928   dbs[dbidx].traced_files = finfo;
929 }
930 
931 
932 /* Close the connections.  */
933 void
934 close_sockets (void)
935 {
936   close (sock);
937 }
938 
939 
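/* Handle an INVALIDATE request: KEY names the database ("passwd",
   "group", "hosts", ...).  Reset the mtime of every traced file so the
   next check reloads it, re-install the inotify watches where
   available, call res_init if a traced file asks for it, and finally
   prune the whole cache.  The int32_t status written back to FD is 0 on
   success or an error code such as EINVAL for an unknown database
   name.  */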
940 static void
941 invalidate_cache (char *key, int fd)
942 {
943   dbtype number;
944   int32_t resp;
945 
946   for (number = pwddb; number < lastdb; ++number)
947     if (strcmp (key, dbnames[number]) == 0)
948       {
949 	struct traced_file *runp = dbs[number].traced_files;
950 	while (runp != NULL)
951 	  {
952 	    /* Make sure we reload from file when checking mtime.  */
953 	    runp->mtime = 0;
954 #ifdef HAVE_INOTIFY
955 	    /* During an invalidation we try to reload the traced
956 	       file watches.  This allows the user to re-sync if
957 	       inotify events were lost.  Similar to what we do during
958 	       pruning.  */
959 	    install_watches (runp);
960 #endif
961 	    if (runp->call_res_init)
962 	      {
963 		res_init ();
964 		break;
965 	      }
966 	    runp = runp->next;
967 	  }
968 	break;
969       }
970 
971   if (number == lastdb)
972     {
973       resp = EINVAL;
974       writeall (fd, &resp, sizeof (resp));
975       return;
976     }
977 
978   if (dbs[number].enabled)
979     {
980       pthread_mutex_lock (&dbs[number].prune_run_lock);
981       prune_cache (&dbs[number], LONG_MAX, fd);
982       pthread_mutex_unlock (&dbs[number].prune_run_lock);
983     }
984   else
985     {
986       resp = 0;
987       writeall (fd, &resp, sizeof (resp));
988     }
989 }
990 
991 
992 #ifdef SCM_RIGHTS
993 static void
994 send_ro_fd (struct database_dyn *db, char *key, int fd)
995 {
996   /* If we do not have a read-only file descriptor, do nothing.  */
997   if (db->ro_fd == -1)
998     return;
999 
1000   /* We need to send some data along with the descriptor.  */
1001   uint64_t mapsize = (db->head->data_size
1002 		      + roundup (db->head->module * sizeof (ref_t), ALIGN)
1003 		      + sizeof (struct database_pers_head));
1004   struct iovec iov[2];
1005   iov[0].iov_base = key;
1006   iov[0].iov_len = strlen (key) + 1;
1007   iov[1].iov_base = &mapsize;
1008   iov[1].iov_len = sizeof (mapsize);
1009 
1010   /* Prepare the control message to transfer the descriptor.  */
1011   union
1012   {
1013     struct cmsghdr hdr;
1014     char bytes[CMSG_SPACE (sizeof (int))];
1015   } buf;
1016   struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1017 			.msg_control = buf.bytes,
1018 			.msg_controllen = sizeof (buf) };
1019   struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1020 
1021   cmsg->cmsg_level = SOL_SOCKET;
1022   cmsg->cmsg_type = SCM_RIGHTS;
1023   cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1024 
1025   int *ip = (int *) CMSG_DATA (cmsg);
1026   *ip = db->ro_fd;
1027 
1028   msg.msg_controllen = cmsg->cmsg_len;
1029 
1030   /* Send the control message.  We retry when we are interrupted;
1031      any other error is ignored.  */
1032 #ifndef MSG_NOSIGNAL
1033 # define MSG_NOSIGNAL 0
1034 #endif
1035   (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1036 
1037   if (__glibc_unlikely (debug_level > 0))
1038     dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1039 }
1040 #endif	/* SCM_RIGHTS */
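/* For illustration only (not part of nscd): the client side of the
   descriptor transfer above is a recvmsg call that extracts the
   descriptor from the SCM_RIGHTS control message, roughly

     int ro_fd = -1;
     char keybuf[256];
     uint64_t mapsize;
     struct iovec iov[2] = { { keybuf, sizeof (keybuf) },
                             { &mapsize, sizeof (mapsize) } };
     union
     {
       struct cmsghdr hdr;
       char bytes[CMSG_SPACE (sizeof (int))];
     } buf;
     struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                           .msg_control = buf.bytes,
                           .msg_controllen = sizeof (buf) };
     if (recvmsg (fd, &msg, 0) > 0)
       {
         struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
         if (cmsg != NULL && cmsg->cmsg_type == SCM_RIGHTS)
           memcpy (&ro_fd, CMSG_DATA (cmsg), sizeof (int));
       }

   The real client-side code lives in nscd/nscd_helper.c.  */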
1041 
1042 
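/* The wire protocol is simple: every request starts with a fixed-size
   request_header (protocol version, request type, key length) followed
   by key_len bytes of key data.  Replies are either a cached record
   written with writeall, the database-specific "disabled" iovec, or,
   for the GETFD* requests, a descriptor passed via SCM_RIGHTS in
   send_ro_fd above.  */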
1043 /* Handle new request.  */
1044 static void
1045 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1046 {
1047   if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1048     {
1049       if (debug_level > 0)
1050 	dbg_log (_("\
1051 cannot handle old request version %d; current version is %d"),
1052 		 req->version, NSCD_VERSION);
1053       return;
1054     }
1055 
1056   /* Perform the SELinux check before we go on to the standard checks.  */
1057   if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1058     {
1059       if (debug_level > 0)
1060 	{
1061 #ifdef SO_PEERCRED
1062 	  char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1063 # ifdef PATH_MAX
1064 	  char buf[PATH_MAX];
1065 # else
1066 	  char buf[4096];
1067 # endif
1068 
1069 	  snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1070 	  ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1071 
1072 	  if (n <= 0)
1073 	    dbg_log (_("\
1074 request from %ld not handled due to missing permission"), (long int) pid);
1075 	  else
1076 	    {
1077 	      buf[n] = '\0';
1078 	      dbg_log (_("\
1079 request from '%s' [%ld] not handled due to missing permission"),
1080 		       buf, (long int) pid);
1081 	    }
1082 #else
1083 	  dbg_log (_("request not handled due to missing permission"));
1084 #endif
1085 	}
1086       return;
1087     }
1088 
1089   struct database_dyn *db = reqinfo[req->type].db;
1090 
1091   /* See whether we can service the request from the cache.  */
1092   if (__builtin_expect (reqinfo[req->type].data_request, true))
1093     {
1094       if (__builtin_expect (debug_level, 0) > 0)
1095 	{
1096 	  if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1097 	    {
1098 	      char buf[INET6_ADDRSTRLEN];
1099 
1100 	      dbg_log ("\t%s (%s)", serv2str[req->type],
1101 		       inet_ntop (req->type == GETHOSTBYADDR
1102 				  ? AF_INET : AF_INET6,
1103 				  key, buf, sizeof (buf)));
1104 	    }
1105 	  else
1106 	    dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1107 	}
1108 
1109       /* Is this service enabled?  */
1110       if (__glibc_unlikely (!db->enabled))
1111 	{
1112 	  /* No, send the prepared record.  */
1113 	  if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1114 					db->disabled_iov->iov_len,
1115 					MSG_NOSIGNAL))
1116 	      != (ssize_t) db->disabled_iov->iov_len
1117 	      && __builtin_expect (debug_level, 0) > 0)
1118 	    {
1119 	      /* We have problems sending the result.  */
1120 	      char buf[256];
1121 	      dbg_log (_("cannot write result: %s"),
1122 		       strerror_r (errno, buf, sizeof (buf)));
1123 	    }
1124 
1125 	  return;
1126 	}
1127 
1128       /* Be sure we can read the data.  */
1129       if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1130 	{
1131 	  ++db->head->rdlockdelayed;
1132 	  pthread_rwlock_rdlock (&db->lock);
1133 	}
1134 
1135       /* See whether we can handle it from the cache.  */
1136       struct datahead *cached;
1137       cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1138 						 db, uid);
1139       if (cached != NULL)
1140 	{
1141 	  /* Hurray it's in the cache.  */
1142 	  if (writeall (fd, cached->data, cached->recsize) != cached->recsize
1143 	      && __glibc_unlikely (debug_level > 0))
1144 	    {
1145 	      /* We have problems sending the result.  */
1146 	      char buf[256];
1147 	      dbg_log (_("cannot write result: %s"),
1148 		       strerror_r (errno, buf, sizeof (buf)));
1149 	    }
1150 
1151 	  pthread_rwlock_unlock (&db->lock);
1152 
1153 	  return;
1154 	}
1155 
1156       pthread_rwlock_unlock (&db->lock);
1157     }
1158   else if (__builtin_expect (debug_level, 0) > 0)
1159     {
1160       if (req->type == INVALIDATE)
1161 	dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1162       else
1163 	dbg_log ("\t%s", serv2str[req->type]);
1164     }
1165 
1166   /* Handle the request.  */
1167   switch (req->type)
1168     {
1169     case GETPWBYNAME:
1170       addpwbyname (db, fd, req, key, uid);
1171       break;
1172 
1173     case GETPWBYUID:
1174       addpwbyuid (db, fd, req, key, uid);
1175       break;
1176 
1177     case GETGRBYNAME:
1178       addgrbyname (db, fd, req, key, uid);
1179       break;
1180 
1181     case GETGRBYGID:
1182       addgrbygid (db, fd, req, key, uid);
1183       break;
1184 
1185     case GETHOSTBYNAME:
1186       addhstbyname (db, fd, req, key, uid);
1187       break;
1188 
1189     case GETHOSTBYNAMEv6:
1190       addhstbynamev6 (db, fd, req, key, uid);
1191       break;
1192 
1193     case GETHOSTBYADDR:
1194       addhstbyaddr (db, fd, req, key, uid);
1195       break;
1196 
1197     case GETHOSTBYADDRv6:
1198       addhstbyaddrv6 (db, fd, req, key, uid);
1199       break;
1200 
1201     case GETAI:
1202       addhstai (db, fd, req, key, uid);
1203       break;
1204 
1205     case INITGROUPS:
1206       addinitgroups (db, fd, req, key, uid);
1207       break;
1208 
1209     case GETSERVBYNAME:
1210       addservbyname (db, fd, req, key, uid);
1211       break;
1212 
1213     case GETSERVBYPORT:
1214       addservbyport (db, fd, req, key, uid);
1215       break;
1216 
1217     case GETNETGRENT:
1218       addgetnetgrent (db, fd, req, key, uid);
1219       break;
1220 
1221     case INNETGR:
1222       addinnetgr (db, fd, req, key, uid);
1223       break;
1224 
1225     case GETSTAT:
1226     case SHUTDOWN:
1227     case INVALIDATE:
1228       {
1229 	/* Get the caller's credentials.  */
1230 #ifdef SO_PEERCRED
1231 	struct ucred caller;
1232 	socklen_t optlen = sizeof (caller);
1233 
1234 	if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1235 	  {
1236 	    char buf[256];
1237 
1238 	    dbg_log (_("error getting caller's id: %s"),
1239 		     strerror_r (errno, buf, sizeof (buf)));
1240 	    break;
1241 	  }
1242 
1243 	uid = caller.uid;
1244 #else
1245 	/* Some systems have no SO_PEERCRED implementation.  They don't
1246 	   care about security, so we don't either.  */
1247 	uid = 0;
1248 #endif
1249       }
1250 
1251       /* Accept shutdown, getstat and invalidate only from root.  For
1252 	 the stat call also allow the user specified in the config file.  */
1253       if (req->type == GETSTAT)
1254 	{
1255 	  if (uid == 0 || uid == stat_uid)
1256 	    send_stats (fd, dbs);
1257 	}
1258       else if (uid == 0)
1259 	{
1260 	  if (req->type == INVALIDATE)
1261 	    invalidate_cache (key, fd);
1262 	  else
1263 	    termination_handler (0);
1264 	}
1265       break;
1266 
1267     case GETFDPW:
1268     case GETFDGR:
1269     case GETFDHST:
1270     case GETFDSERV:
1271     case GETFDNETGR:
1272 #ifdef SCM_RIGHTS
1273       send_ro_fd (reqinfo[req->type].db, key, fd);
1274 #endif
1275       break;
1276 
1277     default:
1278       /* Ignore the command, it's nothing we know.  */
1279       break;
1280     }
1281 }
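/* For illustration only (not part of this file): a GETPWBYNAME lookup
   for "root", as dispatched above, would be sent by a client roughly as

     request_header req = { .version = NSCD_VERSION,
                            .type = GETPWBYNAME,
                            .key_len = sizeof ("root") };
     TEMP_FAILURE_RETRY (send (sock, &req, sizeof (req), MSG_NOSIGNAL));
     TEMP_FAILURE_RETRY (send (sock, "root", req.key_len, MSG_NOSIGNAL));

   after which the client reads a pw_response_header and the packed
   strings that follow it.  The real client implementations live in
   nscd/nscd_getpw_r.c and nscd/nscd_helper.c.  */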
1282 
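/* Read all of /proc/self/cmdline into a heap buffer, growing it by
   doubling since the file does not report a useful size.  On success
   *SIZE is set to the number of bytes read, including the NUL
   separators between arguments; on failure NULL is returned with errno
   set.  */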
1283 static char *
1284 read_cmdline (size_t *size)
1285 {
1286   int fd = open ("/proc/self/cmdline", O_RDONLY);
1287   if (fd < 0)
1288     return NULL;
1289   size_t current = 0;
1290   size_t limit = 1024;
1291   char *buffer = malloc (limit);
1292   if (buffer == NULL)
1293     {
1294       close (fd);
1295       errno = ENOMEM;
1296       return NULL;
1297     }
1298   while (1)
1299     {
1300       if (current == limit)
1301 	{
1302 	  char *newptr;
1303 	  if (2 * limit < limit
1304 	      || (newptr = realloc (buffer, 2 * limit)) == NULL)
1305 	    {
1306 	      free (buffer);
1307 	      close (fd);
1308 	      errno = ENOMEM;
1309 	      return NULL;
1310 	    }
1311 	  buffer = newptr;
1312 	  limit *= 2;
1313 	}
1314 
1315       ssize_t n = TEMP_FAILURE_RETRY (read (fd, buffer + current,
1316 					    limit - current));
1317       if (n == -1)
1318 	{
1319 	  int e = errno;
1320 	  free (buffer);
1321 	  close (fd);
1322 	  errno = e;
1323 	  return NULL;
1324 	}
1325       if (n == 0)
1326 	break;
1327       current += n;
1328     }
1329 
1330   close (fd);
1331   *size = current;
1332   return buffer;
1333 }
1334 
1335 
1336 /* Restart the process.  */
1337 static void
1338 restart (void)
1339 {
1340   /* First determine the parameters.  We do not use the parameters
1341      passed to main because then nscd would use the system libc after
1342      restarting even if it was started by a non-system dynamic linker
1343      during glibc testing.  */
1344   size_t readlen;
1345   char *cmdline = read_cmdline (&readlen);
1346   if (cmdline == NULL)
1347     {
1348       dbg_log (_("\
1349 cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1350       paranoia = 0;
1351       return;
1352     }
1353 
1354   /* Parse the command line.  Worst case scenario: every two
1355      characters form one parameter (one character plus NUL).  */
1356   char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1357   int argc = 0;
1358 
1359   for (char *cp = cmdline; cp < cmdline + readlen;)
1360     {
1361       argv[argc++] = cp;
1362       cp = (char *) rawmemchr (cp, '\0') + 1;
1363     }
1364   argv[argc] = NULL;
1365 
1366   /* Second, change back to the old user if we changed it.  */
1367   if (server_user != NULL)
1368     {
1369       if (setresuid (old_uid, old_uid, old_uid) != 0)
1370 	{
1371 	  dbg_log (_("\
1372 cannot change to old UID: %s; disabling paranoia mode"),
1373 		   strerror (errno));
1374 
1375 	  paranoia = 0;
1376 	  free (cmdline);
1377 	  return;
1378 	}
1379 
1380       if (setresgid (old_gid, old_gid, old_gid) != 0)
1381 	{
1382 	  dbg_log (_("\
1383 cannot change to old GID: %s; disabling paranoia mode"),
1384 		   strerror (errno));
1385 
1386 	  ignore_value (setuid (server_uid));
1387 	  paranoia = 0;
1388 	  free (cmdline);
1389 	  return;
1390 	}
1391     }
1392 
1393   /* Next change back to the old working directory.  */
1394   if (chdir (oldcwd) == -1)
1395     {
1396       dbg_log (_("\
1397 cannot change to old working directory: %s; disabling paranoia mode"),
1398 	       strerror (errno));
1399 
1400       if (server_user != NULL)
1401 	{
1402 	  ignore_value (setuid (server_uid));
1403 	  ignore_value (setgid (server_gid));
1404 	}
1405       paranoia = 0;
1406       free (cmdline);
1407       return;
1408     }
1409 
1410   /* Synchronize memory.  */
1411   int32_t certainly[lastdb];
1412   for (int cnt = 0; cnt < lastdb; ++cnt)
1413     if (dbs[cnt].enabled)
1414       {
1415 	/* Make sure nobody keeps using the database.  */
1416 	dbs[cnt].head->timestamp = 0;
1417 	certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1418 	dbs[cnt].head->nscd_certainly_running = 0;
1419 
1420 	if (dbs[cnt].persistent)
1421 	  // XXX async OK?
1422 	  msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1423       }
1424 
1425   /* The preparations are done.  */
1426 #ifdef PATH_MAX
1427   char pathbuf[PATH_MAX];
1428 #else
1429   char pathbuf[256];
1430 #endif
1431   /* Try to exec the real nscd program so the process name (as reported
1432      in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1433      if readlink or the exec with the result of the readlink call fails.  */
1434   ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1435   if (n != -1)
1436     {
1437       pathbuf[n] = '\0';
1438       execv (pathbuf, argv);
1439     }
1440   execv ("/proc/self/exe", argv);
1441 
1442   /* If we come here, we will never be able to re-exec.  */
1443   dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1444 	   strerror (errno));
1445 
1446   if (server_user != NULL)
1447     {
1448       ignore_value (setuid (server_uid));
1449       ignore_value (setgid (server_gid));
1450     }
1451   if (chdir ("/") != 0)
1452     dbg_log (_("cannot change current working directory to \"/\": %s"),
1453 	     strerror (errno));
1454   paranoia = 0;
1455   free (cmdline);
1456 
1457   /* Reenable the databases.  */
1458   time_t now = time (NULL);
1459   for (int cnt = 0; cnt < lastdb; ++cnt)
1460     if (dbs[cnt].enabled)
1461       {
1462 	dbs[cnt].head->timestamp = now;
1463 	dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1464       }
1465 }
1466 
1467 
1468 /* List of file descriptors.  */
1469 struct fdlist
1470 {
1471   int fd;
1472   struct fdlist *next;
1473 };
1474 /* Memory allocated for the list.  */
1475 static struct fdlist *fdlist;
1476 /* List of currently ready-to-read file descriptors.  */
1477 static struct fdlist *readylist;
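/* READYLIST is a circular singly-linked list: it points to the most
   recently queued element (the tail) and READYLIST->next is the oldest
   one, so fd_ready can append and the workers can pop in FIFO order
   without walking the list.  An FDLIST slot whose next pointer is NULL
   is free.  */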
1478 
1479 /* Condition variable and mutex to signal availability of entries in
1480    READYLIST.  The condvar uses the default clock, which matches the
1481    CLOCK_REALTIME value assigned to timeout_clock below.  */
1482 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1483 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1484 
1485 /* The clock to use with the condvar.  */
1486 static clockid_t timeout_clock = CLOCK_REALTIME;
1487 
1488 /* Number of threads ready to handle the READYLIST.  */
1489 static unsigned long int nready;
1490 
1491 
1492 /* Function for the clean-up threads.  */
1493 static void *
1494 __attribute__ ((__noreturn__))
1495 nscd_run_prune (void *p)
1496 {
1497   const long int my_number = (long int) p;
1498   assert (dbs[my_number].enabled);
1499 
1500   int dont_need_update = setup_thread (&dbs[my_number]);
1501 
1502   time_t now = time (NULL);
1503 
1504   /* We are running.  */
1505   dbs[my_number].head->timestamp = now;
1506 
1507   struct timespec prune_ts;
1508   if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1509     /* Should never happen.  */
1510     abort ();
1511 
1512   /* Compute the initial timeout time.  Prevent all the timers from
1513      going off at the same time by adding a per-database offset.  */
1514   prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1515   dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1516 
1517   pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1518   pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1519   pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1520 
1521   pthread_mutex_lock (prune_lock);
1522   while (1)
1523     {
1524       /* Wait, but not forever.  */
1525       int e = 0;
1526       if (! dbs[my_number].clear_cache)
1527 	e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1528       assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1529 
1530       time_t next_wait;
1531       now = time (NULL);
1532       if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1533 	  || dbs[my_number].clear_cache)
1534 	{
1535 	  /* We will determine the new timeout values based on the
1536 	     cache content.  Should there be concurrent additions to
1537 	     the cache which are not accounted for in the cache
1538 	     pruning we want to know about it.  Therefore set the
1539 	     timeout to the maximum.  It will be decreased when adding
1540 	     new entries to the cache, if necessary.  */
1541 	  dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1542 
1543 	  /* Unconditionally reset the flag.  */
1544 	  time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1545 	  dbs[my_number].clear_cache = 0;
1546 
1547 	  pthread_mutex_unlock (prune_lock);
1548 
1549 	  /* We use a separate lock for running the prune function (instead
1550 	     of keeping prune_lock locked) because this enables concurrent
1551 	     invocations of cache_add which might modify the timeout value.  */
1552 	  pthread_mutex_lock (prune_run_lock);
1553 	  next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1554 	  pthread_mutex_unlock (prune_run_lock);
1555 
1556 	  next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1557 	  /* If clients cannot determine for sure whether nscd is running
1558 	     we need to wake up occasionally to update the timestamp.
1559 	     Wait 90% of the update period.  */
1560 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1561 	  if (__glibc_unlikely (! dont_need_update))
1562 	    {
1563 	      next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1564 	      dbs[my_number].head->timestamp = now;
1565 	    }
1566 
1567 	  pthread_mutex_lock (prune_lock);
1568 
1569 	  /* Make it known when we will wake up again.  */
1570 	  if (now + next_wait < dbs[my_number].wakeup_time)
1571 	    dbs[my_number].wakeup_time = now + next_wait;
1572 	  else
1573 	    next_wait = dbs[my_number].wakeup_time - now;
1574 	}
1575       else
1576 	/* The cache was just pruned.  Do not do it again now.  Just
1577 	   use the new timeout value.  */
1578 	next_wait = dbs[my_number].wakeup_time - now;
1579 
1580       if (clock_gettime (timeout_clock, &prune_ts) == -1)
1581 	/* Should never happen.  */
1582 	abort ();
1583 
1584       /* Compute next timeout time.  */
1585       prune_ts.tv_sec += next_wait;
1586     }
1587 }
1588 
1589 
1590 /* This is the main loop.  It is replicated in different threads but
1591    the use of the ready list makes sure only one thread handles an
1592    incoming connection.  */
1593 static void *
1594 __attribute__ ((__noreturn__))
1595 nscd_run_worker (void *p)
1596 {
1597   char buf[256];
1598 
1599   /* Initial locking.  */
1600   pthread_mutex_lock (&readylist_lock);
1601 
1602   /* One more thread available.  */
1603   ++nready;
1604 
1605   while (1)
1606     {
1607       while (readylist == NULL)
1608 	pthread_cond_wait (&readylist_cond, &readylist_lock);
1609 
1610       struct fdlist *it = readylist->next;
1611       if (readylist->next == readylist)
1612 	/* Just one entry on the list.  */
1613 	readylist = NULL;
1614       else
1615 	readylist->next = it->next;
1616 
1617       /* Extract the information and mark the record ready to be used
1618 	 again.  */
1619       int fd = it->fd;
1620       it->next = NULL;
1621 
1622       /* One more thread available.  */
1623       --nready;
1624 
1625       /* We are done with the list.  */
1626       pthread_mutex_unlock (&readylist_lock);
1627 
1628       /* Now read the request.  */
1629       request_header req;
1630       if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1631 			    != sizeof (req), 0))
1632 	{
1633 	  /* We failed to read data.  Note that this also might mean we
1634 	     failed because we would have blocked.  */
1635 	  if (debug_level > 0)
1636 	    dbg_log (_("short read while reading request: %s"),
1637 		     strerror_r (errno, buf, sizeof (buf)));
1638 	  goto close_and_out;
1639 	}
1640 
1641       /* Check whether this is a valid request type.  */
1642       if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1643 	goto close_and_out;
1644 
1645       /* Some systems have no SO_PEERCRED implementation.  They don't
1646 	 care about security, so we don't either.  */
1647       uid_t uid = -1;
1648 #ifdef SO_PEERCRED
1649       pid_t pid = 0;
1650 
1651       if (__glibc_unlikely (debug_level > 0))
1652 	{
1653 	  struct ucred caller;
1654 	  socklen_t optlen = sizeof (caller);
1655 
1656 	  if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1657 	    pid = caller.pid;
1658 	}
1659 #else
1660       const pid_t pid = 0;
1661 #endif
1662 
1663       /* It should not be possible to crash the nscd with a silly
1664 	 request (i.e., a terribly large key).  We limit the size to 1kb.  */
1665       if (__builtin_expect (req.key_len, 1) < 0
1666 	  || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1667 	{
1668 	  if (debug_level > 0)
1669 	    dbg_log (_("key length in request too long: %d"), req.key_len);
1670 	}
1671       else
1672 	{
1673 	  /* Get the key.  */
1674 	  char keybuf[MAXKEYLEN + 1];
1675 
1676 	  if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1677 							  req.key_len))
1678 				!= req.key_len, 0))
1679 	    {
1680 	      /* Again, this can also mean we would have blocked.  */
1681 	      if (debug_level > 0)
1682 		dbg_log (_("short read while reading request key: %s"),
1683 			 strerror_r (errno, buf, sizeof (buf)));
1684 	      goto close_and_out;
1685 	    }
1686 	  keybuf[req.key_len] = '\0';
1687 
1688 	  if (__builtin_expect (debug_level, 0) > 0)
1689 	    {
1690 #ifdef SO_PEERCRED
1691 	      if (pid != 0)
1692 		dbg_log (_("\
1693 handle_request: request received (Version = %d) from PID %ld"),
1694 			 req.version, (long int) pid);
1695 	      else
1696 #endif
1697 		dbg_log (_("\
1698 handle_request: request received (Version = %d)"), req.version);
1699 	    }
1700 
1701 	  /* Phew, we got all the data, now process it.  */
1702 	  handle_request (fd, &req, keybuf, uid, pid);
1703 	}
1704 
1705     close_and_out:
1706       /* We are done.  */
1707       close (fd);
1708 
1709       /* Re-locking.  */
1710       pthread_mutex_lock (&readylist_lock);
1711 
1712       /* One more thread available.  */
1713       ++nready;
1714     }
1715   /* NOTREACHED */
1716 }
1717 
1718 
1719 static unsigned int nconns;
1720 
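/* Queue FD on the ready list and wake a worker.  If no worker is idle,
   try to start an additional one (up to max_nthreads); if that fails,
   the signal is suppressed because the connection will be picked up as
   soon as a busy worker finishes its current request.  */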
1721 static void
1722 fd_ready (int fd)
1723 {
1724   pthread_mutex_lock (&readylist_lock);
1725 
1726   /* Find an empty entry in FDLIST.  */
1727   size_t inner;
1728   for (inner = 0; inner < nconns; ++inner)
1729     if (fdlist[inner].next == NULL)
1730       break;
1731   assert (inner < nconns);
1732 
1733   fdlist[inner].fd = fd;
1734 
1735   if (readylist == NULL)
1736     readylist = fdlist[inner].next = &fdlist[inner];
1737   else
1738     {
1739       fdlist[inner].next = readylist->next;
1740       readylist = readylist->next = &fdlist[inner];
1741     }
1742 
1743   bool do_signal = true;
1744   if (__glibc_unlikely (nready == 0))
1745     {
1746       ++client_queued;
1747       do_signal = false;
1748 
1749       /* Try to start another thread to help out.  */
1750       pthread_t th;
1751       if (nthreads < max_nthreads
1752 	  && pthread_create (&th, &attr, nscd_run_worker,
1753 			     (void *) (long int) nthreads) == 0)
1754 	{
1755 	  /* We got another thread.  */
1756 	  ++nthreads;
1757 	  /* The new thread might need a kick.  */
1758 	  do_signal = true;
1759 	}
1760 
1761     }
1762 
1763   pthread_mutex_unlock (&readylist_lock);
1764 
1765   /* Tell one of the worker threads there is work to do.  */
1766   if (do_signal)
1767     pthread_cond_signal (&readylist_cond);
1768 }
1769 
1770 
1771 /* Check whether restarting should happen.  */
1772 static bool
1773 restart_p (time_t now)
1774 {
1775   return (paranoia && readylist == NULL && nready == nthreads
1776 	  && now >= restart_time);
1777 }
1778 
1779 
1780 /* Array for times a connection was accepted.  */
1781 static time_t *starttime;
1782 
1783 #ifdef HAVE_INOTIFY
1784 /* Inotify event for changed file.  */
1785 union __inev
1786 {
1787   struct inotify_event i;
1788 # ifndef PATH_MAX
1789 #  define PATH_MAX 1024
1790 # endif
1791   char buf[sizeof (struct inotify_event) + PATH_MAX];
1792 };
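/* Events for a watched directory carry the name of the affected entry
   after the fixed-size struct inotify_event, hence the extra PATH_MAX
   bytes in the buffer above.  */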
1793 
1794 /* Returns 0 if the file is there otherwise -1.  */
1795 int
1796 check_file (struct traced_file *finfo)
1797 {
1798   struct stat64 st;
1799   /* We could check mtime and if different re-add
1800      the watches, and invalidate the database, but we
1801      don't because we are called from inotify_check_files
1802      which should be doing that work.  If sufficient inotify
1803      events were lost then the next pruning or invalidation
1804      will do the stat and mtime check.  We don't do it here to
1805      keep the logic simple.  */
1806   if (stat64 (finfo->fname, &st) < 0)
1807     return -1;
1808   return 0;
1809 }
1810 
1811 /* Process the inotify event in INEV. If the event matches any of the files
1812    registered with a database then mark that database as requiring its cache
1813    to be cleared. We indicate the cache needs clearing by setting
1814    TO_CLEAR[DBCNT] to true for the matching database.  */
1815 static void
1816 inotify_check_files (bool *to_clear, union __inev *inev)
1817 {
1818   /* Check which of the files changed.  */
1819   for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1820     {
1821       struct traced_file *finfo = dbs[dbcnt].traced_files;
1822 
1823       while (finfo != NULL)
1824 	{
1825 	  /* The configuration file was moved or deleted.
1826 	     We stop watching it at that point, and reinitialize.  */
1827 	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1828 	      && ((inev->i.mask & IN_MOVE_SELF)
1829 		  || (inev->i.mask & IN_DELETE_SELF)
1830 		  || (inev->i.mask & IN_IGNORED)))
1831 	    {
1832 	      int ret;
1833 	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1834 
1835 	      if (check_file (finfo) == 0)
1836 	        {
1837 		  dbg_log (_("ignored inotify event for `%s` (file exists)"),
1838 			   finfo->fname);
1839 		  return;
1840 		}
1841 
1842 	      dbg_log (_("monitored file `%s` was %s, removing watch"),
1843 		       finfo->fname, moved ? "moved" : "deleted");
1844 	      /* File was moved out, remove the watch.  Watches are
1845 		 automatically removed when the file is deleted.  */
1846 	      if (moved)
1847 		{
1848 		  ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1849 		  if (ret < 0)
1850 		    dbg_log (_("failed to remove file watch `%s`: %s"),
1851 			     finfo->fname, strerror (errno));
1852 		}
1853 	      finfo->inotify_descr[TRACED_FILE] = -1;
1854 	      to_clear[dbcnt] = true;
1855 	      if (finfo->call_res_init)
1856 	        res_init ();
1857 	      return;
1858 	    }
1859 	  /* The configuration file was opened for writing and has just been
1860 	     closed.  We reset the cache and reinitialize.  */
1861 	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1862 	      && inev->i.mask & IN_CLOSE_WRITE)
1863 	    {
1864 	      /* Mark cache as needing to be cleared and reinitialize.  */
1865 	      dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1866 	      to_clear[dbcnt] = true;
1867 	      if (finfo->call_res_init)
1868 	        res_init ();
1869 	      return;
1870 	    }
1871 	  /* The parent directory was moved or deleted.  We trigger one last
1872 	     invalidation.  At the next pruning or invalidation we may add
1873 	     this watch back if the file is present again.  */
1874 	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1875 	      && ((inev->i.mask & IN_DELETE_SELF)
1876 		  || (inev->i.mask & IN_MOVE_SELF)
1877 		  || (inev->i.mask & IN_IGNORED)))
1878 	    {
1879 	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1880 	      /* The directory watch may have already been removed
1881 		 but we don't know so we just remove it again and
1882 		 ignore the error.  Then we remove the file watch.
1883 		 Note: watches are automatically removed for deleted
1884 		 files.  */
1885 	      if (moved)
1886 		inotify_rm_watch (inotify_fd, inev->i.wd);
1887 	      if (finfo->inotify_descr[TRACED_FILE] != -1)
1888 		{
1889 		  dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1890 			   finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1891 		  if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1892 		    dbg_log (_("failed to remove file watch `%s`: %s"),
1893 			     finfo->dname, strerror (errno));
1894 		}
1895 	      finfo->inotify_descr[TRACED_FILE] = -1;
1896 	      finfo->inotify_descr[TRACED_DIR] = -1;
1897 	      to_clear[dbcnt] = true;
1898 	      if (finfo->call_res_init)
1899 	        res_init ();
1900 	      /* Continue to the next entry since this might be the
1901 		 parent directory for multiple registered files and
1902 		 we want to remove watches for all registered files.  */
1903 	      continue;
1904 	    }
1905 	  /* The parent directory received a create or moved-to event.  */
1906 	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1907 	      && ((inev->i.mask & IN_MOVED_TO)
1908 		  || (inev->i.mask & IN_CREATE))
1909 	      && strcmp (inev->i.name, finfo->sfname) == 0)
1910 	    {
1911 	      /* We detected a directory change.  We look for the creation
1912 		 of the file we are tracking or the move of the same file
1913 		 into the directory.  */
1914 	      int ret;
1915 	      dbg_log (_("monitored file `%s` was %s, adding watch"),
1916 		       finfo->fname,
1917 		       inev->i.mask & IN_CREATE ? "created" : "moved into place");
1918 	      /* File was moved in or created.  Regenerate the watch.  */
1919 	      if (finfo->inotify_descr[TRACED_FILE] != -1)
1920 		inotify_rm_watch (inotify_fd,
1921 				  finfo->inotify_descr[TRACED_FILE]);
1922 
1923 	      ret = inotify_add_watch (inotify_fd,
1924 				       finfo->fname,
1925 				       TRACED_FILE_MASK);
1926 	      if (ret < 0)
1927 		dbg_log (_("failed to add file watch `%s`: %s"),
1928 			 finfo->fname, strerror (errno));
1929 
1930 	      finfo->inotify_descr[TRACED_FILE] = ret;
1931 
1932 	      /* The file is new or moved so mark cache as needing to
1933 		 be cleared and reinitialize.  */
1934 	      to_clear[dbcnt] = true;
1935 	      if (finfo->call_res_init)
1936 		res_init ();
1937 
1938 	      /* Done re-adding the watch.  Don't return, we may still
1939 		 have other files in this same directory, same watch
1940 		 descriptor, and need to process them.  */
1941 	    }
1942 	  /* Other events are ignored, and we move on to the next file.  */
1943 	  finfo = finfo->next;
1944         }
1945     }
1946 }
1947 
1948 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1949    for the associated database, otherwise do nothing. The TO_CLEAR array must
1950    have LASTDB entries.  */
1951 static inline void
1952 clear_db_cache (bool *to_clear)
1953 {
1954   for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1955     if (to_clear[dbcnt])
1956       {
1957 	pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1958 	dbs[dbcnt].clear_cache = 1;
1959 	pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1960 	pthread_cond_signal (&dbs[dbcnt].prune_cond);
1961       }
1962 }
1963 
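/* Drain all pending events from INOTIFY_FD and hand them to
   inotify_check_files.  Returns 0 on success and -1 if reading from the
   descriptor failed; in that case the callers below give up on inotify
   monitoring and the periodic prune runs fall back to stat-based
   checking of the traced files.  */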
1964 int
1965 handle_inotify_events (void)
1966 {
1967   bool to_clear[lastdb] = { false, };
1968   union __inev inev;
1969 
1970   /* Read all inotify events for files registered via
1971      register_traced_file().  */
1972   while (1)
1973     {
1974       /* Potentially read multiple events into buf.  */
1975       ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1976 					     &inev.buf,
1977 					     sizeof (inev)));
1978       if (nb < (ssize_t) sizeof (struct inotify_event))
1979 	{
1980 	  /* Not even 1 event.  */
1981 	  if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1982 	    return -1;
1983 	  /* Done reading events that are ready.  */
1984 	  break;
1985 	}
1986       /* Process all events.  The normal inotify interface delivers
1987 	 complete events on a read and never a partial event.  */
1988       char *eptr = &inev.buf[0];
1989       ssize_t count;
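      /* Each pass of the following loop handles the event at the start of
	 the buffer and then copies any remaining bytes back to the front,
	 so INEV.I always refers to the next unprocessed event header.  */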
1990       while (1)
1991 	{
1992 	  /* Check which of the files changed.  */
1993 	  inotify_check_files (to_clear, &inev);
1994 	  count = sizeof (struct inotify_event) + inev.i.len;
1995 	  eptr += count;
1996 	  nb -= count;
1997 	  if (nb >= (ssize_t) sizeof (struct inotify_event))
1998 	    memcpy (&inev, eptr, nb);
1999 	  else
2000 	    break;
2001 	}
2002       continue;
2003     }
2004   /* Actually perform the cache clearing.  */
2005   clear_db_cache (to_clear);
2006   return 0;
2007 }
2008 
2009 #endif
2010 
2011 static void
2012 __attribute__ ((__noreturn__))
2013 main_loop_poll (void)
2014 {
2015   struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2016 						    * sizeof (conns[0]));
2017 
2018   conns[0].fd = sock;
2019   conns[0].events = POLLRDNORM;
2020   size_t nused = 1;
2021   size_t firstfree = 1;
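  /* CONNS[0] always holds the listening socket (the next slots may hold
     the inotify and netlink descriptors, see below).  Accepted client
     connections live in the slots below NUSED, with -1 marking holes;
     FIRSTFREE is the lowest-numbered free slot so holes get reused.  */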
2022 
2023 #ifdef HAVE_INOTIFY
2024   if (inotify_fd != -1)
2025     {
2026       conns[1].fd = inotify_fd;
2027       conns[1].events = POLLRDNORM;
2028       nused = 2;
2029       firstfree = 2;
2030     }
2031 #endif
2032 
2033 #ifdef HAVE_NETLINK
2034   size_t idx_nl_status_fd = 0;
2035   if (nl_status_fd != -1)
2036     {
2037       idx_nl_status_fd = nused;
2038       conns[nused].fd = nl_status_fd;
2039       conns[nused].events = POLLRDNORM;
2040       ++nused;
2041       firstfree = nused;
2042     }
2043 #endif
2044 
2045   while (1)
2046     {
2047       /* Wait for any event.  We wait at most MAIN_THREAD_TIMEOUT
2048 	 milliseconds (see below) so that we can periodically close
2049 	 accepted connections from which no request has arrived.  */
2050 #define MAX_ACCEPT_TIMEOUT 30
2051 #define MIN_ACCEPT_TIMEOUT 5
2052 #define MAIN_THREAD_TIMEOUT \
2053   (MAX_ACCEPT_TIMEOUT * 1000						      \
2054    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
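      /* The timeout shrinks linearly as the connection table fills up.
	 With the limits above (30s/5s) an almost empty table polls for
	 close to 30000 ms, while a completely full one (nused == nconns)
	 polls for 30000 - 25000/2 = 17500 ms.  */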
2055 
2056       int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2057 
2058       time_t now = time (NULL);
2059 
2060       /* If there is a descriptor ready for reading or there is a new
2061 	 connection, process this now.  */
2062       if (n > 0)
2063 	{
2064 	  if (conns[0].revents != 0)
2065 	    {
2066 	      /* We have a new incoming connection.  Accept the connection.  */
2067 	      int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2068 						    SOCK_NONBLOCK));
2069 
2070 	      /* Use the descriptor if we have not reached the limit.  */
2071 	      if (fd >= 0)
2072 		{
2073 		  if (firstfree < nconns)
2074 		    {
2075 		      conns[firstfree].fd = fd;
2076 		      conns[firstfree].events = POLLRDNORM;
2077 		      starttime[firstfree] = now;
2078 		      if (firstfree >= nused)
2079 			nused = firstfree + 1;
2080 
2081 		      do
2082 			++firstfree;
2083 		      while (firstfree < nused && conns[firstfree].fd != -1);
2084 		    }
2085 		  else
2086 		    /* We cannot use the connection so close it.  */
2087 		    close (fd);
2088 		}
2089 
2090 	      --n;
2091 	    }
2092 
2093 	  size_t first = 1;
2094 #ifdef HAVE_INOTIFY
2095 	  if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2096 	    {
2097 	      if (conns[1].revents != 0)
2098 		{
2099 		  int ret;
2100 		  ret = handle_inotify_events ();
2101 		  if (ret == -1)
2102 		    {
2103 		      /* Something went wrong when reading the inotify
2104 			 data.  Better disable inotify.  */
2105 		      dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2106 		      conns[1].fd = -1;
2107 		      firstfree = 1;
2108 		      if (nused == 2)
2109 			nused = 1;
2110 		      close (inotify_fd);
2111 		      inotify_fd = -1;
2112 		    }
2113 		  --n;
2114 		}
2115 
2116 	      first = 2;
2117 	    }
2118 #endif
2119 
2120 #ifdef HAVE_NETLINK
2121 	  if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2122 	    {
2123 	      char buf[4096];
2124 	      /* Read all the data.  We do not interpret it here.  */
2125 	      while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2126 					       sizeof (buf))) != -1)
2127 		;
2128 
2129 	      dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2130 		= __bump_nl_timestamp ();
2131 	    }
2132 #endif
2133 
2134 	  for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2135 	    if (conns[cnt].revents != 0)
2136 	      {
2137 		fd_ready (conns[cnt].fd);
2138 
2139 		/* Clean up the CONNS array.  */
2140 		conns[cnt].fd = -1;
2141 		if (cnt < firstfree)
2142 		  firstfree = cnt;
2143 		if (cnt == nused - 1)
2144 		  do
2145 		    --nused;
2146 		  while (conns[nused - 1].fd == -1);
2147 
2148 		--n;
2149 	      }
2150 	}
2151 
2152       /* Now find entries which have timed out.  */
2153       assert (nused > 0);
2154 
2155       /* We make the timeout length depend on the number of file
2156 	 descriptors currently used.  */
2157 #define ACCEPT_TIMEOUT \
2158   (MAX_ACCEPT_TIMEOUT							      \
2159    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
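      /* With the limits above an accepted connection that never sends a
	 request is closed after roughly 30 seconds when few descriptors
	 are in use, shrinking linearly to about 5 seconds when all NCONNS
	 slots are busy.  */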
2160       time_t laststart = now - ACCEPT_TIMEOUT;
2161 
2162       for (size_t cnt = nused - 1; cnt > 0; --cnt)
2163 	{
2164 	  if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2165 	    {
2166 	      /* Remove the entry, it timed out.  */
2167 	      (void) close (conns[cnt].fd);
2168 	      conns[cnt].fd = -1;
2169 
2170 	      if (cnt < firstfree)
2171 		firstfree = cnt;
2172 	      if (cnt == nused - 1)
2173 		do
2174 		  --nused;
2175 		while (conns[nused - 1].fd == -1);
2176 	    }
2177 	}
2178 
2179       if (restart_p (now))
2180 	restart ();
2181     }
2182 }
2183 
2184 
2185 #ifdef HAVE_EPOLL
2186 static void
2187 main_loop_epoll (int efd)
2188 {
2189   struct epoll_event ev = { 0, };
2190   int nused = 1;
2191   size_t highest = 0;
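  /* Unlike in the poll loop, STARTTIME is indexed here by the file
     descriptor itself (which is why descriptors >= NCONNS are rejected
     below), and HIGHEST tracks the largest live client descriptor so the
     timeout sweep only has to scan up to it.  */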
2192 
2193   /* Add the socket.  */
2194   ev.events = EPOLLRDNORM;
2195   ev.data.fd = sock;
2196   if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2197     /* We cannot use epoll.  */
2198     return;
2199 
2200 # ifdef HAVE_INOTIFY
2201   if (inotify_fd != -1)
2202     {
2203       ev.events = EPOLLRDNORM;
2204       ev.data.fd = inotify_fd;
2205       if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2206 	/* We cannot use epoll.  */
2207 	return;
2208       nused = 2;
2209     }
2210 # endif
2211 
2212 # ifdef HAVE_NETLINK
2213   if (nl_status_fd != -1)
2214     {
2215       ev.events = EPOLLRDNORM;
2216       ev.data.fd = nl_status_fd;
2217       if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2218 	/* We cannot use epoll.  */
2219 	return;
2220     }
2221 # endif
2222 
2223   while (1)
2224     {
2225       struct epoll_event revs[100];
2226 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2227 
2228       int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2229 
2230       time_t now = time (NULL);
2231 
2232       for (int cnt = 0; cnt < n; ++cnt)
2233 	if (revs[cnt].data.fd == sock)
2234 	  {
2235 	    /* A new connection.  */
2236 	    int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2237 						  SOCK_NONBLOCK));
2238 
2239 	    /* Use the descriptor if we have not reached the limit.  */
2240 	    if (fd >= 0)
2241 	      {
2242 		/* Try to add the new descriptor.  */
2243 		ev.data.fd = fd;
2244 		if (fd >= nconns
2245 		    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2246 		  /* The descriptor is too large or something went
2247 		     wrong.  Close the descriptor.  */
2248 		  close (fd);
2249 		else
2250 		  {
2251 		    /* Remember when we accepted the connection.  */
2252 		    starttime[fd] = now;
2253 
2254 		    if (fd > highest)
2255 		      highest = fd;
2256 
2257 		    ++nused;
2258 		  }
2259 	      }
2260 	  }
2261 # ifdef HAVE_INOTIFY
2262 	else if (revs[cnt].data.fd == inotify_fd)
2263 	  {
2264 	    int ret;
2265 	    ret = handle_inotify_events ();
2266 	    if (ret == -1)
2267 	      {
2268 		/* Something went wrong when reading the inotify
2269 		   data.  Better disable inotify.  */
2270 		dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2271 		(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2272 		close (inotify_fd);
2273 		inotify_fd = -1;
2274 		break;
2275 	      }
2276 	  }
2277 # endif
2278 # ifdef HAVE_NETLINK
2279 	else if (revs[cnt].data.fd == nl_status_fd)
2280 	  {
2281 	    char buf[4096];
2282 	    /* Read all the data.  We do not interpret it here.  */
2283 	    while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2284 					     sizeof (buf))) != -1)
2285 	      ;
2286 
2287 	    __bump_nl_timestamp ();
2288 	  }
2289 # endif
2290 	else
2291 	  {
2292 	    /* Remove the descriptor from the epoll descriptor.  */
2293 	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2294 
2295 	    /* Get a worker to handle the request.  */
2296 	    fd_ready (revs[cnt].data.fd);
2297 
2298 	    /* Reset the time.  */
2299 	    starttime[revs[cnt].data.fd] = 0;
2300 	    if (revs[cnt].data.fd == highest)
2301 	      do
2302 		--highest;
2303 	      while (highest > 0 && starttime[highest] == 0);
2304 
2305 	    --nused;
2306 	  }
2307 
2308       /*  Now look for accepted connections from which no request has
2309 	  arrived for too long and close them.  */
2310       time_t laststart = now - ACCEPT_TIMEOUT;
2311       assert (starttime[sock] == 0);
2312 # ifdef HAVE_INOTIFY
2313       assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2314 # endif
2315       assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
2316       for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2317 	if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2318 	  {
2319 	    /* We are waiting for this one for too long.  Close it.  */
2320 	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2321 
2322 	    (void) close (cnt);
2323 
2324 	    starttime[cnt] = 0;
2325 	    if (cnt == highest)
2326 	      --highest;
2327 	  }
2328 	else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2329 	  --highest;
2330 
2331       if (restart_p (now))
2332 	restart ();
2333     }
2334 }
2335 #endif
2336 
2337 
2338 /* Start all the threads we want.  The initial process is thread no. 1.  */
2339 void
2340 start_threads (void)
2341 {
2342   /* Initialize the attributes for the condition variables we will use.
2343      The only non-standard attribute we might use is the clock selection.  */
2344   pthread_condattr_t condattr;
2345   pthread_condattr_init (&condattr);
2346 
2347 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2348     && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2349   /* Determine whether the monotonic clock is available.  */
2350   struct timespec dummy;
2351 # if _POSIX_MONOTONIC_CLOCK == 0
2352   if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2353 # endif
2354 # if _POSIX_CLOCK_SELECTION == 0
2355     if (sysconf (_SC_CLOCK_SELECTION) > 0)
2356 # endif
2357       if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2358 	  && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2359 	timeout_clock = CLOCK_MONOTONIC;
2360 #endif
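  /* Using the monotonic clock, when available, keeps the timed waits on
     the prune condition variables stable even if the wall clock is
     stepped, e.g. by an NTP adjustment.  */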
2361 
2362   /* Create the attribute for the threads.  They are all created
2363      detached.  */
2364   pthread_attr_init (&attr);
2365   pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2366   /* Use 1MB stacks, twice as much for 64-bit architectures.  */
2367   pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2368 
2369   /* We allow fewer than LASTDB threads only for debugging.  */
2370   if (debug_level == 0)
2371     nthreads = MAX (nthreads, lastdb);
2372 
2373   /* Create the threads which prune the databases.  */
2374   // XXX Ideally this work would be done by some of the worker threads.
2375   // XXX But this is problematic since we would need to be able to wake
2376   // XXX them up explicitly as well as part of the group handling the
2377   // XXX ready-list.  This requires an operation where we can wait on
2378   // XXX two condition variables at the same time.  This operation
2379   // XXX does not exist (yet).
2380   for (long int i = 0; i < lastdb; ++i)
2381     {
2382       /* Initialize the condition variable.  */
2383       if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2384 	{
2385 	  dbg_log (_("could not initialize conditional variable"));
2386 	  do_exit (1, 0, NULL);
2387 	}
2388 
2389       pthread_t th;
2390       if (dbs[i].enabled
2391 	  && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2392 	{
2393 	  dbg_log (_("could not start clean-up thread; terminating"));
2394 	  do_exit (1, 0, NULL);
2395 	}
2396     }
2397 
2398   pthread_condattr_destroy (&condattr);
2399 
2400   for (long int i = 0; i < nthreads; ++i)
2401     {
2402       pthread_t th;
2403       if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2404 	{
2405 	  if (i == 0)
2406 	    {
2407 	      dbg_log (_("could not start any worker thread; terminating"));
2408 	      do_exit (1, 0, NULL);
2409 	    }
2410 
2411 	  break;
2412 	}
2413     }
2414 
2415   /* Now it is safe to let the parent know that we're doing fine and it can
2416      exit.  */
2417   notify_parent (0);
2418 
2419   /* Determine how much room for descriptors we should initially
2420      allocate.  This might need to change later if we cap the number
2421      with MAXCONN.  */
2422   const long int nfds = sysconf (_SC_OPEN_MAX);
2423 #define MINCONN 32
2424 #define MAXCONN 16384
2425   if (nfds == -1 || nfds > MAXCONN)
2426     nconns = MAXCONN;
2427   else if (nfds < MINCONN)
2428     nconns = MINCONN;
2429   else
2430     nconns = nfds;
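  /* In other words: use the process's open file descriptor limit,
     clamped to the range [MINCONN, MAXCONN].  */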
2431 
2432   /* We need memory to pass descriptors on to the worker threads.  */
2433   fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2434   /* Array to keep track when connection was accepted.  */
2435   starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2436 
2437   /* In the main thread we execute the loop which handles incoming
2438      connections.  */
2439 #ifdef HAVE_EPOLL
2440   int efd = epoll_create (100);
2441   if (efd != -1)
2442     {
2443       main_loop_epoll (efd);
2444       close (efd);
2445     }
2446 #endif
2447 
2448   main_loop_poll ();
2449 }
2450 
2451 
2452 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2453    this function is called, we are not listening on the nscd socket yet so
2454    we can just use the ordinary lookup functions without causing a lockup.  */
2455 static void
2456 begin_drop_privileges (void)
2457 {
2458   struct passwd *pwd = getpwnam (server_user);
2459 
2460   if (pwd == NULL)
2461     {
2462       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2463       do_exit (EXIT_FAILURE, 0,
2464 	       _("Failed to run nscd as user '%s'"), server_user);
2465     }
2466 
2467   server_uid = pwd->pw_uid;
2468   server_gid = pwd->pw_gid;
2469 
2470   /* Save the old UID/GID if we have to change back.  */
2471   if (paranoia)
2472     {
2473       old_uid = getuid ();
2474       old_gid = getgid ();
2475     }
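  /* getgrouplist is used in the usual two-step fashion: the first call,
     with a null buffer and SERVER_NGROUPS still zero, is expected to fail
     and report the required number of groups in SERVER_NGROUPS; only a
     return value of zero (the user belongs to no group at all) is treated
     as an error.  The second call below then fills the allocated array.  */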
2476 
2477   if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2478     {
2479       /* This really must never happen.  */
2480       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2481       do_exit (EXIT_FAILURE, errno,
2482 	       _("initial getgrouplist failed"));
2483     }
2484 
2485   server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2486 
2487   if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2488       == -1)
2489     {
2490       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2491       do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2492     }
2493 }
2494 
2495 
2496 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2497    run nscd as the user specified in the configuration file.  */
2498 static void
2499 finish_drop_privileges (void)
2500 {
2501 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2502   /* We need to preserve the capabilities to connect to the audit daemon.  */
2503   cap_t new_caps = preserve_capabilities ();
2504 #endif
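  /* The order below matters: the supplementary groups have to be dropped
     while we are still privileged enough to call setgroups, and the GID
     must be changed before the UID, because once the UID is given up we
     would no longer be allowed to change either.  */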
2505 
2506   if (setgroups (server_ngroups, server_groups) == -1)
2507     {
2508       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2509       do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2510     }
2511 
2512   int res;
2513   if (paranoia)
2514     res = setresgid (server_gid, server_gid, old_gid);
2515   else
2516     res = setgid (server_gid);
2517   if (res == -1)
2518     {
2519       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2520       do_exit (4, errno, "setgid");
2521     }
2522 
2523   if (paranoia)
2524     res = setresuid (server_uid, server_uid, old_uid);
2525   else
2526     res = setuid (server_uid);
2527   if (res == -1)
2528     {
2529       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2530       do_exit (4, errno, "setuid");
2531     }
2532 
2533 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2534   /* Remove the temporary capabilities.  */
2535   install_real_capabilities (new_caps);
2536 #endif
2537 }
2538