1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
17
18 #include <alloca.h>
19 #include <assert.h>
20 #include <atomic.h>
21 #include <error.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <grp.h>
25 #include <ifaddrs.h>
26 #include <libintl.h>
27 #include <pthread.h>
28 #include <pwd.h>
29 #include <resolv.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <stdint.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_NETLINK
36 # include <linux/netlink.h>
37 # include <linux/rtnetlink.h>
38 #endif
39 #ifdef HAVE_EPOLL
40 # include <sys/epoll.h>
41 #endif
42 #ifdef HAVE_INOTIFY
43 # include <sys/inotify.h>
44 #endif
45 #include <sys/mman.h>
46 #include <sys/param.h>
47 #include <sys/poll.h>
48 #include <sys/socket.h>
49 #include <sys/stat.h>
50 #include <sys/un.h>
51
52 #include "nscd.h"
53 #include "dbg_log.h"
54 #include "selinux.h"
55 #include <resolv/resolv.h>
56
57 #include <kernel-features.h>
58 #include <libc-diag.h>
59
60
61 /* Support to run nscd as an unprivileged user */
62 const char *server_user;
63 static uid_t server_uid;
64 static gid_t server_gid;
65 const char *stat_user;
66 uid_t stat_uid;
67 static gid_t *server_groups;
68 #ifndef NGROUPS
69 # define NGROUPS 32
70 #endif
71 static int server_ngroups;
72
73 static pthread_attr_t attr;
74
75 static void begin_drop_privileges (void);
76 static void finish_drop_privileges (void);
77
78 /* Map request type to a string. */
79 const char *const serv2str[LASTREQ] =
80 {
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV",
100 [GETNETGRENT] = "GETNETGRENT",
101 [INNETGR] = "INNETGR",
102 [GETFDNETGR] = "GETFDNETGR"
103 };
104
105 #ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
106 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107 #else
108 # define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
109 #endif
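/* PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP, when available,
   creates a rwlock that prefers waiting writers, so cache additions and
   pruning are not starved by a steady stream of read requests.  */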
110
111 /* The control data structures for the services. */
112 struct database_dyn dbs[lastdb] =
113 {
114 [pwddb] = {
115 .lock = RWLOCK_INITIALIZER,
116 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
117 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
118 .enabled = 0,
119 .check_file = 1,
120 .persistent = 0,
121 .propagate = 1,
122 .shared = 0,
123 .max_db_size = DEFAULT_MAX_DB_SIZE,
124 .suggested_module = DEFAULT_SUGGESTED_MODULE,
125 .db_filename = _PATH_NSCD_PASSWD_DB,
126 .disabled_iov = &pwd_iov_disabled,
127 .postimeout = 3600,
128 .negtimeout = 20,
129 .wr_fd = -1,
130 .ro_fd = -1,
131 .mmap_used = false
132 },
133 [grpdb] = {
134 .lock = RWLOCK_INITIALIZER,
135 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
136 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
137 .enabled = 0,
138 .check_file = 1,
139 .persistent = 0,
140 .propagate = 1,
141 .shared = 0,
142 .max_db_size = DEFAULT_MAX_DB_SIZE,
143 .suggested_module = DEFAULT_SUGGESTED_MODULE,
144 .db_filename = _PATH_NSCD_GROUP_DB,
145 .disabled_iov = &grp_iov_disabled,
146 .postimeout = 3600,
147 .negtimeout = 60,
148 .wr_fd = -1,
149 .ro_fd = -1,
150 .mmap_used = false
151 },
152 [hstdb] = {
153 .lock = RWLOCK_INITIALIZER,
154 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
155 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
156 .enabled = 0,
157 .check_file = 1,
158 .persistent = 0,
159 .propagate = 0, /* Not used. */
160 .shared = 0,
161 .max_db_size = DEFAULT_MAX_DB_SIZE,
162 .suggested_module = DEFAULT_SUGGESTED_MODULE,
163 .db_filename = _PATH_NSCD_HOSTS_DB,
164 .disabled_iov = &hst_iov_disabled,
165 .postimeout = 3600,
166 .negtimeout = 20,
167 .wr_fd = -1,
168 .ro_fd = -1,
169 .mmap_used = false
170 },
171 [servdb] = {
172 .lock = RWLOCK_INITIALIZER,
173 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
174 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
175 .enabled = 0,
176 .check_file = 1,
177 .persistent = 0,
178 .propagate = 0, /* Not used. */
179 .shared = 0,
180 .max_db_size = DEFAULT_MAX_DB_SIZE,
181 .suggested_module = DEFAULT_SUGGESTED_MODULE,
182 .db_filename = _PATH_NSCD_SERVICES_DB,
183 .disabled_iov = &serv_iov_disabled,
184 .postimeout = 28800,
185 .negtimeout = 20,
186 .wr_fd = -1,
187 .ro_fd = -1,
188 .mmap_used = false
189 },
190 [netgrdb] = {
191 .lock = RWLOCK_INITIALIZER,
192 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
193 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
194 .enabled = 0,
195 .check_file = 1,
196 .persistent = 0,
197 .propagate = 0, /* Not used. */
198 .shared = 0,
199 .max_db_size = DEFAULT_MAX_DB_SIZE,
200 .suggested_module = DEFAULT_SUGGESTED_MODULE,
201 .db_filename = _PATH_NSCD_NETGROUP_DB,
202 .disabled_iov = &netgroup_iov_disabled,
203 .postimeout = 28800,
204 .negtimeout = 20,
205 .wr_fd = -1,
206 .ro_fd = -1,
207 .mmap_used = false
208 }
209 };
210
211
212 /* Mapping of request type to database. */
213 static struct
214 {
215 bool data_request;
216 struct database_dyn *db;
217 } const reqinfo[LASTREQ] =
218 {
219 [GETPWBYNAME] = { true, &dbs[pwddb] },
220 [GETPWBYUID] = { true, &dbs[pwddb] },
221 [GETGRBYNAME] = { true, &dbs[grpdb] },
222 [GETGRBYGID] = { true, &dbs[grpdb] },
223 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
224 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
225 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
226 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
227 [SHUTDOWN] = { false, NULL },
228 [GETSTAT] = { false, NULL },
229 [GETFDPW] = { false, &dbs[pwddb] },
230 [GETFDGR] = { false, &dbs[grpdb] },
231 [GETFDHST] = { false, &dbs[hstdb] },
232 [GETAI] = { true, &dbs[hstdb] },
233 [INITGROUPS] = { true, &dbs[grpdb] },
234 [GETSERVBYNAME] = { true, &dbs[servdb] },
235 [GETSERVBYPORT] = { true, &dbs[servdb] },
236 [GETFDSERV] = { false, &dbs[servdb] },
237 [GETNETGRENT] = { true, &dbs[netgrdb] },
238 [INNETGR] = { true, &dbs[netgrdb] },
239 [GETFDNETGR] = { false, &dbs[netgrdb] }
240 };
241
242
243 /* Initial number of threads to use. */
244 int nthreads = -1;
245 /* Maximum number of threads to use. */
246 int max_nthreads = 32;
247
248 /* Socket for incoming connections. */
249 static int sock;
250
251 #ifdef HAVE_INOTIFY
252 /* Inotify descriptor. */
253 int inotify_fd = -1;
254 #endif
255
256 #ifdef HAVE_NETLINK
257 /* Descriptor for netlink status updates. */
258 static int nl_status_fd = -1;
259 #endif
260
261 /* Number of times clients had to wait. */
262 unsigned long int client_queued;
263
264
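/* Write all LEN bytes from BUF to FD, retrying partial writes and EINTR.
   Returns the number of bytes written, or a negative value on error.  */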
265 ssize_t
266 writeall (int fd, const void *buf, size_t len)
267 {
268 size_t n = len;
269 ssize_t ret;
270 do
271 {
272 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
273 if (ret <= 0)
274 break;
275 buf = (const char *) buf + ret;
276 n -= ret;
277 }
278 while (n > 0);
279 return ret < 0 ? ret : len - n;
280 }
281
282
283 enum usekey
284 {
285 use_not = 0,
286 /* The following three are not really used, they are symbolic constants. */
287 use_first = 16,
288 use_begin = 32,
289 use_end = 64,
290
291 use_he = 1,
292 use_he_begin = use_he | use_begin,
293 use_he_end = use_he | use_end,
294 use_data = 3,
295 use_data_begin = use_data | use_begin,
296 use_data_end = use_data | use_end,
297 use_data_first = use_data_begin | use_first
298 };
299
300
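/* Record in USEMAP that the LEN bytes starting at offset START are used
   by an object of type USE, or verify that a previously recorded object
   at START is compatible with USE.  Returns 1 if the reference is
   consistent, 0 if it is out of bounds, misaligned, or conflicting.  */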
301 static int
302 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
303 enum usekey use, ref_t start, size_t len)
304 {
305 if (len < 2)
306 return 0;
307
308 if (start > first_free || start + len > first_free
309 || (start & BLOCK_ALIGN_M1))
310 return 0;
311
312 if (usemap[start] == use_not)
313 {
314 /* Add the start marker. */
315 usemap[start] = use | use_begin;
316 use &= ~use_first;
317
318 while (--len > 0)
319 if (usemap[++start] != use_not)
320 return 0;
321 else
322 usemap[start] = use;
323
324 /* Add the end marker. */
325 usemap[start] = use | use_end;
326 }
327 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
328 {
329 /* Hash entries can't be shared. */
330 if (use == use_he)
331 return 0;
332
333 usemap[start] |= (use & use_first);
334 use &= ~use_first;
335
336 while (--len > 1)
337 if (usemap[++start] != use)
338 return 0;
339
340 if (usemap[++start] != (use | use_end))
341 return 0;
342 }
343 else
344 /* Points to a wrong object or somewhere in the middle. */
345 return 0;
346
347 return 1;
348 }
349
350
351 /* Verify data in persistent database. */
352 static int
353 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
354 {
355 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
356 || dbnr == netgrdb);
357
358 time_t now = time (NULL);
359
360 struct database_pers_head *head = mem;
361 struct database_pers_head head_copy = *head;
362
363 /* Check that the header that was read matches the head in the database. */
364 if (memcmp (head, readhead, sizeof (*head)) != 0)
365 return 0;
366
367 /* First some easy tests: make sure the database header is sane. */
368 if (head->version != DB_VERSION
369 || head->header_size != sizeof (*head)
370 /* We allow a timestamp to be one hour ahead of the current time.
371 This should cover daylight saving time changes. */
372 || head->timestamp > now + 60 * 60 + 60
373 || (head->gc_cycle & 1)
374 || head->module == 0
375 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
376 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
377 || head->first_free < 0
378 || head->first_free > head->data_size
379 || (head->first_free & BLOCK_ALIGN_M1) != 0
380 || head->maxnentries < 0
381 || head->maxnsearched < 0)
382 return 0;
383
384 uint8_t *usemap = calloc (head->first_free, 1);
385 if (usemap == NULL)
386 return 0;
387
388 const char *data = (char *) &head->array[roundup (head->module,
389 ALIGN / sizeof (ref_t))];
390
391 nscd_ssize_t he_cnt = 0;
392 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
393 {
394 ref_t trail = head->array[cnt];
395 ref_t work = trail;
396 int tick = 0;
397
398 while (work != ENDREF)
399 {
400 if (! check_use (data, head->first_free, usemap, use_he, work,
401 sizeof (struct hashentry)))
402 goto fail;
403
404 /* Now we know we can dereference the record. */
405 struct hashentry *here = (struct hashentry *) (data + work);
406
407 ++he_cnt;
408
409 /* Make sure the record is for this type of service. */
410 if (here->type >= LASTREQ
411 || reqinfo[here->type].db != &dbs[dbnr])
412 goto fail;
413
414 /* Validate boolean field value. */
415 if (here->first != false && here->first != true)
416 goto fail;
417
418 if (here->len < 0)
419 goto fail;
420
421 /* Now the data. */
422 if (here->packet < 0
423 || here->packet > head->first_free
424 || here->packet + sizeof (struct datahead) > head->first_free)
425 goto fail;
426
427 struct datahead *dh = (struct datahead *) (data + here->packet);
428
429 if (! check_use (data, head->first_free, usemap,
430 use_data | (here->first ? use_first : 0),
431 here->packet, dh->allocsize))
432 goto fail;
433
434 if (dh->allocsize < sizeof (struct datahead)
435 || dh->recsize > dh->allocsize
436 || (dh->notfound != false && dh->notfound != true)
437 || (dh->usable != false && dh->usable != true))
438 goto fail;
439
440 if (here->key < here->packet + sizeof (struct datahead)
441 || here->key > here->packet + dh->allocsize
442 || here->key + here->len > here->packet + dh->allocsize)
443 goto fail;
444
445 work = here->next;
446
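/* TRAIL advances at half the speed of WORK (tortoise-and-hare), so if
   the chain contains a cycle WORK must eventually meet TRAIL and the
   check below fires instead of looping forever.  */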
447 if (work == trail)
448 /* A circular list, this must not happen. */
449 goto fail;
450 if (tick)
451 trail = ((struct hashentry *) (data + trail))->next;
452 tick = 1 - tick;
453 }
454 }
455
456 if (he_cnt != head->nentries)
457 goto fail;
458
459 /* See whether all data records (which also hold the keys) were
460 referenced by at least one hashentry with FIRST == true. */
461 for (ref_t idx = 0; idx < head->first_free; ++idx)
462 {
463 if (usemap[idx] == use_data_begin)
464 goto fail;
465 }
466
467 /* Finally, make sure the database hasn't changed since the first test. */
468 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
469 goto fail;
470
471 free (usemap);
472 return 1;
473
474 fail:
475 free (usemap);
476 return 0;
477 }
478
479
480 /* Initialize database information structures. */
481 void
482 nscd_init (void)
483 {
484 /* Look up unprivileged uid/gid/groups before we start listening on the
485 socket */
486 if (server_user != NULL)
487 begin_drop_privileges ();
488
489 if (nthreads == -1)
490 /* No configuration for this value, assume a default. */
491 nthreads = 4;
492
493 for (size_t cnt = 0; cnt < lastdb; ++cnt)
494 if (dbs[cnt].enabled)
495 {
496 pthread_rwlock_init (&dbs[cnt].lock, NULL);
497 pthread_mutex_init (&dbs[cnt].memlock, NULL);
498
499 if (dbs[cnt].persistent)
500 {
501 /* Try to open the appropriate file on disk. */
502 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
503 if (fd != -1)
504 {
505 char *msg = NULL;
506 struct stat64 st;
507 void *mem;
508 size_t total;
509 struct database_pers_head head;
510 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
511 sizeof (head)));
512 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
513 {
514 fail_db_errno:
515 /* The code is single-threaded at this point so
516 using strerror is just fine. */
517 msg = strerror (errno);
518 fail_db:
519 dbg_log (_("invalid persistent database file \"%s\": %s"),
520 dbs[cnt].db_filename, msg);
521 unlink (dbs[cnt].db_filename);
522 }
523 else if (head.module == 0 && head.data_size == 0)
524 {
525 /* The file has been created, but the head has not
526 been initialized yet. */
527 msg = _("uninitialized header");
528 goto fail_db;
529 }
530 else if (head.header_size != (int) sizeof (head))
531 {
532 msg = _("header size does not match");
533 goto fail_db;
534 }
535 else if ((total = (sizeof (head)
536 + roundup (head.module * sizeof (ref_t),
537 ALIGN)
538 + head.data_size))
539 > st.st_size
540 || total < sizeof (head))
541 {
542 msg = _("file size does not match");
543 goto fail_db;
544 }
545 /* Note we map with the maximum size allowed for the
546 database. This is likely much larger than the
547 actual file size. This is OK on most OSes since
548 extensions of the underlying file will
549 automatically make more pages available for
550 memory access. */
551 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
552 PROT_READ | PROT_WRITE,
553 MAP_SHARED, fd, 0))
554 == MAP_FAILED)
555 goto fail_db_errno;
556 else if (!verify_persistent_db (mem, &head, cnt))
557 {
558 munmap (mem, total);
559 msg = _("verification failed");
560 goto fail_db;
561 }
562 else
563 {
564 /* Success. We have the database. */
565 dbs[cnt].head = mem;
566 dbs[cnt].memsize = total;
567 dbs[cnt].data = (char *)
568 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
569 ALIGN / sizeof (ref_t))];
570 dbs[cnt].mmap_used = true;
571
572 if (dbs[cnt].suggested_module > head.module)
573 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
574 dbnames[cnt]);
575
576 dbs[cnt].wr_fd = fd;
577 fd = -1;
578 /* We also need a read-only descriptor. */
579 if (dbs[cnt].shared)
580 {
581 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
582 O_RDONLY | O_CLOEXEC);
583 if (dbs[cnt].ro_fd == -1)
584 dbg_log (_("\
585 cannot create read-only descriptor for \"%s\"; no mmap"),
586 dbs[cnt].db_filename);
587 }
588
589 // XXX Shall we test whether the descriptors actually
590 // XXX point to the same file?
591 }
592
593 /* Close the file descriptor if something went
594 wrong, in which case the variable has not been
595 set to -1. */
596 if (fd != -1)
597 close (fd);
598 }
599 else if (errno == EACCES)
600 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
601 dbs[cnt].db_filename);
602 }
603
604 if (dbs[cnt].head == NULL)
605 {
606 /* No database loaded. Allocate the data structure,
607 possibly on disk. */
608 struct database_pers_head head;
609 size_t total = (sizeof (head)
610 + roundup (dbs[cnt].suggested_module
611 * sizeof (ref_t), ALIGN)
612 + (dbs[cnt].suggested_module
613 * DEFAULT_DATASIZE_PER_BUCKET));
614
615 /* Try to create the database. If we do not need a
616 persistent database, create a temporary file. */
617 int fd;
618 int ro_fd = -1;
619 if (dbs[cnt].persistent)
620 {
621 fd = open (dbs[cnt].db_filename,
622 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
623 S_IRUSR | S_IWUSR);
624 if (fd != -1 && dbs[cnt].shared)
625 ro_fd = open (dbs[cnt].db_filename,
626 O_RDONLY | O_CLOEXEC);
627 }
628 else
629 {
630 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
631 fd = mkostemp (fname, O_CLOEXEC);
632
633 /* We do not need the file name anymore after we
634 opened another file descriptor in read-only mode. */
635 if (fd != -1)
636 {
637 if (dbs[cnt].shared)
638 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
639
640 unlink (fname);
641 }
642 }
643
644 if (fd == -1)
645 {
646 if (errno == EEXIST)
647 {
648 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
649 dbnames[cnt], dbs[cnt].db_filename);
650 do_exit (1, 0, NULL);
651 }
652
653 if (dbs[cnt].persistent)
654 dbg_log (_("cannot create %s; no persistent database used"),
655 dbs[cnt].db_filename);
656 else
657 dbg_log (_("cannot create %s; no sharing possible"),
658 dbs[cnt].db_filename);
659
660 dbs[cnt].persistent = 0;
661 // XXX remember: no mmap
662 }
663 else
664 {
665 /* Tell the user if we could not create the read-only
666 descriptor. */
667 if (ro_fd == -1 && dbs[cnt].shared)
668 dbg_log (_("\
669 cannot create read-only descriptor for \"%s\"; no mmap"),
670 dbs[cnt].db_filename);
671
672 /* Before we create the header, initialize the hash
673 table. That way if we get interrupted while writing
674 the header we can recognize a partially initialized
675 database. */
676 size_t ps = sysconf (_SC_PAGESIZE);
677 char tmpbuf[ps];
678 assert (~ENDREF == 0);
679 memset (tmpbuf, '\xff', ps);
680
681 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
682 off_t offset = sizeof (head);
683
684 size_t towrite;
685 if (offset % ps != 0)
686 {
687 towrite = MIN (remaining, ps - (offset % ps));
688 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
689 goto write_fail;
690 offset += towrite;
691 remaining -= towrite;
692 }
693
694 while (remaining > ps)
695 {
696 if (pwrite (fd, tmpbuf, ps, offset) == -1)
697 goto write_fail;
698 offset += ps;
699 remaining -= ps;
700 }
701
702 if (remaining > 0
703 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
704 goto write_fail;
705
706 /* Create the header of the file. */
707 struct database_pers_head head =
708 {
709 .version = DB_VERSION,
710 .header_size = sizeof (head),
711 .module = dbs[cnt].suggested_module,
712 .data_size = (dbs[cnt].suggested_module
713 * DEFAULT_DATASIZE_PER_BUCKET),
714 .first_free = 0
715 };
716 void *mem;
717
718 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
719 != sizeof (head))
720 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
721 != 0)
722 || (mem = mmap (NULL, dbs[cnt].max_db_size,
723 PROT_READ | PROT_WRITE,
724 MAP_SHARED, fd, 0)) == MAP_FAILED)
725 {
726 write_fail:
727 unlink (dbs[cnt].db_filename);
728 dbg_log (_("cannot write to database file %s: %s"),
729 dbs[cnt].db_filename, strerror (errno));
730 dbs[cnt].persistent = 0;
731 }
732 else
733 {
734 /* Success. */
735 dbs[cnt].head = mem;
736 dbs[cnt].data = (char *)
737 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
738 ALIGN / sizeof (ref_t))];
739 dbs[cnt].memsize = total;
740 dbs[cnt].mmap_used = true;
741
742 /* Remember the descriptors. */
743 dbs[cnt].wr_fd = fd;
744 dbs[cnt].ro_fd = ro_fd;
745 fd = -1;
746 ro_fd = -1;
747 }
748
749 if (fd != -1)
750 close (fd);
751 if (ro_fd != -1)
752 close (ro_fd);
753 }
754 }
755
756 if (dbs[cnt].head == NULL)
757 {
758 /* We do not use the persistent database. Just
759 create an in-memory data structure. */
760 assert (! dbs[cnt].persistent);
761
762 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
763 + (dbs[cnt].suggested_module
764 * sizeof (ref_t)));
765 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
766 assert (~ENDREF == 0);
767 memset (dbs[cnt].head->array, '\xff',
768 dbs[cnt].suggested_module * sizeof (ref_t));
769 dbs[cnt].head->module = dbs[cnt].suggested_module;
770 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
771 * dbs[cnt].head->module);
772 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
773 dbs[cnt].head->first_free = 0;
774
775 dbs[cnt].shared = 0;
776 assert (dbs[cnt].ro_fd == -1);
777 }
778 }
779
780 /* Create the socket. */
781 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
782 if (sock < 0)
783 {
784 dbg_log (_("cannot open socket: %s"), strerror (errno));
785 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
786 }
787 /* Bind a name to the socket. */
788 struct sockaddr_un sock_addr;
789 sock_addr.sun_family = AF_UNIX;
790 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
791 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
792 {
793 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
794 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
795 }
796
797 /* Set permissions for the socket. */
798 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
799
800 /* Set the socket up to accept connections. */
801 if (listen (sock, SOMAXCONN) < 0)
802 {
803 dbg_log (_("cannot enable socket to accept connections: %s"),
804 strerror (errno));
805 do_exit (1, 0, NULL);
806 }
807
808 #ifdef HAVE_NETLINK
809 if (dbs[hstdb].enabled)
810 {
811 /* Try to open netlink socket to monitor network setting changes. */
812 nl_status_fd = socket (AF_NETLINK,
813 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
814 NETLINK_ROUTE);
815 if (nl_status_fd != -1)
816 {
817 struct sockaddr_nl snl;
818 memset (&snl, '\0', sizeof (snl));
819 snl.nl_family = AF_NETLINK;
820 /* XXX Is this the best set to use? */
821 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
822 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
823 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
824 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
825 | RTMGRP_IPV6_PREFIX);
826
827 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
828 {
829 close (nl_status_fd);
830 nl_status_fd = -1;
831 }
832 else
833 {
834 /* Start the timestamp process. */
835 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
836 = __bump_nl_timestamp ();
837 }
838 }
839 }
840 #endif
841
842 /* Change to unprivileged uid/gid/groups if specified in config file */
843 if (server_user != NULL)
844 finish_drop_privileges ();
845 }
846
847 #ifdef HAVE_INOTIFY
848 #define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
849 #define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
850 void
851 install_watches (struct traced_file *finfo)
852 {
853 /* Use inotify support if we have it. */
854 if (finfo->inotify_descr[TRACED_FILE] < 0)
855 finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
856 finfo->fname,
857 TRACED_FILE_MASK);
858 if (finfo->inotify_descr[TRACED_FILE] < 0)
859 {
860 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
861 finfo->fname, strerror (errno));
862 return;
863 }
864 dbg_log (_("monitoring file `%s` (%d)"),
865 finfo->fname, finfo->inotify_descr[TRACED_FILE]);
866 /* Additionally listen for events in the file's parent directory.
867 We do this because the file to be watched might be
868 deleted and then added back again. When it is added back again
869 we must re-add the watch. We must also cover IN_MOVED_TO to
870 detect a file being moved into the directory. */
871 if (finfo->inotify_descr[TRACED_DIR] < 0)
872 finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
873 finfo->dname,
874 TRACED_DIR_MASK);
875 if (finfo->inotify_descr[TRACED_DIR] < 0)
876 {
877 dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
878 finfo->fname, strerror (errno));
879 return;
880 }
881 dbg_log (_("monitoring directory `%s` (%d)"),
882 finfo->dname, finfo->inotify_descr[TRACED_DIR]);
883 }
884 #endif
885
886 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
887
888 We support registering multiple files per database. Each call to
889 register_traced_file adds to the list of registered files.
890
891 When we prune the database, either through timeout or a request to
892 invalidate, we will check to see if any of the registered files has changed.
893 When we accept new connections to handle a cache request we will also
894 check to see if any of the registered files has changed.
895
896 If we have inotify support then we install an inotify fd to notify us of
897 file deletion or modification, both of which will require we invalidate
898 the cache for the database. Without inotify support we stat the file and
899 store st_mtime to determine if the file has been modified. */
900 void
901 register_traced_file (size_t dbidx, struct traced_file *finfo)
902 {
903 /* If the database is disabled or file checking is disabled
904 then ignore the registration. */
905 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
906 return;
907
908 if (__glibc_unlikely (debug_level > 0))
909 dbg_log (_("monitoring file %s for database %s"),
910 finfo->fname, dbnames[dbidx]);
911
912 #ifdef HAVE_INOTIFY
913 install_watches (finfo);
914 #endif
915 struct stat64 st;
916 if (stat64 (finfo->fname, &st) < 0)
917 {
918 /* We cannot stat() the file. Set mtime to zero and try again later. */
919 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
920 finfo->fname, strerror (errno));
921 finfo->mtime = 0;
922 }
923 else
924 finfo->mtime = st.st_mtime;
925
926 /* Queue up the file name. */
927 finfo->next = dbs[dbidx].traced_files;
928 dbs[dbidx].traced_files = finfo;
929 }
930
931
932 /* Close the connections. */
933 void
934 close_sockets (void)
935 {
936 close (sock);
937 }
938
939
940 static void
941 invalidate_cache (char *key, int fd)
942 {
943 dbtype number;
944 int32_t resp;
945
946 for (number = pwddb; number < lastdb; ++number)
947 if (strcmp (key, dbnames[number]) == 0)
948 {
949 struct traced_file *runp = dbs[number].traced_files;
950 while (runp != NULL)
951 {
952 /* Make sure we reload from file when checking mtime. */
953 runp->mtime = 0;
954 #ifdef HAVE_INOTIFY
955 /* During an invalidation we try to reload the traced
956 file watches. This allows the user to re-sync if
957 inotify events were lost. Similar to what we do during
958 pruning. */
959 install_watches (runp);
960 #endif
961 if (runp->call_res_init)
962 {
963 res_init ();
964 break;
965 }
966 runp = runp->next;
967 }
968 break;
969 }
970
971 if (number == lastdb)
972 {
973 resp = EINVAL;
974 writeall (fd, &resp, sizeof (resp));
975 return;
976 }
977
978 if (dbs[number].enabled)
979 {
980 pthread_mutex_lock (&dbs[number].prune_run_lock);
981 prune_cache (&dbs[number], LONG_MAX, fd);
982 pthread_mutex_unlock (&dbs[number].prune_run_lock);
983 }
984 else
985 {
986 resp = 0;
987 writeall (fd, &resp, sizeof (resp));
988 }
989 }
990
991
992 #ifdef SCM_RIGHTS
993 static void
994 send_ro_fd (struct database_dyn *db, char *key, int fd)
995 {
996 /* If we do not have a read-only file descriptor, do nothing. */
997 if (db->ro_fd == -1)
998 return;
999
1000 /* We need to send some data along with the descriptor. */
1001 uint64_t mapsize = (db->head->data_size
1002 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1003 + sizeof (struct database_pers_head));
1004 struct iovec iov[2];
1005 iov[0].iov_base = key;
1006 iov[0].iov_len = strlen (key) + 1;
1007 iov[1].iov_base = &mapsize;
1008 iov[1].iov_len = sizeof (mapsize);
1009
1010 /* Prepare the control message to transfer the descriptor. */
1011 union
1012 {
1013 struct cmsghdr hdr;
1014 char bytes[CMSG_SPACE (sizeof (int))];
1015 } buf;
1016 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1017 .msg_control = buf.bytes,
1018 .msg_controllen = sizeof (buf) };
1019 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1020
1021 cmsg->cmsg_level = SOL_SOCKET;
1022 cmsg->cmsg_type = SCM_RIGHTS;
1023 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1024
1025 int *ip = (int *) CMSG_DATA (cmsg);
1026 *ip = db->ro_fd;
1027
1028 msg.msg_controllen = cmsg->cmsg_len;
1029
1030 /* Send the control message.  We retry if interrupted; any other
1031 error is ignored. */
1032 #ifndef MSG_NOSIGNAL
1033 # define MSG_NOSIGNAL 0
1034 #endif
1035 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1036
1037 if (__glibc_unlikely (debug_level > 0))
1038 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1039 }
1040 #endif /* SCM_RIGHTS */
1041
1042
1043 /* Handle new request. */
1044 static void
1045 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1046 {
1047 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1048 {
1049 if (debug_level > 0)
1050 dbg_log (_("\
1051 cannot handle old request version %d; current version is %d"),
1052 req->version, NSCD_VERSION);
1053 return;
1054 }
1055
1056 /* Perform the SELinux check before we go on to the standard checks. */
1057 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1058 {
1059 if (debug_level > 0)
1060 {
1061 #ifdef SO_PEERCRED
1062 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1063 # ifdef PATH_MAX
1064 char buf[PATH_MAX];
1065 # else
1066 char buf[4096];
1067 # endif
1068
1069 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1070 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1071
1072 if (n <= 0)
1073 dbg_log (_("\
1074 request from %ld not handled due to missing permission"), (long int) pid);
1075 else
1076 {
1077 buf[n] = '\0';
1078 dbg_log (_("\
1079 request from '%s' [%ld] not handled due to missing permission"),
1080 buf, (long int) pid);
1081 }
1082 #else
1083 dbg_log (_("request not handled due to missing permission"));
1084 #endif
1085 }
1086 return;
1087 }
1088
1089 struct database_dyn *db = reqinfo[req->type].db;
1090
1091 /* See whether we can service the request from the cache. */
1092 if (__builtin_expect (reqinfo[req->type].data_request, true))
1093 {
1094 if (__builtin_expect (debug_level, 0) > 0)
1095 {
1096 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1097 {
1098 char buf[INET6_ADDRSTRLEN];
1099
1100 dbg_log ("\t%s (%s)", serv2str[req->type],
1101 inet_ntop (req->type == GETHOSTBYADDR
1102 ? AF_INET : AF_INET6,
1103 key, buf, sizeof (buf)));
1104 }
1105 else
1106 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1107 }
1108
1109 /* Is this service enabled? */
1110 if (__glibc_unlikely (!db->enabled))
1111 {
1112 /* No, send the prepared record. */
1113 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1114 db->disabled_iov->iov_len,
1115 MSG_NOSIGNAL))
1116 != (ssize_t) db->disabled_iov->iov_len
1117 && __builtin_expect (debug_level, 0) > 0)
1118 {
1119 /* We have problems sending the result. */
1120 char buf[256];
1121 dbg_log (_("cannot write result: %s"),
1122 strerror_r (errno, buf, sizeof (buf)));
1123 }
1124
1125 return;
1126 }
1127
1128 /* Be sure we can read the data. */
1129 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1130 {
1131 ++db->head->rdlockdelayed;
1132 pthread_rwlock_rdlock (&db->lock);
1133 }
1134
1135 /* See whether we can handle it from the cache. */
1136 struct datahead *cached;
1137 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1138 db, uid);
1139 if (cached != NULL)
1140 {
1141 /* Hurray it's in the cache. */
1142 if (writeall (fd, cached->data, cached->recsize) != cached->recsize
1143 && __glibc_unlikely (debug_level > 0))
1144 {
1145 /* We have problems sending the result. */
1146 char buf[256];
1147 dbg_log (_("cannot write result: %s"),
1148 strerror_r (errno, buf, sizeof (buf)));
1149 }
1150
1151 pthread_rwlock_unlock (&db->lock);
1152
1153 return;
1154 }
1155
1156 pthread_rwlock_unlock (&db->lock);
1157 }
1158 else if (__builtin_expect (debug_level, 0) > 0)
1159 {
1160 if (req->type == INVALIDATE)
1161 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1162 else
1163 dbg_log ("\t%s", serv2str[req->type]);
1164 }
1165
1166 /* Handle the request. */
1167 switch (req->type)
1168 {
1169 case GETPWBYNAME:
1170 addpwbyname (db, fd, req, key, uid);
1171 break;
1172
1173 case GETPWBYUID:
1174 addpwbyuid (db, fd, req, key, uid);
1175 break;
1176
1177 case GETGRBYNAME:
1178 addgrbyname (db, fd, req, key, uid);
1179 break;
1180
1181 case GETGRBYGID:
1182 addgrbygid (db, fd, req, key, uid);
1183 break;
1184
1185 case GETHOSTBYNAME:
1186 addhstbyname (db, fd, req, key, uid);
1187 break;
1188
1189 case GETHOSTBYNAMEv6:
1190 addhstbynamev6 (db, fd, req, key, uid);
1191 break;
1192
1193 case GETHOSTBYADDR:
1194 addhstbyaddr (db, fd, req, key, uid);
1195 break;
1196
1197 case GETHOSTBYADDRv6:
1198 addhstbyaddrv6 (db, fd, req, key, uid);
1199 break;
1200
1201 case GETAI:
1202 addhstai (db, fd, req, key, uid);
1203 break;
1204
1205 case INITGROUPS:
1206 addinitgroups (db, fd, req, key, uid);
1207 break;
1208
1209 case GETSERVBYNAME:
1210 addservbyname (db, fd, req, key, uid);
1211 break;
1212
1213 case GETSERVBYPORT:
1214 addservbyport (db, fd, req, key, uid);
1215 break;
1216
1217 case GETNETGRENT:
1218 addgetnetgrent (db, fd, req, key, uid);
1219 break;
1220
1221 case INNETGR:
1222 addinnetgr (db, fd, req, key, uid);
1223 break;
1224
1225 case GETSTAT:
1226 case SHUTDOWN:
1227 case INVALIDATE:
1228 {
1229 /* Get the caller's credentials. */
1230 #ifdef SO_PEERCRED
1231 struct ucred caller;
1232 socklen_t optlen = sizeof (caller);
1233
1234 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1235 {
1236 char buf[256];
1237
1238 dbg_log (_("error getting caller's id: %s"),
1239 strerror_r (errno, buf, sizeof (buf)));
1240 break;
1241 }
1242
1243 uid = caller.uid;
1244 #else
1245 /* Some systems have no SO_PEERCRED implementation. They don't
1246 care about security, so neither do we. */
1247 uid = 0;
1248 #endif
1249 }
1250
1251 /* Accept shutdown, getstat and invalidate only from root. For
1252 the stat call also allow the user specified in the config file. */
1253 if (req->type == GETSTAT)
1254 {
1255 if (uid == 0 || uid == stat_uid)
1256 send_stats (fd, dbs);
1257 }
1258 else if (uid == 0)
1259 {
1260 if (req->type == INVALIDATE)
1261 invalidate_cache (key, fd);
1262 else
1263 termination_handler (0);
1264 }
1265 break;
1266
1267 case GETFDPW:
1268 case GETFDGR:
1269 case GETFDHST:
1270 case GETFDSERV:
1271 case GETFDNETGR:
1272 #ifdef SCM_RIGHTS
1273 send_ro_fd (reqinfo[req->type].db, key, fd);
1274 #endif
1275 break;
1276
1277 default:
1278 /* Ignore the command, it's nothing we know. */
1279 break;
1280 }
1281 }
1282
1283 static char *
1284 read_cmdline (size_t *size)
1285 {
1286 int fd = open ("/proc/self/cmdline", O_RDONLY);
1287 if (fd < 0)
1288 return NULL;
1289 size_t current = 0;
1290 size_t limit = 1024;
1291 char *buffer = malloc (limit);
1292 if (buffer == NULL)
1293 {
1294 close (fd);
1295 errno = ENOMEM;
1296 return NULL;
1297 }
1298 while (1)
1299 {
1300 if (current == limit)
1301 {
1302 char *newptr;
1303 if (2 * limit < limit
1304 || (newptr = realloc (buffer, 2 * limit)) == NULL)
1305 {
1306 free (buffer);
1307 close (fd);
1308 errno = ENOMEM;
1309 return NULL;
1310 }
1311 buffer = newptr;
1312 limit *= 2;
1313 }
1314
1315 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buffer + current,
1316 limit - current));
1317 if (n == -1)
1318 {
1319 int e = errno;
1320 free (buffer);
1321 close (fd);
1322 errno = e;
1323 return NULL;
1324 }
1325 if (n == 0)
1326 break;
1327 current += n;
1328 }
1329
1330 close (fd);
1331 *size = current;
1332 return buffer;
1333 }
1334
1335
1336 /* Restart the process. */
1337 static void
1338 restart (void)
1339 {
1340 /* First determine the parameters. We do not use the parameters
1341 passed to main because then nscd would use the system libc after
1342 restarting even if it was started by a non-system dynamic linker
1343 during glibc testing. */
1344 size_t readlen;
1345 char *cmdline = read_cmdline (&readlen);
1346 if (cmdline == NULL)
1347 {
1348 dbg_log (_("\
1349 cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1350 paranoia = 0;
1351 return;
1352 }
1353
1354 /* Parse the command line. Worst case scenario: every two
1355 characters form one parameter (one character plus NUL). */
1356 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1357 int argc = 0;
1358
1359 for (char *cp = cmdline; cp < cmdline + readlen;)
1360 {
1361 argv[argc++] = cp;
1362 cp = (char *) rawmemchr (cp, '\0') + 1;
1363 }
1364 argv[argc] = NULL;
1365
1366 /* Second, change back to the old user if we changed it. */
1367 if (server_user != NULL)
1368 {
1369 if (setresuid (old_uid, old_uid, old_uid) != 0)
1370 {
1371 dbg_log (_("\
1372 cannot change to old UID: %s; disabling paranoia mode"),
1373 strerror (errno));
1374
1375 paranoia = 0;
1376 free (cmdline);
1377 return;
1378 }
1379
1380 if (setresgid (old_gid, old_gid, old_gid) != 0)
1381 {
1382 dbg_log (_("\
1383 cannot change to old GID: %s; disabling paranoia mode"),
1384 strerror (errno));
1385
1386 ignore_value (setuid (server_uid));
1387 paranoia = 0;
1388 free (cmdline);
1389 return;
1390 }
1391 }
1392
1393 /* Next change back to the old working directory. */
1394 if (chdir (oldcwd) == -1)
1395 {
1396 dbg_log (_("\
1397 cannot change to old working directory: %s; disabling paranoia mode"),
1398 strerror (errno));
1399
1400 if (server_user != NULL)
1401 {
1402 ignore_value (setuid (server_uid));
1403 ignore_value (setgid (server_gid));
1404 }
1405 paranoia = 0;
1406 free (cmdline);
1407 return;
1408 }
1409
1410 /* Synchronize memory. */
1411 int32_t certainly[lastdb];
1412 for (int cnt = 0; cnt < lastdb; ++cnt)
1413 if (dbs[cnt].enabled)
1414 {
1415 /* Make sure nobody keeps using the database. */
1416 dbs[cnt].head->timestamp = 0;
1417 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1418 dbs[cnt].head->nscd_certainly_running = 0;
1419
1420 if (dbs[cnt].persistent)
1421 // XXX async OK?
1422 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1423 }
1424
1425 /* The preparations are done. */
1426 #ifdef PATH_MAX
1427 char pathbuf[PATH_MAX];
1428 #else
1429 char pathbuf[256];
1430 #endif
1431 /* Try to exec the real nscd program so the process name (as reported
1432 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1433 if readlink or the exec with the result of the readlink call fails. */
1434 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1435 if (n != -1)
1436 {
1437 pathbuf[n] = '\0';
1438 execv (pathbuf, argv);
1439 }
1440 execv ("/proc/self/exe", argv);
1441
1442 /* If we come here, we will never be able to re-exec. */
1443 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1444 strerror (errno));
1445
1446 if (server_user != NULL)
1447 {
1448 ignore_value (setuid (server_uid));
1449 ignore_value (setgid (server_gid));
1450 }
1451 if (chdir ("/") != 0)
1452 dbg_log (_("cannot change current working directory to \"/\": %s"),
1453 strerror (errno));
1454 paranoia = 0;
1455 free (cmdline);
1456
1457 /* Reenable the databases. */
1458 time_t now = time (NULL);
1459 for (int cnt = 0; cnt < lastdb; ++cnt)
1460 if (dbs[cnt].enabled)
1461 {
1462 dbs[cnt].head->timestamp = now;
1463 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1464 }
1465 }
1466
1467
1468 /* List of file descriptors. */
1469 struct fdlist
1470 {
1471 int fd;
1472 struct fdlist *next;
1473 };
1474 /* Memory allocated for the list. */
1475 static struct fdlist *fdlist;
1476 /* List of currently ready-to-read file descriptors. */
1477 static struct fdlist *readylist;
1478
1479 /* Condition variable and mutex to signal availability of entries in
1480 READYLIST.  The condvar is statically initialized below; worker
1481 threads block on it without a timeout. */
1482 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1483 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1484
1485 /* The clock to use with the condvar. */
1486 static clockid_t timeout_clock = CLOCK_REALTIME;
1487
1488 /* Number of threads ready to handle the READYLIST. */
1489 static unsigned long int nready;
1490
1491
1492 /* Function for the clean-up threads. */
1493 static void *
1494 __attribute__ ((__noreturn__))
1495 nscd_run_prune (void *p)
1496 {
1497 const long int my_number = (long int) p;
1498 assert (dbs[my_number].enabled);
1499
1500 int dont_need_update = setup_thread (&dbs[my_number]);
1501
1502 time_t now = time (NULL);
1503
1504 /* We are running. */
1505 dbs[my_number].head->timestamp = now;
1506
1507 struct timespec prune_ts;
1508 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1509 /* Should never happen. */
1510 abort ();
1511
1512 /* Compute the initial timeout time.  Prevent all the timers from
1513 going off at the same time by adding a db-based value. */
1514 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1515 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1516
1517 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1518 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1519 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1520
1521 pthread_mutex_lock (prune_lock);
1522 while (1)
1523 {
1524 /* Wait, but not forever. */
1525 int e = 0;
1526 if (! dbs[my_number].clear_cache)
1527 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1528 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1529
1530 time_t next_wait;
1531 now = time (NULL);
1532 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1533 || dbs[my_number].clear_cache)
1534 {
1535 /* We will determine the new timeout values based on the
1536 cache content. Should there be concurrent additions to
1537 the cache which are not accounted for in the cache
1538 pruning we want to know about it. Therefore set the
1539 timeout to the maximum.  It will be decreased when adding
1540 new entries to the cache, if necessary. */
1541 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1542
1543 /* Unconditionally reset the flag. */
1544 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1545 dbs[my_number].clear_cache = 0;
1546
1547 pthread_mutex_unlock (prune_lock);
1548
1549 /* We use a separate lock for running the prune function (instead
1550 of keeping prune_lock locked) because this enables concurrent
1551 invocations of cache_add which might modify the timeout value. */
1552 pthread_mutex_lock (prune_run_lock);
1553 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1554 pthread_mutex_unlock (prune_run_lock);
1555
1556 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1557 /* If clients cannot determine for sure whether nscd is running
1558 we need to wake up occasionally to update the timestamp.
1559 Wait 90% of the update period. */
1560 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1561 if (__glibc_unlikely (! dont_need_update))
1562 {
1563 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1564 dbs[my_number].head->timestamp = now;
1565 }
1566
1567 pthread_mutex_lock (prune_lock);
1568
1569 /* Make it known when we will wake up again. */
1570 if (now + next_wait < dbs[my_number].wakeup_time)
1571 dbs[my_number].wakeup_time = now + next_wait;
1572 else
1573 next_wait = dbs[my_number].wakeup_time - now;
1574 }
1575 else
1576 /* The cache was just pruned. Do not do it again now. Just
1577 use the new timeout value. */
1578 next_wait = dbs[my_number].wakeup_time - now;
1579
1580 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1581 /* Should never happen. */
1582 abort ();
1583
1584 /* Compute next timeout time. */
1585 prune_ts.tv_sec += next_wait;
1586 }
1587 }
1588
1589
1590 /* This is the main loop. It is replicated in different threads but
1591 the use of the ready list makes sure only one thread handles an
1592 incoming connection. */
1593 static void *
1594 __attribute__ ((__noreturn__))
1595 nscd_run_worker (void *p)
1596 {
1597 char buf[256];
1598
1599 /* Initial locking. */
1600 pthread_mutex_lock (&readylist_lock);
1601
1602 /* One more thread available. */
1603 ++nready;
1604
1605 while (1)
1606 {
1607 while (readylist == NULL)
1608 pthread_cond_wait (&readylist_cond, &readylist_lock);
1609
1610 struct fdlist *it = readylist->next;
1611 if (readylist->next == readylist)
1612 /* Just one entry on the list. */
1613 readylist = NULL;
1614 else
1615 readylist->next = it->next;
1616
1617 /* Extract the information and mark the record ready to be used
1618 again. */
1619 int fd = it->fd;
1620 it->next = NULL;
1621
1622 /* This thread is no longer available; it will handle the request. */
1623 --nready;
1624
1625 /* We are done with the list. */
1626 pthread_mutex_unlock (&readylist_lock);
1627
1628 /* Now read the request. */
1629 request_header req;
1630 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1631 != sizeof (req), 0))
1632 {
1633 /* We failed to read data. Note that this also might mean we
1634 failed because we would have blocked. */
1635 if (debug_level > 0)
1636 dbg_log (_("short read while reading request: %s"),
1637 strerror_r (errno, buf, sizeof (buf)));
1638 goto close_and_out;
1639 }
1640
1641 /* Check whether this is a valid request type. */
1642 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1643 goto close_and_out;
1644
1645 /* Some systems have no SO_PEERCRED implementation. They don't
1646 care about security, so neither do we. */
1647 uid_t uid = -1;
1648 #ifdef SO_PEERCRED
1649 pid_t pid = 0;
1650
1651 if (__glibc_unlikely (debug_level > 0))
1652 {
1653 struct ucred caller;
1654 socklen_t optlen = sizeof (caller);
1655
1656 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1657 pid = caller.pid;
1658 }
1659 #else
1660 const pid_t pid = 0;
1661 #endif
1662
1663 /* It should not be possible to crash the nscd with a silly
1664 request (i.e., a terribly large key). We limit the size to 1kb. */
1665 if (__builtin_expect (req.key_len, 1) < 0
1666 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1667 {
1668 if (debug_level > 0)
1669 dbg_log (_("key length in request too long: %d"), req.key_len);
1670 }
1671 else
1672 {
1673 /* Get the key. */
1674 char keybuf[MAXKEYLEN + 1];
1675
1676 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1677 req.key_len))
1678 != req.key_len, 0))
1679 {
1680 /* Again, this can also mean we would have blocked. */
1681 if (debug_level > 0)
1682 dbg_log (_("short read while reading request key: %s"),
1683 strerror_r (errno, buf, sizeof (buf)));
1684 goto close_and_out;
1685 }
1686 keybuf[req.key_len] = '\0';
1687
1688 if (__builtin_expect (debug_level, 0) > 0)
1689 {
1690 #ifdef SO_PEERCRED
1691 if (pid != 0)
1692 dbg_log (_("\
1693 handle_request: request received (Version = %d) from PID %ld"),
1694 req.version, (long int) pid);
1695 else
1696 #endif
1697 dbg_log (_("\
1698 handle_request: request received (Version = %d)"), req.version);
1699 }
1700
1701 /* Phew, we got all the data, now process it. */
1702 handle_request (fd, &req, keybuf, uid, pid);
1703 }
1704
1705 close_and_out:
1706 /* We are done. */
1707 close (fd);
1708
1709 /* Re-locking. */
1710 pthread_mutex_lock (&readylist_lock);
1711
1712 /* One more thread available. */
1713 ++nready;
1714 }
1715 /* NOTREACHED */
1716 }
1717
1718
1719 static unsigned int nconns;
1720
1721 static void
1722 fd_ready (int fd)
1723 {
1724 pthread_mutex_lock (&readylist_lock);
1725
1726 /* Find an empty entry in FDLIST. */
1727 size_t inner;
1728 for (inner = 0; inner < nconns; ++inner)
1729 if (fdlist[inner].next == NULL)
1730 break;
1731 assert (inner < nconns);
1732
1733 fdlist[inner].fd = fd;
1734
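/* READYLIST is a circular, singly-linked list: READYLIST points at the
   most recently queued element and READYLIST->next at the oldest, so
   worker threads dequeue connections in FIFO order.  */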
1735 if (readylist == NULL)
1736 readylist = fdlist[inner].next = &fdlist[inner];
1737 else
1738 {
1739 fdlist[inner].next = readylist->next;
1740 readylist = readylist->next = &fdlist[inner];
1741 }
1742
1743 bool do_signal = true;
1744 if (__glibc_unlikely (nready == 0))
1745 {
1746 ++client_queued;
1747 do_signal = false;
1748
1749 /* Try to start another thread to help out. */
1750 pthread_t th;
1751 if (nthreads < max_nthreads
1752 && pthread_create (&th, &attr, nscd_run_worker,
1753 (void *) (long int) nthreads) == 0)
1754 {
1755 /* We got another thread. */
1756 ++nthreads;
1757 /* The new thread might need a kick. */
1758 do_signal = true;
1759 }
1760
1761 }
1762
1763 pthread_mutex_unlock (&readylist_lock);
1764
1765 /* Tell one of the worker threads there is work to do. */
1766 if (do_signal)
1767 pthread_cond_signal (&readylist_cond);
1768 }
1769
1770
1771 /* Check whether restarting should happen. */
1772 static bool
1773 restart_p (time_t now)
1774 {
1775 return (paranoia && readylist == NULL && nready == nthreads
1776 && now >= restart_time);
1777 }
1778
1779
1780 /* Array for times a connection was accepted. */
1781 static time_t *starttime;
1782
1783 #ifdef HAVE_INOTIFY
1784 /* Inotify event for changed file. */
1785 union __inev
1786 {
1787 struct inotify_event i;
1788 # ifndef PATH_MAX
1789 # define PATH_MAX 1024
1790 # endif
1791 char buf[sizeof (struct inotify_event) + PATH_MAX];
1792 };
1793
1794 /* Returns 0 if the file is there otherwise -1. */
1795 int
1796 check_file (struct traced_file *finfo)
1797 {
1798 struct stat64 st;
1799 /* We could check mtime and if different re-add
1800 the watches, and invalidate the database, but we
1801 don't because we are called from inotify_check_files
1802 which should be doing that work. If sufficient inotify
1803 events were lost then the next pruning or invalidation
1804 will do the stat and mtime check. We don't do it here to
1805 keep the logic simple. */
1806 if (stat64 (finfo->fname, &st) < 0)
1807 return -1;
1808 return 0;
1809 }
1810
1811 /* Process the inotify event in INEV. If the event matches any of the files
1812 registered with a database then mark that database as requiring its cache
1813 to be cleared. We indicate the cache needs clearing by setting
1814 TO_CLEAR[DBCNT] to true for the matching database. */
1815 static void
1816 inotify_check_files (bool *to_clear, union __inev *inev)
1817 {
1818 /* Check which of the files changed. */
1819 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1820 {
1821 struct traced_file *finfo = dbs[dbcnt].traced_files;
1822
1823 while (finfo != NULL)
1824 {
1825 /* The configuration file was moved or deleted.
1826 We stop watching it at that point, and reinitialize. */
1827 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1828 && ((inev->i.mask & IN_MOVE_SELF)
1829 || (inev->i.mask & IN_DELETE_SELF)
1830 || (inev->i.mask & IN_IGNORED)))
1831 {
1832 int ret;
1833 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1834
1835 if (check_file (finfo) == 0)
1836 {
1837 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1838 finfo->fname);
1839 return;
1840 }
1841
1842 dbg_log (_("monitored file `%s` was %s, removing watch"),
1843 finfo->fname, moved ? "moved" : "deleted");
1844 /* File was moved out, remove the watch. Watches are
1845 automatically removed when the file is deleted. */
1846 if (moved)
1847 {
1848 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1849 if (ret < 0)
1850 dbg_log (_("failed to remove file watch `%s`: %s"),
1851 finfo->fname, strerror (errno));
1852 }
1853 finfo->inotify_descr[TRACED_FILE] = -1;
1854 to_clear[dbcnt] = true;
1855 if (finfo->call_res_init)
1856 res_init ();
1857 return;
1858 }
1859 /* The configuration file was opened for writing and has just been closed.
1860 We reset the cache and reinitialize. */
1861 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1862 && inev->i.mask & IN_CLOSE_WRITE)
1863 {
1864 /* Mark cache as needing to be cleared and reinitialize. */
1865 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1866 to_clear[dbcnt] = true;
1867 if (finfo->call_res_init)
1868 res_init ();
1869 return;
1870 }
1871 /* The parent directory was moved or deleted. We trigger one last
1872 invalidation. At the next pruning or invalidation we may add
1873 this watch back if the file is present again. */
1874 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1875 && ((inev->i.mask & IN_DELETE_SELF)
1876 || (inev->i.mask & IN_MOVE_SELF)
1877 || (inev->i.mask & IN_IGNORED)))
1878 {
1879 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1880 /* The directory watch may have already been removed
1881 but we don't know so we just remove it again and
1882 ignore the error. Then we remove the file watch.
1883 Note: watches are automatically removed for deleted
1884 files. */
1885 if (moved)
1886 inotify_rm_watch (inotify_fd, inev->i.wd);
1887 if (finfo->inotify_descr[TRACED_FILE] != -1)
1888 {
1889 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1890 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1891 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1892 dbg_log (_("failed to remove file watch `%s`: %s"),
1893 finfo->dname, strerror (errno));
1894 }
1895 finfo->inotify_descr[TRACED_FILE] = -1;
1896 finfo->inotify_descr[TRACED_DIR] = -1;
1897 to_clear[dbcnt] = true;
1898 if (finfo->call_res_init)
1899 res_init ();
1900 /* Continue to the next entry since this might be the
1901 parent directory for multiple registered files and
1902 we want to remove watches for all registered files. */
1903 continue;
1904 }
1905 /* The parent directory had a create or moved to event. */
1906 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1907 && ((inev->i.mask & IN_MOVED_TO)
1908 || (inev->i.mask & IN_CREATE))
1909 && strcmp (inev->i.name, finfo->sfname) == 0)
1910 {
1911 /* We detected a directory change. We look for the creation
1912 of the file we are tracking or the move of the same file
1913 into the directory. */
1914 int ret;
1915 dbg_log (_("monitored file `%s` was %s, adding watch"),
1916 finfo->fname,
1917 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1918 /* File was moved in or created. Regenerate the watch. */
1919 if (finfo->inotify_descr[TRACED_FILE] != -1)
1920 inotify_rm_watch (inotify_fd,
1921 finfo->inotify_descr[TRACED_FILE]);
1922
1923 ret = inotify_add_watch (inotify_fd,
1924 finfo->fname,
1925 TRACED_FILE_MASK);
1926 if (ret < 0)
1927 dbg_log (_("failed to add file watch `%s`: %s"),
1928 finfo->fname, strerror (errno));
1929
1930 finfo->inotify_descr[TRACED_FILE] = ret;
1931
1932 /* The file is new or moved so mark cache as needing to
1933 be cleared and reinitialize. */
1934 to_clear[dbcnt] = true;
1935 if (finfo->call_res_init)
1936 res_init ();
1937
1938 /* Done re-adding the watch. Don't return, we may still
1939 have other files in this same directory, same watch
1940 descriptor, and need to process them. */
1941 }
1942 /* Other events are ignored, and we move on to the next file. */
1943 finfo = finfo->next;
1944 }
1945 }
1946 }
1947
1948 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1949 for the associated database, otherwise do nothing. The TO_CLEAR array must
1950 have LASTDB entries. */
1951 static inline void
1952 clear_db_cache (bool *to_clear)
1953 {
1954 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1955 if (to_clear[dbcnt])
1956 {
1957 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1958 dbs[dbcnt].clear_cache = 1;
1959 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
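/* Wake the prune thread for this database so it notices CLEAR_CACHE
   and performs the actual clearing.  */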
1960 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1961 }
1962 }
1963
1964 int
1965 handle_inotify_events (void)
1966 {
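/* One flag per database: set when an event indicates that the
   corresponding cache must be cleared once all pending events have
   been processed.  */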
1967 bool to_clear[lastdb] = { false, };
1968 union __inev inev;
1969
1970 /* Read all inotify events for files registered via
1971 register_traced_file(). */
1972 while (1)
1973 {
1974 /* Potentially read multiple events into buf. */
1975 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1976 &inev.buf,
1977 sizeof (inev)));
1978 if (nb < (ssize_t) sizeof (struct inotify_event))
1979 {
1980 /* Not even 1 event. */
1981 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1982 return -1;
1983 /* Done reading events that are ready. */
1984 break;
1985 }
1986 /* Process all events. The normal inotify interface delivers
1987 complete events on a read and never a partial event. */
1988 char *eptr = &inev.buf[0];
1989 ssize_t count;
1990 while (1)
1991 {
1992 /* Check which of the files changed. */
1993 inotify_check_files (to_clear, &inev);
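/* Advance to the next event.  Events are variable length: a fixed
   header followed by LEN bytes holding the (padded) name.  */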
1994 count = sizeof (struct inotify_event) + inev.i.len;
1995 eptr += count;
1996 nb -= count;
1997 if (nb >= (ssize_t) sizeof (struct inotify_event))
1998 memcpy (&inev, eptr, nb);
1999 else
2000 break;
2001 }
2002 continue;
2003 }
2004 /* Actually perform the cache clearing. */
2005 clear_db_cache (to_clear);
2006 return 0;
2007 }
2008
2009 #endif
2010
2011 static void
2012 __attribute__ ((__noreturn__))
2013 main_loop_poll (void)
2014 {
2015 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2016 * sizeof (conns[0]));
2017
2018 conns[0].fd = sock;
2019 conns[0].events = POLLRDNORM;
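/* NUSED is the number of CONNS slots currently in use, FIRSTFREE the
   index of the lowest unused slot.  Slot 0 always holds the listening
   socket.  */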
2020 size_t nused = 1;
2021 size_t firstfree = 1;
2022
2023 #ifdef HAVE_INOTIFY
2024 if (inotify_fd != -1)
2025 {
2026 conns[1].fd = inotify_fd;
2027 conns[1].events = POLLRDNORM;
2028 nused = 2;
2029 firstfree = 2;
2030 }
2031 #endif
2032
2033 #ifdef HAVE_NETLINK
2034 size_t idx_nl_status_fd = 0;
2035 if (nl_status_fd != -1)
2036 {
2037 idx_nl_status_fd = nused;
2038 conns[nused].fd = nl_status_fd;
2039 conns[nused].events = POLLRDNORM;
2040 ++nused;
2041 firstfree = nused;
2042 }
2043 #endif
2044
2045 while (1)
2046 {
2047 /* Wait for any event.  We wait at most MAIN_THREAD_TIMEOUT
2048 milliseconds so that we can periodically check whether any accepted
2049 connections should be closed because no request has arrived on them. */
2050 #define MAX_ACCEPT_TIMEOUT 30
2051 #define MIN_ACCEPT_TIMEOUT 5
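/* Poll timeout in milliseconds.  It shrinks linearly from
   MAX_ACCEPT_TIMEOUT (no accepted connections) down to the midpoint
   of MAX_ACCEPT_TIMEOUT and MIN_ACCEPT_TIMEOUT (all NCONNS slots in
   use).  */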
2052 #define MAIN_THREAD_TIMEOUT \
2053 (MAX_ACCEPT_TIMEOUT * 1000 \
2054 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2055
2056 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2057
2058 time_t now = time (NULL);
2059
2060 /* If there is a descriptor ready for reading or there is a new
2061 connection, process this now. */
2062 if (n > 0)
2063 {
2064 if (conns[0].revents != 0)
2065 {
2066 /* We have a new incoming connection. Accept the connection. */
2067 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2068 SOCK_NONBLOCK));
2069
2070 /* Use the descriptor if we have not reached the limit. */
2071 if (fd >= 0)
2072 {
2073 if (firstfree < nconns)
2074 {
2075 conns[firstfree].fd = fd;
2076 conns[firstfree].events = POLLRDNORM;
2077 starttime[firstfree] = now;
2078 if (firstfree >= nused)
2079 nused = firstfree + 1;
2080
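/* Advance FIRSTFREE past slots that are still in use; it stops at
   the next free slot or at NUSED.  */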
2081 do
2082 ++firstfree;
2083 while (firstfree < nused && conns[firstfree].fd != -1);
2084 }
2085 else
2086 /* We cannot use the connection so close it. */
2087 close (fd);
2088 }
2089
2090 --n;
2091 }
2092
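/* Index of the first slot that can hold an accepted connection;
   slot 0 is the listening socket and, when inotify is in use,
   slot 1 is the inotify descriptor.  */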
2093 size_t first = 1;
2094 #ifdef HAVE_INOTIFY
2095 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2096 {
2097 if (conns[1].revents != 0)
2098 {
2099 int ret;
2100 ret = handle_inotify_events ();
2101 if (ret == -1)
2102 {
2103 /* Something went wrong when reading the inotify
2104 data. Better disable inotify. */
2105 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2106 conns[1].fd = -1;
2107 firstfree = 1;
2108 if (nused == 2)
2109 nused = 1;
2110 close (inotify_fd);
2111 inotify_fd = -1;
2112 }
2113 --n;
2114 }
2115
2116 first = 2;
2117 }
2118 #endif
2119
2120 #ifdef HAVE_NETLINK
2121 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2122 {
2123 char buf[4096];
2124 /* Read all the data. We do not interpret it here. */
2125 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2126 sizeof (buf))) != -1)
2127 ;
2128
2129 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2130 = __bump_nl_timestamp ();
2131 }
2132 #endif
2133
2134 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2135 if (conns[cnt].revents != 0)
2136 {
2137 fd_ready (conns[cnt].fd);
2138
2139 /* Clean up the CONNS array. */
2140 conns[cnt].fd = -1;
2141 if (cnt < firstfree)
2142 firstfree = cnt;
2143 if (cnt == nused - 1)
2144 do
2145 --nused;
2146 while (conns[nused - 1].fd == -1);
2147
2148 --n;
2149 }
2150 }
2151
2152 /* Now find entries which have timed out. */
2153 assert (nused > 0);
2154
2155 /* We make the timeout length depend on the number of file
2156 descriptors currently used. */
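/* The timeout shrinks linearly from MAX_ACCEPT_TIMEOUT seconds (few
   descriptors in use) down to MIN_ACCEPT_TIMEOUT seconds (all NCONNS
   slots in use).  */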
2157 #define ACCEPT_TIMEOUT \
2158 (MAX_ACCEPT_TIMEOUT \
2159 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2160 time_t laststart = now - ACCEPT_TIMEOUT;
2161
2162 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2163 {
2164 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2165 {
2166 /* Remove the entry, it timed out. */
2167 (void) close (conns[cnt].fd);
2168 conns[cnt].fd = -1;
2169
2170 if (cnt < firstfree)
2171 firstfree = cnt;
2172 if (cnt == nused - 1)
2173 do
2174 --nused;
2175 while (conns[nused - 1].fd == -1);
2176 }
2177 }
2178
2179 if (restart_p (now))
2180 restart ();
2181 }
2182 }
2183
2184
2185 #ifdef HAVE_EPOLL
2186 static void
2187 main_loop_epoll (int efd)
2188 {
2189 struct epoll_event ev = { 0, };
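/* NUSED counts the descriptors registered with the epoll instance;
   HIGHEST is the largest accepted connection descriptor seen so far
   and bounds the timeout scan below.  */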
2190 int nused = 1;
2191 size_t highest = 0;
2192
2193 /* Add the socket. */
2194 ev.events = EPOLLRDNORM;
2195 ev.data.fd = sock;
2196 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2197 /* We cannot use epoll. */
2198 return;
2199
2200 # ifdef HAVE_INOTIFY
2201 if (inotify_fd != -1)
2202 {
2203 ev.events = EPOLLRDNORM;
2204 ev.data.fd = inotify_fd;
2205 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2206 /* We cannot use epoll. */
2207 return;
2208 nused = 2;
2209 }
2210 # endif
2211
2212 # ifdef HAVE_NETLINK
2213 if (nl_status_fd != -1)
2214 {
2215 ev.events = EPOLLRDNORM;
2216 ev.data.fd = nl_status_fd;
2217 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2218 /* We cannot use epoll. */
2219 return;
2220 }
2221 # endif
2222
2223 while (1)
2224 {
2225 struct epoll_event revs[100];
2226 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2227
2228 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2229
2230 time_t now = time (NULL);
2231
2232 for (int cnt = 0; cnt < n; ++cnt)
2233 if (revs[cnt].data.fd == sock)
2234 {
2235 /* A new connection. */
2236 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2237 SOCK_NONBLOCK));
2238
2239 /* Use the descriptor if we have not reached the limit. */
2240 if (fd >= 0)
2241 {
2242 /* Try to add the new descriptor. */
2243 ev.data.fd = fd;
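/* STARTTIME is indexed by descriptor number and has only NCONNS
   entries, so descriptors at or beyond that limit cannot be
   tracked.  */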
2244 if (fd >= nconns
2245 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2246 /* The descriptor is too large or something went
2247 wrong. Close the descriptor. */
2248 close (fd);
2249 else
2250 {
2251 /* Remember when we accepted the connection. */
2252 starttime[fd] = now;
2253
2254 if (fd > highest)
2255 highest = fd;
2256
2257 ++nused;
2258 }
2259 }
2260 }
2261 # ifdef HAVE_INOTIFY
2262 else if (revs[cnt].data.fd == inotify_fd)
2263 {
2264 int ret;
2265 ret = handle_inotify_events ();
2266 if (ret == -1)
2267 {
2268 /* Something went wrong when reading the inotify
2269 data. Better disable inotify. */
2270 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2271 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2272 close (inotify_fd);
2273 inotify_fd = -1;
2274 break;
2275 }
2276 }
2277 # endif
2278 # ifdef HAVE_NETLINK
2279 else if (revs[cnt].data.fd == nl_status_fd)
2280 {
2281 char buf[4096];
2282 /* Read all the data. We do not interpret it here. */
2283 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2284 sizeof (buf))) != -1)
2285 ;
2286
2287 __bump_nl_timestamp ();
2288 }
2289 # endif
2290 else
2291 {
2292 /* Remove the descriptor from the epoll descriptor. */
2293 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2294
2295 /* Get a worker to handle the request. */
2296 fd_ready (revs[cnt].data.fd);
2297
2298 /* Reset the time. */
2299 starttime[revs[cnt].data.fd] = 0;
2300 if (revs[cnt].data.fd == highest)
2301 do
2302 --highest;
2303 while (highest > 0 && starttime[highest] == 0);
2304
2305 --nused;
2306 }
2307
2308 /* Now look for descriptors of accepted connections on which no
2309 request has arrived for too long.  */
2310 time_t laststart = now - ACCEPT_TIMEOUT;
2311 assert (starttime[sock] == 0);
2312 # ifdef HAVE_INOTIFY
2313 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2314 # endif
2315 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
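/* Scan downwards from HIGHEST; descriptors 0 through STDERR_FILENO
   are the standard streams and can never be accepted connections.  */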
2316 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2317 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2318 {
2319 /* We are waiting for this one for too long. Close it. */
2320 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2321
2322 (void) close (cnt);
2323
2324 starttime[cnt] = 0;
2325 if (cnt == highest)
2326 --highest;
2327 }
2328 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2329 --highest;
2330
2331 if (restart_p (now))
2332 restart ();
2333 }
2334 }
2335 #endif
2336
2337
2338 /* Start all the threads we want. The initial process is thread no. 1. */
2339 void
2340 start_threads (void)
2341 {
2342 /* Initialize the condition variable we will use. The only
2343 non-standard attribute we might use is the clock selection. */
2344 pthread_condattr_t condattr;
2345 pthread_condattr_init (&condattr);
2346
2347 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2348 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2349 /* Determine whether the monotonic clock is available. */
2350 struct timespec dummy;
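/* If an option is only optionally available (its macro is defined
   to 0), verify its presence at run time via sysconf before probing
   the clock.  */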
2351 # if _POSIX_MONOTONIC_CLOCK == 0
2352 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2353 # endif
2354 # if _POSIX_CLOCK_SELECTION == 0
2355 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2356 # endif
2357 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2358 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2359 timeout_clock = CLOCK_MONOTONIC;
2360 #endif
2361
2362 /* Create the attribute for the threads. They are all created
2363 detached. */
2364 pthread_attr_init (&attr);
2365 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2366 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2367 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2368
2369 /* We allow fewer than LASTDB threads only for debugging. */
2370 if (debug_level == 0)
2371 nthreads = MAX (nthreads, lastdb);
2372
2373 /* Create the threads which prune the databases. */
2374 // XXX Ideally this work would be done by some of the worker threads.
2375 // XXX But this is problematic since we would need to be able to wake
2376 // XXX them up explicitly as well as part of the group handling the
2377 // XXX ready-list. This requires an operation where we can wait on
2378 // XXX two condition variables at the same time. This operation
2379 // XXX does not exist (yet).
2380 for (long int i = 0; i < lastdb; ++i)
2381 {
2382 /* Initialize the condition variable. */
2383 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2384 {
2385 dbg_log (_("could not initialize conditional variable"));
2386 do_exit (1, 0, NULL);
2387 }
2388
2389 pthread_t th;
2390 if (dbs[i].enabled
2391 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2392 {
2393 dbg_log (_("could not start clean-up thread; terminating"));
2394 do_exit (1, 0, NULL);
2395 }
2396 }
2397
2398 pthread_condattr_destroy (&condattr);
2399
2400 for (long int i = 0; i < nthreads; ++i)
2401 {
2402 pthread_t th;
2403 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2404 {
2405 if (i == 0)
2406 {
2407 dbg_log (_("could not start any worker thread; terminating"));
2408 do_exit (1, 0, NULL);
2409 }
2410
2411 break;
2412 }
2413 }
2414
2415 /* Now it is safe to let the parent know that we're doing fine and it can
2416 exit. */
2417 notify_parent (0);
2418
2419 /* Determine how much room for descriptors we should initially
2420 allocate. This might need to change later if we cap the number
2421 with MAXCONN. */
2422 const long int nfds = sysconf (_SC_OPEN_MAX);
2423 #define MINCONN 32
2424 #define MAXCONN 16384
2425 if (nfds == -1 || nfds > MAXCONN)
2426 nconns = MAXCONN;
2427 else if (nfds < MINCONN)
2428 nconns = MINCONN;
2429 else
2430 nconns = nfds;
2431
2432 /* We need memory to pass descriptors on to the worker threads. */
2433 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2434 /* Array to keep track of when each connection was accepted. */
2435 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2436
2437 /* In the main thread we execute the loop which handles incoming
2438 connections. */
2439 #ifdef HAVE_EPOLL
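/* The size argument of epoll_create is only a hint (modern Linux
   kernels ignore it); it merely has to be positive.  */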
2440 int efd = epoll_create (100);
2441 if (efd != -1)
2442 {
2443 main_loop_epoll (efd);
2444 close (efd);
2445 }
2446 #endif
2447
2448 main_loop_poll ();
2449 }
2450
2451
2452 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2453 this function is called, we are not listening on the nscd socket yet, so
2454 we can just use the ordinary lookup functions without causing a lockup. */
2455 static void
2456 begin_drop_privileges (void)
2457 {
2458 struct passwd *pwd = getpwnam (server_user);
2459
2460 if (pwd == NULL)
2461 {
2462 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2463 do_exit (EXIT_FAILURE, 0,
2464 _("Failed to run nscd as user '%s'"), server_user);
2465 }
2466
2467 server_uid = pwd->pw_uid;
2468 server_gid = pwd->pw_gid;
2469
2470 /* Save the old UID/GID if we have to change back. */
2471 if (paranoia)
2472 {
2473 old_uid = getuid ();
2474 old_gid = getgid ();
2475 }
2476
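/* The first call, with SERVER_NGROUPS still zero, is expected to
   return -1 and store the required number of groups; a return of 0
   would mean the user is in no groups at all, which cannot happen
   since SERVER_GID is always included in the result.  */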
2477 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2478 {
2479 /* This really must never happen. */
2480 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2481 do_exit (EXIT_FAILURE, errno,
2482 _("initial getgrouplist failed"));
2483 }
2484
2485 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2486
2487 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2488 == -1)
2489 {
2490 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2491 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2492 }
2493 }
2494
2495
2496 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2497 run nscd as the user specified in the configuration file. */
2498 static void
2499 finish_drop_privileges (void)
2500 {
2501 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2502 /* We need to preserve the capabilities to connect to the audit daemon. */
2503 cap_t new_caps = preserve_capabilities ();
2504 #endif
2505
2506 if (setgroups (server_ngroups, server_groups) == -1)
2507 {
2508 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2509 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2510 }
2511
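/* In paranoia mode keep the old GID (and, below, the old UID) as the
   saved set-*-ID so that the server can change back later (see the
   comment in begin_drop_privileges).  */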
2512 int res;
2513 if (paranoia)
2514 res = setresgid (server_gid, server_gid, old_gid);
2515 else
2516 res = setgid (server_gid);
2517 if (res == -1)
2518 {
2519 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2520 do_exit (4, errno, "setgid");
2521 }
2522
2523 if (paranoia)
2524 res = setresuid (server_uid, server_uid, old_uid);
2525 else
2526 res = setuid (server_uid);
2527 if (res == -1)
2528 {
2529 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2530 do_exit (4, errno, "setuid");
2531 }
2532
2533 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2534 /* Remove the temporary capabilities. */
2535 install_real_capabilities (new_caps);
2536 #endif
2537 }
2538