/* Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <not-cancel.h>
#include <kernel-features.h>
#include <nss.h>
#include <struct___timespec64.h>

#include "nscd-client.h"

/* Extra time we wait if the socket is still receiving data.  This
   value is in milliseconds.  Note that the other side is nscd on the
   local machine and it is already transmitting data.  So the wait
   time need not be long.  */
#define EXTRA_RECEIVE_TIME 200

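/* Wait until data can be read from SOCK or the timeout expires.
   Despite the parameter name, USECTMO is a timeout in milliseconds,
   as expected by poll.  Returns the result of the last poll call:
   positive if the descriptor is ready, zero on timeout, -1 on
   error.  */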
static int
wait_on_socket (int sock, long int usectmo)
{
  struct pollfd fds[1];
  fds[0].fd = sock;
  fds[0].events = POLLIN | POLLERR | POLLHUP;
  int n = __poll (fds, 1, usectmo);
  if (n == -1 && __builtin_expect (errno == EINTR, 0))
    {
      /* Handle the case where the poll() call is interrupted by a
	 signal.  We cannot just use TEMP_FAILURE_RETRY since it might
	 lead to infinite loops.  */
      struct __timespec64 now;
      __clock_gettime64 (CLOCK_REALTIME, &now);
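      /* Compute the absolute deadline in milliseconds, rounding the
	 nanoseconds to the nearest millisecond.  */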
      int64_t end = (now.tv_sec * 1000 + usectmo
                     + (now.tv_nsec + 500000) / 1000000);
      long int timeout = usectmo;
      while (1)
	{
	  n = __poll (fds, 1, timeout);
	  if (n != -1 || errno != EINTR)
	    break;

	  /* Recompute the timeout time.  */
          __clock_gettime64 (CLOCK_REALTIME, &now);
	  timeout = end - ((now.tv_sec * 1000
                            + (now.tv_nsec + 500000) / 1000000));
	}
    }

  return n;
}

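/* Read up to LEN bytes from FD into BUF, restarting after signals and
   waiting a little longer (see EXTRA_RECEIVE_TIME) when the
   nonblocking socket reports EAGAIN while data is still in flight.
   Returns the number of bytes actually read or, if the final read
   failed, its error return.  */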
ssize_t
__readall (int fd, void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;
  do
    {
    again:
      ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
      if (ret <= 0)
	{
	  if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
	      /* The socket is still receiving data.  Wait a bit more.  */
	      && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
	    goto again;

	  break;
	}
      buf = (char *) buf + ret;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}

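/* Like __readall, but scatter the data into the IOVCNT buffers
   described by IOV.  After a partial read a local copy of the iovec
   array is advanced past the buffers that are already full.  */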
ssize_t
__readvall (int fd, const struct iovec *iov, int iovcnt)
{
  ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
  if (ret <= 0)
    {
      if (__glibc_likely (ret == 0 || errno != EAGAIN))
	/* A genuine error or no data to read.  */
	return ret;

      /* Not all of the data has been received yet.  Act as if we had
	 not read anything so far.  */
      ret = 0;
    }

  size_t total = 0;
  for (int i = 0; i < iovcnt; ++i)
    total += iov[i].iov_len;

  if (ret < total)
    {
      struct iovec iov_buf[iovcnt];
      ssize_t r = ret;

      struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
      do
	{
	  while (iovp->iov_len <= r)
	    {
	      r -= iovp->iov_len;
	      --iovcnt;
	      ++iovp;
	    }
	  iovp->iov_base = (char *) iovp->iov_base + r;
	  iovp->iov_len -= r;
	again:
	  r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
	  if (r <= 0)
	    {
	      if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
		  /* The socket is still receiving data.  Wait a bit more.  */
		  && wait_on_socket (fd, EXTRA_RECEIVE_TIME) > 0)
		goto again;

	      break;
	    }
	  ret += r;
	}
      while (ret < total);
      if (r < 0)
	ret = r;
    }
  return ret;
}

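/* Connect a nonblocking, close-on-exec UNIX stream socket to the nscd
   socket and send a request of TYPE together with the KEYLEN bytes of
   KEY, giving the possibly busy daemon up to five seconds to accept
   the data.  Returns the connected socket, or -1 on failure.  */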
static int
open_socket (request_type type, const char *key, size_t keylen)
{
  int sock;

  sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
  if (sock < 0)
    return -1;

  size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
  struct
  {
    request_header req;
    char key[];
  } *reqdata = alloca (real_sizeof_reqdata);

  struct sockaddr_un sun;
  sun.sun_family = AF_UNIX;
  strcpy (sun.sun_path, _PATH_NSCDSOCKET);
  if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
      && errno != EINPROGRESS)
    goto out;

  reqdata->req.version = NSCD_VERSION;
  reqdata->req.type = type;
  reqdata->req.key_len = keylen;

  memcpy (reqdata->key, key, keylen);

  bool first_try = true;
  struct __timespec64 tvend = { 0, 0 };
  while (1)
    {
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
      ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
						 real_sizeof_reqdata,
						 MSG_NOSIGNAL));
      if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
	/* We managed to send the request.  */
	return sock;

      if (wres != -1 || errno != EAGAIN)
	/* Something is really wrong, no chance to continue.  */
	break;

      /* The daemon is busy; wait for it.  */
      int to;
      struct __timespec64 now;
      __clock_gettime64 (CLOCK_REALTIME, &now);
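      /* On the first failure establish an absolute deadline five
	 seconds from now; later iterations convert whatever time
	 remains until that deadline back into a poll timeout in
	 milliseconds.  */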
      if (first_try)
	{
	  tvend.tv_nsec = now.tv_nsec;
	  tvend.tv_sec = now.tv_sec + 5;
	  to = 5 * 1000;
	  first_try = false;
	}
      else
	to = ((tvend.tv_sec - now.tv_sec) * 1000
	      + (tvend.tv_nsec - now.tv_nsec) / 1000000);

      struct pollfd fds[1];
      fds[0].fd = sock;
      fds[0].events = POLLOUT | POLLERR | POLLHUP;
      if (__poll (fds, 1, to) <= 0)
	/* The connection timed out or broke down.  */
	break;

      /* We try to write again.  */
    }

 out:
  __close_nocancel_nostatus (sock);

  return -1;
}

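/* Unmap the database and free the record.  The caller must hold the
   last reference; the counter has already dropped to zero.  */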
void
__nscd_unmap (struct mapped_database *mapped)
{
  assert (mapped->counter == 0);
  __munmap ((void *) mapped->head, mapped->mapsize);
  free (mapped);
}

/* Try to get a file descriptor for the shared memory segment
   containing the database.  */
struct mapped_database *
__nscd_get_mapping (request_type type, const char *key,
		    struct mapped_database **mappedp)
{
  struct mapped_database *result = NO_MAPPING;
#ifdef SCM_RIGHTS
  const size_t keylen = strlen (key) + 1;
  int saved_errno = errno;

  int mapfd = -1;
  char resdata[keylen];

  /* Open a socket and send the request.  */
  int sock = open_socket (type, key, keylen);
  if (sock < 0)
    goto out;

  /* Room for the data sent along with the file descriptor.  We expect
     the key name back.  */
  uint64_t mapsize;
  struct iovec iov[2];
  iov[0].iov_base = resdata;
  iov[0].iov_len = keylen;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

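  /* Control message buffer with room for the single file descriptor
     the daemon passes via SCM_RIGHTS.  */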
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
			.msg_control = buf.bytes,
			.msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* This access is well-aligned since BUF is correctly aligned for an
     int and CMSG_DATA preserves this alignment.  */
  memset (CMSG_DATA (cmsg), '\xff', sizeof (int));

  msg.msg_controllen = cmsg->cmsg_len;

  if (wait_on_socket (sock, 5 * 1000) <= 0)
    goto out_close2;

# ifndef MSG_CMSG_CLOEXEC
#  define MSG_CMSG_CLOEXEC 0
# endif
  ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));

  if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
			|| (CMSG_FIRSTHDR (&msg)->cmsg_len
			    != CMSG_LEN (sizeof (int))), 0))
    goto out_close2;

  int *ip = (void *) CMSG_DATA (cmsg);
  mapfd = *ip;

  if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
    goto out_close;

  if (__glibc_unlikely (strcmp (resdata, key) != 0))
    goto out_close;

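  /* If the daemon did not send the mapping size along with the key,
     determine it from the file itself.  */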
  if (__glibc_unlikely (n == keylen))
    {
      struct __stat64_t64 st;
      if (__glibc_unlikely (__fstat64_time64 (mapfd, &st) != 0)
	  || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
			       0))
	goto out_close;

      mapsize = st.st_size;
    }

  /* The file is large enough, map it now.  */
  void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
  if (__glibc_likely (mapping != MAP_FAILED))
    {
      /* Check whether the database is correct and up-to-date.  */
      struct database_pers_head *head = mapping;

      if (__builtin_expect (head->version != DB_VERSION, 0)
	  || __builtin_expect (head->header_size != sizeof (*head), 0)
	  /* Catch some misconfigurations.  The server should catch
	     them now, but some older versions did not.  */
	  || __builtin_expect (head->module == 0, 0)
	  /* This really should not happen but who knows, maybe the update
	     thread got stuck.  */
	  || __builtin_expect (! head->nscd_certainly_running
			       && (head->timestamp + MAPPING_TIMEOUT
				   < time_now ()), 0))
	{
	out_unmap:
	  __munmap (mapping, mapsize);
	  goto out_close;
	}

      size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
					       ALIGN)
		     + head->data_size);

      if (__glibc_unlikely (mapsize < size))
	goto out_unmap;

      /* Allocate a record for the mapping.  */
      struct mapped_database *newp = malloc (sizeof (*newp));
      if (newp == NULL)
	/* Ugh, after all we went through the memory allocation failed.  */
	goto out_unmap;

      newp->head = mapping;
      newp->data = ((char *) mapping + head->header_size
		    + roundup (head->module * sizeof (ref_t), ALIGN));
      newp->mapsize = size;
      newp->datasize = head->data_size;
      /* Set counter to 1 to show it is usable.  */
      newp->counter = 1;

      result = newp;
    }

 out_close:
  __close (mapfd);
 out_close2:
  __close (sock);
 out:
  __set_errno (saved_errno);
#endif	/* SCM_RIGHTS */

  struct mapped_database *oldval = *mappedp;
  *mappedp = result;

  if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
    __nscd_unmap (oldval);

  return result;
}

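/* Return a reference to the current mapping for TYPE and NAME,
   requesting a fresh one via __nscd_get_mapping if none is mapped or
   the existing one looks stale.  On success the reference counter is
   incremented; NO_MAPPING is returned while nscd's garbage collection
   is in progress (odd GC cycle) or no usable mapping exists.  */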
struct mapped_database *
__nscd_get_map_ref (request_type type, const char *name,
		    volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
{
  struct mapped_database *cur = mapptr->mapped;
  if (cur == NO_MAPPING)
    return cur;

  if (!__nscd_acquire_maplock (mapptr))
    return NO_MAPPING;

  cur = mapptr->mapped;

  if (__glibc_likely (cur != NO_MAPPING))
    {
      /* If not mapped or timestamp not updated, request new map.  */
      if (cur == NULL
	  || (cur->head->nscd_certainly_running == 0
	      && cur->head->timestamp + MAPPING_TIMEOUT < time_now ())
	  || cur->head->data_size > cur->datasize)
	cur = __nscd_get_mapping (type, name,
				  (struct mapped_database **) &mapptr->mapped);

      if (__glibc_likely (cur != NO_MAPPING))
	{
	  if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
				0))
	    cur = NO_MAPPING;
	  else
	    atomic_increment (&cur->counter);
	}
    }

  mapptr->lock = 0;

  return cur;
}


/* Using sizeof (hashentry) is not always correct to determine the size of
   the data structure as found in the nscd cache.  The program could be
   a 64-bit process and nscd could be a 32-bit process.  In this case
   sizeof (hashentry) would overestimate the size.  The following is
   the minimum size of such an entry, good enough for our tests here.  */
#define MINIMUM_HASHENTRY_SIZE \
  (offsetof (struct hashentry, dellist) + sizeof (int32_t))

/* Don't return const struct datahead *: even though the record is
   normally constant, it can change arbitrarily during nscd garbage
   collection.  */
struct datahead *
__nscd_cache_search (request_type type, const char *key, size_t keylen,
		     const struct mapped_database *mapped, size_t datalen)
{
  unsigned long int hash = __nss_hash (key, keylen) % mapped->head->module;
  size_t datasize = mapped->datasize;

  ref_t trail = mapped->head->array[hash];
  trail = atomic_forced_read (trail);
  ref_t work = trail;
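  /* Upper bound on the number of entries a well-formed chain can
     hold: each entry consumes at least a minimal hashentry plus part
     of a datahead within DATASIZE.  */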
  size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
				+ offsetof (struct datahead, data) / 2);
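  /* TRAIL advances every other iteration and thus lags behind WORK;
     if the chain contains a cycle the two eventually meet, which
     catches corruption even before LOOP_CNT runs out.  */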
  int tick = 0;

  while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
    {
      struct hashentry *here = (struct hashentry *) (mapped->data + work);
      ref_t here_key, here_packet;

#if !_STRING_ARCH_unaligned
      /* During garbage collection, when a struct hashentry record is
	 moved, it is first copied to its new location and only then
	 is the pointer in the previous hashentry adjusted; there is
	 no barrier between those memory writes.  Hitting that window
	 is very unlikely, so check alignment only if a misaligned
	 load can crash the application.  */
      if ((uintptr_t) here & (__alignof__ (*here) - 1))
	return NULL;
#endif

      if (type == here->type
	  && keylen == here->len
	  && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
	  && memcmp (key, mapped->data + here_key, keylen) == 0
	  && ((here_packet = atomic_forced_read (here->packet))
	      + sizeof (struct datahead) <= datasize))
	{
	  /* We found the entry.  Increment the appropriate counter.  */
	  struct datahead *dh
	    = (struct datahead *) (mapped->data + here_packet);

#if !_STRING_ARCH_unaligned
	  if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
	    return NULL;
#endif

	  /* See whether we must ignore the entry or whether something
	     is wrong because garbage collection is in progress.  */
	  if (dh->usable
	      && here_packet + dh->allocsize <= datasize
	      && (here_packet + offsetof (struct datahead, data) + datalen
		  <= datasize))
	    return dh;
	}

      work = atomic_forced_read (here->next);
      /* Prevent endless loops.  This should never happen but perhaps
	 the database got corrupted, accidentally or deliberately.  */
      if (work == trail || loop_cnt-- == 0)
	break;
      if (tick)
	{
	  struct hashentry *trailelem;
	  trailelem = (struct hashentry *) (mapped->data + trail);

#if !_STRING_ARCH_unaligned
	  /* We have to redo the checks.  Maybe the data changed.  */
	  if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
	    return NULL;
#endif

	  if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
	    return NULL;

	  trail = atomic_forced_read (trailelem->next);
	}
      tick = 1 - tick;
    }

  return NULL;
}


/* Create a socket connected to a name. */
int
__nscd_open_socket (const char *key, size_t keylen, request_type type,
		    void *response, size_t responselen)
{
  /* This should never happen, and it is something the nscd daemon
     enforces, too.  Here it helps to limit the amount of stack
     used.  */
  if (keylen > MAXKEYLEN)
    return -1;

  int saved_errno = errno;

  int sock = open_socket (type, key, keylen);
  if (sock >= 0)
    {
      /* Wait for data.  */
      if (wait_on_socket (sock, 5 * 1000) > 0)
	{
	  ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
						       responselen));
	  if (nbytes == (ssize_t) responselen)
	    return sock;
	}

      __close_nocancel_nostatus (sock);
    }

  __set_errno (saved_errno);

  return -1;
}