1 /* Code to load locale data from the locale archive file.
2    Copyright (C) 2002-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <locale.h>
20 #include <stddef.h>
21 #include <stdlib.h>
22 #include <stdbool.h>
23 #include <errno.h>
24 #include <assert.h>
25 #include <string.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <stdint.h>
29 #include <sys/mman.h>
30 #include <sys/stat.h>
31 #include <sys/param.h>
32 
33 #include "localeinfo.h"
34 #include "locarchive.h"
35 #include <not-cancel.h>
36 
37 /* Define the hash function.  We define the function as static inline.  */
38 #define compute_hashval static inline compute_hashval
39 #define hashval_t uint32_t
40 #include "hashval.h"
41 #undef compute_hashval
42 
43 
44 /* Name of the locale archive file.  */
45 static const char archfname[] = COMPLOCALEDIR "/locale-archive";
46 
/* Number of bytes mapped by the first mmap of the archive when the
   whole file is not mapped at once.  Ideally this covers the header
   plus the data of the first locale that gets loaded.  */
#define ARCHIVE_MAPPING_WINDOW	(2 * 1024 * 1024)

#if !defined MAP_COPY
/* Fall back to MAP_PRIVATE.  That is weaker than MAP_COPY because pages
   we have not looked at yet can still change underneath us and leave us
   with inconsistent data; we rely on nobody modifying the live system
   archive.  MAP_SHARED is not used even though the mappings are only
   ever PROT_READ: the system cannot know that we will never call
   mprotect later, so it would have less freedom (for example to let
   the on-disk file go away).  */
# define MAP_COPY MAP_PRIVATE
#endif
#if !defined MAP_FILE
/* Not every system provides this flag; it carries no meaning here.  */
# define MAP_FILE 0
#endif
64 
/* One contiguous window of the locale archive that has been mmap'd.
   The windows are chained into a singly linked list.  */
struct archmapped
{
  void *ptr;			/* Address at which the window is mapped.  */
  uint32_t from;		/* File offset of the first mapped byte.  */
  uint32_t len;			/* Number of bytes mapped.  */
  struct archmapped *next;	/* Next window in the list, or NULL.  */
};

/* Head of the list of mapped windows; NULL until the first attempt to
   use the archive has been made.  */
static struct archmapped *archmapped;
74 
75 /* This describes the mapping at the beginning of the file that contains
76    the header data.  There could be data in the following partial page,
77    so this is searched like any other.  Once the archive has been used,
78    ARCHMAPPED points to this; if mapping the archive header failed,
79    then headmap.ptr is null.  */
80 static struct archmapped headmap;
81 static struct __stat64_t64 archive_stat; /* stat of archive when header mapped.  */
82 
83 /* Record of locales that we have already loaded from the archive.  */
84 struct locale_in_archive
85 {
86   struct locale_in_archive *next;
87   char *name;
88   struct __locale_data *data[__LC_LAST];
89 };
90 static struct locale_in_archive *archloaded;
91 
92 
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  A range describes where the data of one locale category lives
   in the archive file.  */
struct range
{
  uint32_t from;	/* File offset of the category data.  */
  uint32_t len;		/* Length of the category data in bytes.  */
  int category;		/* Category index the data belongs to.  */
  void *result;		/* Set to NULL when the range is set up; not
			   otherwise used by the code in this file.  */
};

/* qsort comparison function ordering two struct range objects by
   ascending FROM offset.  Returns a negative value, zero, or a positive
   value when *P1 sorts before, equal to, or after *P2.

   The offsets are compared rather than subtracted: the difference of two
   uint32_t values converted to int has the wrong sign as soon as the
   offsets are 2^31 or more apart.  */
static int
rangecmp (const void *p1, const void *p2)
{
  uint32_t from1 = ((const struct range *) p1)->from;
  uint32_t from2 = ((const struct range *) p2)->from;

  return (from1 > from2) - (from1 < from2);
}
107 
108 
109 /* Calculate the amount of space needed for all the tables described
110    by the given header.  Note we do not include the empty table space
111    that has been preallocated in the file, so our mapping may not be
112    large enough if localedef adds data to the file in place.  However,
113    doing that would permute the header fields while we are accessing
114    them and thus not be safe anyway, so we don't allow for that.  */
115 static inline off_t
calculate_head_size(const struct locarhead * h)116 calculate_head_size (const struct locarhead *h)
117 {
118   off_t namehash_end = (h->namehash_offset
119 			+ h->namehash_size * sizeof (struct namehashent));
120   off_t string_end =  h->string_offset + h->string_used;
121   off_t locrectab_end = (h->locrectab_offset
122 			 + h->locrectab_used * sizeof (struct locrecent));
123   return MAX (namehash_end, MAX (string_end, locrectab_end));
124 }
125 
126 
127 /* Find the locale *NAMEP in the locale archive, and return the
128    internalized data structure for its CATEGORY data.  If this locale has
129    already been loaded from the archive, just returns the existing data
130    structure.  If successful, sets *NAMEP to point directly into the mapped
131    archive string table; that way, the next call can short-circuit strcmp.  */
132 struct __locale_data *
_nl_load_locale_from_archive(int category,const char ** namep)133 _nl_load_locale_from_archive (int category, const char **namep)
134 {
135   const char *name = *namep;
136   struct
137   {
138     void *addr;
139     size_t len;
140   } results[__LC_LAST];
141   struct locale_in_archive *lia;
142   struct locarhead *head;
143   struct namehashent *namehashtab;
144   struct locrecent *locrec;
145   struct archmapped *mapped;
146   struct archmapped *last;
147   unsigned long int hval;
148   size_t idx;
149   size_t incr;
150   struct range ranges[__LC_LAST - 1];
151   int nranges;
152   int cnt;
153   size_t ps = __sysconf (_SC_PAGE_SIZE);
154   int fd = -1;
155 
156   /* Check if we have already loaded this locale from the archive.
157      If we previously loaded the locale but found bogons in the data,
158      then we will have stored a null pointer to return here.  */
159   for (lia = archloaded; lia != NULL; lia = lia->next)
160     if (name == lia->name || !strcmp (name, lia->name))
161       {
162 	*namep = lia->name;
163 	return lia->data[category];
164       }
165 
166   {
167     /* If the name contains a codeset, then we normalize the name before
168        doing the lookup.  */
169     const char *p = strchr (name, '.');
170     if (p != NULL && p[1] != '@' && p[1] != '\0')
171       {
172 	const char *rest = __strchrnul (++p, '@');
173 	const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
174 	if (normalized_codeset == NULL)	/* malloc failure */
175 	  return NULL;
176 	if (strncmp (normalized_codeset, p, rest - p) != 0
177 	    || normalized_codeset[rest - p] != '\0')
178 	  {
179 	    /* There is a normalized codeset name that is different from
180 	       what was specified; reconstruct a new locale name using it.  */
181 	    size_t normlen = strlen (normalized_codeset);
182 	    size_t restlen = strlen (rest) + 1;
183 	    char *newname = alloca (p - name + normlen + restlen);
184 	    memcpy (__mempcpy (__mempcpy (newname, name, p - name),
185 			       normalized_codeset, normlen),
186 		    rest, restlen);
187 	    name = newname;
188 	  }
189 	free ((char *) normalized_codeset);
190       }
191   }
192 
193   /* Make sure the archive is loaded.  */
194   if (archmapped == NULL)
195     {
196       void *result;
197       size_t headsize, mapsize;
198 
199       /* We do this early as a sign that we have tried to open the archive.
200 	 If headmap.ptr remains null, that's an indication that we tried
201 	 and failed, so we won't try again.  */
202       archmapped = &headmap;
203 
204       /* The archive has never been opened.  */
205       fd = __open_nocancel (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC);
206       if (fd < 0)
207 	/* Cannot open the archive, for whatever reason.  */
208 	return NULL;
209 
210       if (__fstat64_time64 (fd, &archive_stat) == -1)
211 	{
212 	  /* stat failed, very strange.  */
213 	close_and_out:
214 	  if (fd >= 0)
215 	    __close_nocancel_nostatus (fd);
216 	  return NULL;
217 	}
218 
219 
220       /* Map an initial window probably large enough to cover the header
221 	 and the first locale's data.  With a large address space, we can
222 	 just map the whole file and be sure everything is covered.  */
223 
224       mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
225 		 : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
226 
227       result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
228       if (result == MAP_FAILED)
229 	goto close_and_out;
230 
231       /* Check whether the file is large enough for the sizes given in
232 	 the header.  Theoretically an archive could be so large that
233 	 just the header fails to fit in our initial mapping window.  */
234       headsize = calculate_head_size ((const struct locarhead *) result);
235       if (headsize > mapsize)
236 	{
237 	  (void) __munmap (result, mapsize);
238 	  if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
239 	    /* The file is not big enough for the header.  Bogus.  */
240 	    goto close_and_out;
241 
242 	  /* Freakishly long header.  */
243 	  /* XXX could use mremap when available */
244 	  mapsize = (headsize + ps - 1) & ~(ps - 1);
245 	  result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
246 			     fd, 0);
247 	  if (result == MAP_FAILED)
248 	    goto close_and_out;
249 	}
250 
251       if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
252 	{
253 	  /* We've mapped the whole file already, so we can be
254 	     sure we won't need this file descriptor later.  */
255 	  __close_nocancel_nostatus (fd);
256 	  fd = -1;
257 	}
258 
259       headmap.ptr = result;
260       /* headmap.from already initialized to zero.  */
261       headmap.len = mapsize;
262     }
263 
264   /* If there is no archive or it cannot be loaded for some reason fail.  */
265   if (__glibc_unlikely (headmap.ptr == NULL))
266     goto close_and_out;
267 
268   /* We have the archive available.  To find the name we first have to
269      determine its hash value.  */
270   hval = compute_hashval (name, strlen (name));
271 
272   head = headmap.ptr;
273   namehashtab = (struct namehashent *) ((char *) head
274 					+ head->namehash_offset);
275 
276   /* Avoid division by 0 if the file is corrupted.  */
277   if (__glibc_unlikely (head->namehash_size <= 2))
278     goto close_and_out;
279 
280   idx = hval % head->namehash_size;
281   incr = 1 + hval % (head->namehash_size - 2);
282 
283   /* If the name_offset field is zero this means this is a
284      deleted entry and therefore no entry can be found.  */
285   while (1)
286     {
287       if (namehashtab[idx].name_offset == 0)
288 	/* Not found.  */
289 	goto close_and_out;
290 
291       if (namehashtab[idx].hashval == hval
292 	  && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
293 	/* Found the entry.  */
294 	break;
295 
296       idx += incr;
297       if (idx >= head->namehash_size)
298 	idx -= head->namehash_size;
299     }
300 
301   /* We found an entry.  It might be a placeholder for a removed one.  */
302   if (namehashtab[idx].locrec_offset == 0)
303     goto close_and_out;
304 
305   locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
306 
307   if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
308     {
309       /* We already have the whole locale archive mapped in.  */
310       assert (headmap.len == archive_stat.st_size);
311       for (cnt = 0; cnt < __LC_LAST; ++cnt)
312 	if (cnt != LC_ALL)
313 	  {
314 	    if (locrec->record[cnt].offset + locrec->record[cnt].len
315 		> headmap.len)
316 	      /* The archive locrectab contains bogus offsets.  */
317 	      goto close_and_out;
318 	    results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
319 	    results[cnt].len = locrec->record[cnt].len;
320 	  }
321     }
322   else
323     {
324       /* Get the offsets of the data files and sort them.  */
325       for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
326 	if (cnt != LC_ALL)
327 	  {
328 	    ranges[nranges].from = locrec->record[cnt].offset;
329 	    ranges[nranges].len = locrec->record[cnt].len;
330 	    ranges[nranges].category = cnt;
331 	    ranges[nranges].result = NULL;
332 
333 	    ++nranges;
334 	  }
335 
336       qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
337 
338       /* The information about mmap'd blocks is kept in a list.
339 	 Skip over the blocks which are before the data we need.  */
340       last = mapped = archmapped;
341       for (cnt = 0; cnt < nranges; ++cnt)
342 	{
343 	  int upper;
344 	  size_t from;
345 	  size_t to;
346 	  void *addr;
347 	  struct archmapped *newp;
348 
349 	  /* Determine whether the appropriate page is already mapped.  */
350 	  while (mapped != NULL
351 		 && (mapped->from + mapped->len
352 		     <= ranges[cnt].from + ranges[cnt].len))
353 	    {
354 	      last = mapped;
355 	      mapped = mapped->next;
356 	    }
357 
358 	  /* Do we have a match?  */
359 	  if (mapped != NULL
360 	      && mapped->from <= ranges[cnt].from
361 	      && (ranges[cnt].from + ranges[cnt].len
362 		  <= mapped->from + mapped->len))
363 	    {
364 	      /* Yep, already loaded.  */
365 	      results[ranges[cnt].category].addr = ((char *) mapped->ptr
366 						    + ranges[cnt].from
367 						    - mapped->from);
368 	      results[ranges[cnt].category].len = ranges[cnt].len;
369 	      continue;
370 	    }
371 
372 	  /* Map the range with the locale data from the file.  We will
373 	     try to cover as much of the locale as possible.  I.e., if the
374 	     next category (next as in "next offset") is on the current or
375 	     immediately following page we use it as well.  */
376 	  assert (powerof2 (ps));
377 	  from = ranges[cnt].from & ~(ps - 1);
378 	  upper = cnt;
379 	  do
380 	    {
381 	      to = ranges[upper].from + ranges[upper].len;
382 	      if (to > (size_t) archive_stat.st_size)
383 		/* The archive locrectab contains bogus offsets.  */
384 		goto close_and_out;
385 	      to = (to + ps - 1) & ~(ps - 1);
386 
387 	      /* If a range is already mmaped in, stop.	 */
388 	      if (mapped != NULL && ranges[upper].from >= mapped->from)
389 		break;
390 
391 	      ++upper;
392 	    }
393 	  /* Loop while still in contiguous pages. */
394 	  while (upper < nranges && ranges[upper].from < to + ps);
395 
396 	  /* Open the file if it hasn't happened yet.  */
397 	  if (fd == -1)
398 	    {
399 	      struct __stat64_t64 st;
400 	      fd = __open_nocancel (archfname,
401 				    O_RDONLY|O_LARGEFILE|O_CLOEXEC);
402 	      if (fd == -1)
403 		/* Cannot open the archive, for whatever reason.  */
404 		return NULL;
405 	      /* Now verify we think this is really the same archive file
406 		 we opened before.  If it has been changed we cannot trust
407 		 the header we read previously.  */
408 	      if (__fstat64_time64 (fd, &st) < 0
409 		  || st.st_size != archive_stat.st_size
410 		  || st.st_mtime != archive_stat.st_mtime
411 		  || st.st_dev != archive_stat.st_dev
412 		  || st.st_ino != archive_stat.st_ino)
413 		goto close_and_out;
414 	    }
415 
416 	  /* Map the range from the archive.  */
417 	  addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
418 			   fd, from);
419 	  if (addr == MAP_FAILED)
420 	    goto close_and_out;
421 
422 	  /* Allocate a record for this mapping.  */
423 	  newp = (struct archmapped *) malloc (sizeof (struct archmapped));
424 	  if (newp == NULL)
425 	    {
426 	      (void) __munmap (addr, to - from);
427 	      goto close_and_out;
428 	    }
429 
430 	  /* And queue it.  */
431 	  newp->ptr = addr;
432 	  newp->from = from;
433 	  newp->len = to - from;
434 	  assert (last->next == mapped);
435 	  newp->next = mapped;
436 	  last->next = newp;
437 	  last = newp;
438 
439 	  /* Determine the load addresses for the category data.  */
440 	  do
441 	    {
442 	      assert (ranges[cnt].from >= from);
443 	      results[ranges[cnt].category].addr = ((char *) addr
444 						    + ranges[cnt].from - from);
445 	      results[ranges[cnt].category].len = ranges[cnt].len;
446 	    }
447 	  while (++cnt < upper);
448 	  --cnt;		/* The 'for' will increase 'cnt' again.  */
449 	}
450     }
451 
452   /* We don't need the file descriptor any longer.  */
453   if (fd >= 0)
454     __close_nocancel_nostatus (fd);
455   fd = -1;
456 
457   /* We succeeded in mapping all the necessary regions of the archive.
458      Now we need the expected data structures to point into the data.  */
459 
460   lia = malloc (sizeof *lia);
461   if (__glibc_unlikely (lia == NULL))
462     return NULL;
463 
464   lia->name = __strdup (*namep);
465   if (__glibc_unlikely (lia->name == NULL))
466     {
467       free (lia);
468       return NULL;
469     }
470 
471   lia->next = archloaded;
472   archloaded = lia;
473 
474   for (cnt = 0; cnt < __LC_LAST; ++cnt)
475     if (cnt != LC_ALL)
476       {
477 	lia->data[cnt] = _nl_intern_locale_data (cnt,
478 						 results[cnt].addr,
479 						 results[cnt].len);
480 	if (__glibc_likely (lia->data[cnt] != NULL))
481 	  {
482 	    /* _nl_intern_locale_data leaves us these fields to initialize.  */
483 	    lia->data[cnt]->alloc = ld_archive;
484 	    lia->data[cnt]->name = lia->name;
485 
486 	    /* We do this instead of bumping the count each time we return
487 	       this data because the mappings stay around forever anyway
488 	       and we might as well hold on to a little more memory and not
489 	       have to rebuild it on the next lookup of the same thing.
490 	       If we were to maintain the usage_count normally and let the
491 	       structures be freed, we would have to remove the elements
492 	       from archloaded too.  */
493 	    lia->data[cnt]->usage_count = UNDELETABLE;
494 	  }
495       }
496 
497   *namep = lia->name;
498   return lia->data[category];
499 }
500 
501 void __libc_freeres_fn_section
_nl_archive_subfreeres(void)502 _nl_archive_subfreeres (void)
503 {
504   struct locale_in_archive *lia;
505   struct archmapped *am;
506 
507   /* Toss out our cached locales.  */
508   lia = archloaded;
509   while (lia != NULL)
510     {
511       int category;
512       struct locale_in_archive *dead = lia;
513       lia = lia->next;
514 
515       free (dead->name);
516       for (category = 0; category < __LC_LAST; ++category)
517 	if (category != LC_ALL && dead->data[category] != NULL)
518 	  _nl_unload_locale (category, dead->data[category]);
519       free (dead);
520     }
521   archloaded = NULL;
522 
523   if (archmapped != NULL)
524     {
525       /* Now toss all the mapping windows, which we know nothing is using any
526 	 more because we just tossed all the locales that point into them.  */
527 
528       assert (archmapped == &headmap);
529       archmapped = NULL;
530       (void) __munmap (headmap.ptr, headmap.len);
531       am = headmap.next;
532       while (am != NULL)
533 	{
534 	  struct archmapped *dead = am;
535 	  am = am->next;
536 	  (void) __munmap (dead->ptr, dead->len);
537 	  free (dead);
538 	}
539     }
540 }
541