1 /* Copyright (C) 1995-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published
6    by the Free Software Foundation; version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 
21 #include <alloca.h>
22 #include <byteswap.h>
23 #include <endian.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <obstack.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <stdint.h>
32 #include <sys/uio.h>
33 
34 #include "localedef.h"
35 #include "charmap.h"
36 #include "localeinfo.h"
37 #include "langinfo.h"
38 #include "linereader.h"
39 #include "locfile-token.h"
40 #include "locfile.h"
41 
42 #include <assert.h>
43 
44 
/* Bit helpers for the standard character classes.

   BITPOS yields the zero-based position of a class token relative to
   tok_upper.  BIT and BITw turn that position into the mask used with
   the single-byte class table (`_ISbit') and with the wide character
   class table (`_ISwbit') respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the (assignable) slot which find_idx returns for VALUE in
   the table CTYPE->COLLECTION.  IDX is either empty or a subscript such
   as `[n]' which selects one table out of an array of tables; the
   matching COLLECTION_max and COLLECTION_act members are passed along.
   The whole expansion and the CTYPE argument are parenthesized so the
   macro can be combined safely with any neighbouring operator.  */
#define ELEM(ctype, collection, idx, value)				      \
  (*find_idx ((ctype), &(ctype)->collection idx,			      \
	      &(ctype)->collection##_max idx,				      \
	      &(ctype)->collection##_act idx, (value)))
53 
54 
/* Integer types holding the class bits of one character.  To stay
   compatible with former implementations the number of bits for
   character classes is restricted to 16 for now.  Once that
   compatibility is no longer necessary the number can be increased
   to 32.  char_class32_t is the 32-bit variant.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
60 
61 
/* Types to describe a transliteration action.  We have a possibly
   multi-character from-string and a set of multi-character
   to-strings.  All are 32-bit values since this is what is used in
   the gconv functions.  */

/* One to-string.  The to-strings belonging to one from-string are
   chained through NEXT.  */
struct translit_to_t
{
  /* The replacement text as 32-bit values.  */
  uint32_t *str;

  /* Next to-string in the chain.  */
  struct translit_to_t *next;
};
72 
/* One transliteration rule: a from-string together with the chain of
   its to-strings.  Rules are chained through NEXT.  */
struct translit_t
{
  /* The text to be replaced, as 32-bit values.  */
  uint32_t *from;

  /* NOTE(review): presumably the source file and line in which the rule
     was defined, kept for diagnostics -- confirm against the code which
     fills these members in.  */
  const char *fname;
  size_t lineno;

  /* Head of the chain of to-strings.  */
  struct translit_to_t *to;

  /* Next rule in the chain.  */
  struct translit_t *next;
};
84 
/* One entry of the translit_ignore list.  ctype_finish orders the list
   by the FROM member.  */
struct translit_ignore_t
{
  /* NOTE(review): judging by the names these describe a range of
     characters FROM..TO visited in increments of STEP -- confirm
     against the code which reads the translit_ignore statement.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* NOTE(review): presumably the source file and line of the
     definition -- confirm.  */
  const char *fname;
  size_t lineno;

  /* Next entry in the list.  */
  struct translit_ignore_t *next;
};
96 
97 
/* Type to describe a transliteration include statement.  Several
   statements are chained through NEXT.  */
struct translit_include_t
{
  /* NOTE(review): by their names, the locale to take the
     transliteration data from and the repertoire to use with it --
     confirm against the code which handles the include statement.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement in the chain.  */
  struct translit_include_t *next;
};
106 
/* Provide a dummy pointer for the empty string: a 32-bit string which
   consists of nothing but the terminating zero.  */
static uint32_t no_str[] = { 0 };
109 
110 
/* Instantiate the generic code of "3level.h" three times.  Each
   instantiation is configured through the TABLE, ELEMENT and DEFAULT
   macros which are defined immediately before the #include.
   NOTE(review): TABLE, ELEMENT and DEFAULT are redefined below without
   an #undef in this file, so "3level.h" presumably undefines them
   itself -- confirm.  */

/* Sparse table of uint32_t.
   NOTE(review): NO_ADD_LOCALE presumably suppresses the generated
   add_locale_ function for this instantiation -- confirm in
   "3level.h".  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of uint8_t; struct locale_ctype_t uses it for its
   `width' member.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Sparse table of int32_t; struct locale_ctype_t uses it for its
   `map_3level' member.  While the header is processed the name
   wctrans_table_add is redirected to wctrans_table_add_internal so
   that the function following this block can take over the name
   wctrans_table_add.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* A wctrans_table does not hold the mapped character itself but the
   distance between the mapped character and the argument.  Record in T
   that WC maps to MAPPED_WC by handing that distance to the generated
   wctrans_table_add_internal.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  const uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
136 
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation: one array of uint32_t per level.
     NOTE(review): judging by the names, levelN_alloc is the allocated
     capacity and levelN_size the part in use -- confirm against the
     functions which build the table.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* NOTE(review): presumably the size of the finished table -- confirm
     against the code which finalizes it.  */
  size_t result_size;
};

/* Forward declaration; the definition is not part of this section of
   the file.  NOTE(review): by its name it adds table T to the locale
   data collected in FILE -- confirm.  */
static void add_locale_wctype_table (struct locale_file *file,
				     struct wctype_table *t);
160 
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* The wide character values known to this category.  Entry N of this
     array and entry N of class_collection describe the same character
     (ctype_finish names the character of class_collection[N] through
     charnames[N]).  ctype_startup presets the first 256 entries with
     the values 0..255.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  /* The repertoire used to find the wide character value which belongs
     to a symbolic character name.  */
  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  uint32_t last_class_char;
  /* Class bits of the 256 single-byte values; tested with `_ISbit'
     masks.  */
  uint32_t class256_collection[256];
  /* Class bits of the wide characters; tested with `_ISwbit' masks.
     ctype_startup allocates class_collection_max entries and marks the
     first 256 as in use through class_collection_act.  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* The input digits in multibyte and in wide character form.
     ctype_finish only accepts complete groups of ten and installs a
     default group if none is left.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* The ten output digits in multibyte and in wide character form.
     ctype_finish replaces missing ones with a question mark.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* The character mappings.  ctype_startup creates "toupper" and
     "tolower", in this order, and afterwards presets entries 0 and 1 of
     map_collection and both rows of map256_collection with the
     identity for the values 0..255.  */
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  ctype_finish sorts the
     translit_ignore list and stores the number of its entries in
     ntranslit_ignore.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  /* Zero-terminated wide string (ctype_output measures it with wcslen)
     or NULL.  NOTE(review): presumably the replacement used when no
     transliteration is available, plus the place of its definition --
     confirm.  */
  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* to_nonascii is set by ctype_startup if enc_not_ascii_compatible is
     set.  nonascii_case is set by ctype_finish if the single-byte case
     mappings differ from the ASCII rules.  */
  uint32_t to_nonascii;
  uint32_t nonascii_case;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct wctype_table *class_3level;
  struct wctrans_table *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct wcwidth_table width;
  uint32_t mb_cur_max;
  /* Taken from the charmap by ctype_finish; "//UNKNOWN//" if the
     charmap does not name its character set.  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  /* Both sizes are divided by sizeof (uint32_t) in ctype_output to get
     the number of elements, i.e. they count bytes.  */
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Initialized by ctype_startup.  */
  struct obstack mempool;
};
244 
245 
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   of whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros of <obstack.h>: chunks
   are obtained with xmalloc and released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
253 
254 
/* Prototypes for local functions.  */

/* Set up the LC_CTYPE data of LOCALE before its definition is read.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);
/* ctype_startup calls the next two functions once per predefined class
   and mapping name; the order of the class calls determines the bit
   positions.  */
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype, const char *name);
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name, const struct charmap_t *charmap);
/* Returns a pointer to the slot for IDX in *TABLE.  ctype_finish also
   calls it with NULL for TABLE, MAX and ACT when it only wants the side
   effect of entering IDX into the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
			   size_t *max, size_t *act, uint32_t idx);
/* Called by ctype_finish before the class checks.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);
/* Called by ctype_output before the locale data is written.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
273 
274 
/* Names and byte values of the ten decimal digits, indexed by the
   digit's value.  None of the tables is ever modified, hence the
   pointer arrays themselves are const as well.

   ctype_finish looks a default input digit up in the charmap under its
   one-byte name from `digits' first and under its name from
   `longnames' second; if both fail the byte from `digits' is used
   as is.  `uninames' holds the matching U<code point> style names.  */
static const char *const longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
static const char *const uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
static const unsigned char digits[] = "0123456789";
286 
287 
288 static void
ctype_startup(struct linereader * lr,struct localedef_t * locale,const struct charmap_t * charmap,struct localedef_t * copy_locale,int ignore_content)289 ctype_startup (struct linereader *lr, struct localedef_t *locale,
290 	       const struct charmap_t *charmap,
291 	       struct localedef_t *copy_locale, int ignore_content)
292 {
293   unsigned int cnt;
294   struct locale_ctype_t *ctype;
295 
296   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
297     {
298       if (copy_locale == NULL)
299 	{
300 	  /* Allocate the needed room.  */
301 	  locale->categories[LC_CTYPE].ctype = ctype =
302 	    (struct locale_ctype_t *) xcalloc (1,
303 					       sizeof (struct locale_ctype_t));
304 
305 	  /* We have seen no names yet.  */
306 	  ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
307 	  ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
308 						   * sizeof (uint32_t));
309 	  for (cnt = 0; cnt < 256; ++cnt)
310 	    ctype->charnames[cnt] = cnt;
311 	  ctype->charnames_act = 256;
312 	  idx_table_init (&ctype->charnames_idx);
313 
314 	  /* Fill character class information.  */
315 	  ctype->last_class_char = ILLEGAL_CHAR_VALUE;
316 	  /* The order of the following instructions determines the bit
317 	     positions!  */
318 	  ctype_class_new (lr, ctype, "upper");
319 	  ctype_class_new (lr, ctype, "lower");
320 	  ctype_class_new (lr, ctype, "alpha");
321 	  ctype_class_new (lr, ctype, "digit");
322 	  ctype_class_new (lr, ctype, "xdigit");
323 	  ctype_class_new (lr, ctype, "space");
324 	  ctype_class_new (lr, ctype, "print");
325 	  ctype_class_new (lr, ctype, "graph");
326 	  ctype_class_new (lr, ctype, "blank");
327 	  ctype_class_new (lr, ctype, "cntrl");
328 	  ctype_class_new (lr, ctype, "punct");
329 	  ctype_class_new (lr, ctype, "alnum");
330 
331 	  ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
332 	  ctype->class_collection
333 	    = (uint32_t *) xcalloc (sizeof (unsigned long int),
334 				    ctype->class_collection_max);
335 	  ctype->class_collection_act = 256;
336 
337 	  /* Fill character map information.  */
338 	  ctype->last_map_idx = MAX_NR_CHARMAP;
339 	  ctype_map_new (lr, ctype, "toupper", charmap);
340 	  ctype_map_new (lr, ctype, "tolower", charmap);
341 
342 	  /* Fill first 256 entries in `toXXX' arrays.  */
343 	  for (cnt = 0; cnt < 256; ++cnt)
344 	    {
345 	      ctype->map_collection[0][cnt] = cnt;
346 	      ctype->map_collection[1][cnt] = cnt;
347 
348 	      ctype->map256_collection[0][cnt] = cnt;
349 	      ctype->map256_collection[1][cnt] = cnt;
350 	    }
351 
352 	  if (enc_not_ascii_compatible)
353 	    ctype->to_nonascii = 1;
354 
355 	  obstack_init (&ctype->mempool);
356 	}
357       else
358 	ctype = locale->categories[LC_CTYPE].ctype =
359 	  copy_locale->categories[LC_CTYPE].ctype;
360     }
361 }
362 
363 
364 void
ctype_finish(struct localedef_t * locale,const struct charmap_t * charmap)365 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
366 {
367   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
368 #define NCLASS 12
369   static const struct
370   {
371     const char *name;
372     const char allow[NCLASS];
373   }
374   valid_table[NCLASS] =
375   {
376     /* The order is important.  See token.h for more information.
377        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
378     { "upper",  "--MX-XDDXXX-" },
379     { "lower",  "--MX-XDDXXX-" },
380     { "alpha",  "---X-XDDXXX-" },
381     { "digit",  "XXX--XDDXXX-" },
382     { "xdigit", "-----XDDXXX-" },
383     { "space",  "XXXXX------X" },
384     { "print",  "---------X--" },
385     { "graph",  "---------X--" },
386     { "blank",  "XXXXXM-----X" },
387     { "cntrl",  "XXXXX-XX--XX" },
388     { "punct",  "XXXXX-DD-X-X" },
389     { "alnum",  "-----XDDXXX-" }
390   };
391   size_t cnt;
392   int cls1, cls2;
393   uint32_t space_value;
394   struct charseq *space_seq;
395   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
396   int warned;
397   const void *key;
398   size_t len;
399   void *vdata;
400   void *curs;
401 
402   /* Now resolve copying and also handle completely missing definitions.  */
403   if (ctype == NULL)
404     {
405       const char *repertoire_name;
406 
407       /* First see whether we were supposed to copy.  If yes, find the
408 	 actual definition.  */
409       if (locale->copy_name[LC_CTYPE] != NULL)
410 	{
411 	  /* Find the copying locale.  This has to happen transitively since
412 	     the locale we are copying from might also copying another one.  */
413 	  struct localedef_t *from = locale;
414 
415 	  do
416 	    from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
417 				from->repertoire_name, charmap);
418 	  while (from->categories[LC_CTYPE].ctype == NULL
419 		 && from->copy_name[LC_CTYPE] != NULL);
420 
421 	  ctype = locale->categories[LC_CTYPE].ctype
422 	    = from->categories[LC_CTYPE].ctype;
423 	}
424 
425       /* If there is still no definition issue an warning and create an
426 	 empty one.  */
427       if (ctype == NULL)
428 	{
429 	  record_warning (_("\
430 No definition for %s category found"), "LC_CTYPE");
431 	  ctype_startup (NULL, locale, charmap, NULL, 0);
432 	  ctype = locale->categories[LC_CTYPE].ctype;
433 	}
434 
435       /* Get the repertoire we have to use.  */
436       repertoire_name = locale->repertoire_name ?: repertoire_global;
437       if (repertoire_name != NULL)
438 	ctype->repertoire = repertoire_read (repertoire_name);
439     }
440 
441   /* We need the name of the currently used 8-bit character set to
442      make correct conversion between this 8-bit representation and the
443      ISO 10646 character set used internally for wide characters.  */
444   ctype->codeset_name = charmap->code_set_name;
445   if (ctype->codeset_name == NULL)
446     {
447       record_error (0, 0, _("\
448 No character set name specified in charmap"));
449       ctype->codeset_name = "//UNKNOWN//";
450     }
451 
452   /* Set default value for classes not specified.  */
453   set_class_defaults (ctype, charmap, ctype->repertoire);
454 
455   /* Check according to table.  */
456   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
457     {
458       uint32_t tmp = ctype->class_collection[cnt];
459 
460       if (tmp != 0)
461 	{
462 	  for (cls1 = 0; cls1 < NCLASS; ++cls1)
463 	    if ((tmp & _ISwbit (cls1)) != 0)
464 	      for (cls2 = 0; cls2 < NCLASS; ++cls2)
465 		if (valid_table[cls1].allow[cls2] != '-')
466 		  {
467 		    int eq = (tmp & _ISwbit (cls2)) != 0;
468 		    switch (valid_table[cls1].allow[cls2])
469 		      {
470 		      case 'M':
471 			if (!eq)
472 			  {
473 			    uint32_t value = ctype->charnames[cnt];
474 
475 			    record_error (0, 0, _("\
476 character L'\\u%0*x' in class `%s' must be in class `%s'"),
477 					  value > 0xffff ? 8 : 4,
478 					  value,
479 					  valid_table[cls1].name,
480 					  valid_table[cls2].name);
481 			  }
482 			break;
483 
484 		      case 'X':
485 			if (eq)
486 			  {
487 			    uint32_t value = ctype->charnames[cnt];
488 
489 			    record_error (0, 0, _("\
490 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
491 					  value > 0xffff ? 8 : 4,
492 					  value,
493 					  valid_table[cls1].name,
494 					  valid_table[cls2].name);
495 			  }
496 			break;
497 
498 		      case 'D':
499 			ctype->class_collection[cnt] |= _ISwbit (cls2);
500 			break;
501 
502 		      default:
503 			record_error (5, 0, _("\
504 internal error in %s, line %u"), __FUNCTION__, __LINE__);
505 		      }
506 		  }
507 	}
508     }
509 
510   for (cnt = 0; cnt < 256; ++cnt)
511     {
512       uint32_t tmp = ctype->class256_collection[cnt];
513 
514       if (tmp != 0)
515 	{
516 	  for (cls1 = 0; cls1 < NCLASS; ++cls1)
517 	    if ((tmp & _ISbit (cls1)) != 0)
518 	      for (cls2 = 0; cls2 < NCLASS; ++cls2)
519 		if (valid_table[cls1].allow[cls2] != '-')
520 		  {
521 		    int eq = (tmp & _ISbit (cls2)) != 0;
522 		    switch (valid_table[cls1].allow[cls2])
523 		      {
524 		      case 'M':
525 			if (!eq)
526 			  {
527 			    char buf[17];
528 
529 			    snprintf (buf, sizeof buf, "\\%Zo", cnt);
530 
531 			    record_error (0, 0, _("\
532 character '%s' in class `%s' must be in class `%s'"),
533 					  buf,
534 					  valid_table[cls1].name,
535 					  valid_table[cls2].name);
536 			  }
537 			break;
538 
539 		      case 'X':
540 			if (eq)
541 			  {
542 			    char buf[17];
543 
544 			    snprintf (buf, sizeof buf, "\\%Zo", cnt);
545 
546 			    record_error (0, 0, _("\
547 character '%s' in class `%s' must not be in class `%s'"),
548 					  buf,
549 					  valid_table[cls1].name,
550 					  valid_table[cls2].name);
551 			  }
552 			break;
553 
554 		      case 'D':
555 			ctype->class256_collection[cnt] |= _ISbit (cls2);
556 			break;
557 
558 		      default:
559 			record_error (5, 0, _("\
560 internal error in %s, line %u"), __FUNCTION__, __LINE__);
561 		      }
562 		  }
563 	}
564     }
565 
566   /* ... and now test <SP> as a special case.  */
567   space_value = 32;
568   if (((cnt = BITPOS (tok_space),
569 	(ELEM (ctype, class_collection, , space_value)
570 	 & BITw (tok_space)) == 0)
571        || (cnt = BITPOS (tok_blank),
572 	   (ELEM (ctype, class_collection, , space_value)
573 	    & BITw (tok_blank)) == 0)))
574     {
575       record_error (0, 0, _("<SP> character not in class `%s'"),
576 		    valid_table[cnt].name);
577     }
578   else if (((cnt = BITPOS (tok_punct),
579 	     (ELEM (ctype, class_collection, , space_value)
580 	      & BITw (tok_punct)) != 0)
581 	    || (cnt = BITPOS (tok_graph),
582 		(ELEM (ctype, class_collection, , space_value)
583 		 & BITw (tok_graph))
584 		!= 0)))
585     {
586       record_error (0, 0, _("\
587 <SP> character must not be in class `%s'"),
588 				valid_table[cnt].name);
589     }
590   else
591     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
592 
593   space_seq = charmap_find_value (charmap, "SP", 2);
594   if (space_seq == NULL)
595     space_seq = charmap_find_value (charmap, "space", 5);
596   if (space_seq == NULL)
597     space_seq = charmap_find_value (charmap, "U00000020", 9);
598   if (space_seq == NULL || space_seq->nbytes != 1)
599     {
600       record_error (0, 0, _("\
601 character <SP> not defined in character map"));
602     }
603   else if (((cnt = BITPOS (tok_space),
604 	     (ctype->class256_collection[space_seq->bytes[0]]
605 	      & BIT (tok_space)) == 0)
606 	    || (cnt = BITPOS (tok_blank),
607 		(ctype->class256_collection[space_seq->bytes[0]]
608 		 & BIT (tok_blank)) == 0)))
609     {
610        record_error (0, 0, _("<SP> character not in class `%s'"),
611 		     valid_table[cnt].name);
612     }
613   else if (((cnt = BITPOS (tok_punct),
614 	     (ctype->class256_collection[space_seq->bytes[0]]
615 	      & BIT (tok_punct)) != 0)
616 	    || (cnt = BITPOS (tok_graph),
617 		(ctype->class256_collection[space_seq->bytes[0]]
618 		 & BIT (tok_graph)) != 0)))
619     {
620       record_error (0, 0, _("\
621 <SP> character must not be in class `%s'"),
622 		    valid_table[cnt].name);
623     }
624   else
625     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
626 
627   /* Check whether all single-byte characters make to their upper/lowercase
628      equivalent according to the ASCII rules.  */
629   for (cnt = 'A'; cnt <= 'Z'; ++cnt)
630     {
631       uint32_t uppval = ctype->map256_collection[0][cnt];
632       uint32_t lowval = ctype->map256_collection[1][cnt];
633       uint32_t lowuppval = ctype->map256_collection[0][lowval];
634       uint32_t lowlowval = ctype->map256_collection[1][lowval];
635 
636       if (uppval != cnt
637 	  || lowval != cnt + 0x20
638 	  || lowuppval != cnt
639 	  || lowlowval != cnt + 0x20)
640 	ctype->nonascii_case = 1;
641     }
642   for (cnt = 0; cnt < 256; ++cnt)
643     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
644       if (ctype->map256_collection[0][cnt] != cnt
645 	  || ctype->map256_collection[1][cnt] != cnt)
646 	ctype->nonascii_case = 1;
647 
648   /* Now that the tests are done make sure the name array contains all
649      characters which are handled in the WIDTH section of the
650      character set definition file.  */
651   if (charmap->width_rules != NULL)
652     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
653       {
654 	unsigned char bytes[charmap->mb_cur_max];
655 	int nbytes = charmap->width_rules[cnt].from->nbytes;
656 
657 	/* We have the range of character for which the width is
658            specified described using byte sequences of the multibyte
659            charset.  We have to convert this to UCS4 now.  And we
660            cannot simply convert the beginning and the end of the
661            sequence, we have to iterate over the byte sequence and
662            convert it for every single character.  */
663 	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
664 
665 	while (nbytes < charmap->width_rules[cnt].to->nbytes
666 	       || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
667 			  nbytes) <= 0)
668 	  {
669 	    /* Find the UCS value for `bytes'.  */
670 	    int inner;
671 	    uint32_t wch;
672 	    struct charseq *seq
673 	      = charmap_find_symbol (charmap, (char *) bytes, nbytes);
674 
675 	    if (seq == NULL)
676 	      wch = ILLEGAL_CHAR_VALUE;
677 	    else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
678 	      wch = seq->ucs4;
679 	    else
680 	      wch = repertoire_find_value (ctype->repertoire, seq->name,
681 					   strlen (seq->name));
682 
683 	    if (wch != ILLEGAL_CHAR_VALUE)
684 	      /* We are only interested in the side-effects of the
685 		 `find_idx' call.  It will add appropriate entries in
686 		 the name array if this is necessary.  */
687 	      (void) find_idx (ctype, NULL, NULL, NULL, wch);
688 
689 	    /* "Increment" the bytes sequence.  */
690 	    inner = nbytes - 1;
691 	    while (inner >= 0 && bytes[inner] == 0xff)
692 	      --inner;
693 
694 	    if (inner < 0)
695 	      {
696 		/* We have to extend the byte sequence.  */
697 		if (nbytes >= charmap->width_rules[cnt].to->nbytes)
698 		  break;
699 
700 		bytes[0] = 1;
701 		memset (&bytes[1], 0, nbytes);
702 		++nbytes;
703 	      }
704 	    else
705 	      {
706 		++bytes[inner];
707 		while (++inner < nbytes)
708 		  bytes[inner] = 0;
709 	      }
710 	  }
711       }
712 
713   /* Now set all the other characters of the character set to the
714      default width.  */
715   curs = NULL;
716   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
717     {
718       struct charseq *data = (struct charseq *) vdata;
719 
720       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
721 	data->ucs4 = repertoire_find_value (ctype->repertoire,
722 					    data->name, len);
723 
724       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
725 	(void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
726     }
727 
728   /* There must be a multiple of 10 digits.  */
729   if (ctype->mbdigits_act % 10 != 0)
730     {
731       assert (ctype->mbdigits_act == ctype->wcdigits_act);
732       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
733       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
734       record_error (0, 0, _("\
735 `digit' category has not entries in groups of ten"));
736     }
737 
738   /* Check the input digits.  There must be a multiple of ten available.
739      In each group it could be that one or the other character is missing.
740      In this case the whole group must be removed.  */
741   cnt = 0;
742   while (cnt < ctype->mbdigits_act)
743     {
744       size_t inner;
745       for (inner = 0; inner < 10; ++inner)
746 	if (ctype->mbdigits[cnt + inner] == NULL)
747 	  break;
748 
749       if (inner == 10)
750 	cnt += 10;
751       else
752 	{
753 	  /* Remove the group.  */
754 	  memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
755 		   ((ctype->wcdigits_act - cnt - 10)
756 		    * sizeof (ctype->mbdigits[0])));
757 	  ctype->mbdigits_act -= 10;
758 	}
759     }
760 
761   /* If no input digits are given use the default.  */
762   if (ctype->mbdigits_act == 0)
763     {
764       if (ctype->mbdigits_max == 0)
765 	{
766 	  ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
767 					   10 * sizeof (struct charseq *));
768 	  ctype->mbdigits_max = 10;
769 	}
770 
771       for (cnt = 0; cnt < 10; ++cnt)
772 	{
773 	  ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
774 						      (char *) digits + cnt, 1);
775 	  if (ctype->mbdigits[cnt] == NULL)
776 	    {
777 	      ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
778 							  longnames[cnt],
779 							  strlen (longnames[cnt]));
780 	      if (ctype->mbdigits[cnt] == NULL)
781 		{
782 		  /* Hum, this ain't good.  */
783 		  record_error (0, 0, _("\
784 no input digits defined and none of the standard names in the charmap"));
785 
786 		  ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
787 							sizeof (struct charseq) + 1);
788 
789 		  /* This is better than nothing.  */
790 		  ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
791 		  ctype->mbdigits[cnt]->nbytes = 1;
792 		}
793 	    }
794 	}
795 
796       ctype->mbdigits_act = 10;
797     }
798 
799   /* Check the wide character input digits.  There must be a multiple
800      of ten available.  In each group it could be that one or the other
801      character is missing.  In this case the whole group must be
802      removed.  */
803   cnt = 0;
804   while (cnt < ctype->wcdigits_act)
805     {
806       size_t inner;
807       for (inner = 0; inner < 10; ++inner)
808 	if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
809 	  break;
810 
811       if (inner == 10)
812 	cnt += 10;
813       else
814 	{
815 	  /* Remove the group.  */
816 	  memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
817 		   ((ctype->wcdigits_act - cnt - 10)
818 		    * sizeof (ctype->wcdigits[0])));
819 	  ctype->wcdigits_act -= 10;
820 	}
821     }
822 
823   /* If no input digits are given use the default.  */
824   if (ctype->wcdigits_act == 0)
825     {
826       if (ctype->wcdigits_max == 0)
827 	{
828 	  ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
829 					   10 * sizeof (uint32_t));
830 	  ctype->wcdigits_max = 10;
831 	}
832 
833       for (cnt = 0; cnt < 10; ++cnt)
834 	ctype->wcdigits[cnt] = L'0' + cnt;
835 
836       ctype->mbdigits_act = 10;
837     }
838 
839   /* Check the outdigits.  */
840   warned = 0;
841   for (cnt = 0; cnt < 10; ++cnt)
842     if (ctype->mboutdigits[cnt] == NULL)
843       {
844 	if (!warned)
845 	  {
846 	    record_error (0, 0, _("\
847 not all characters used in `outdigit' are available in the charmap"));
848 	    warned = 1;
849 	  }
850 
851 	static const struct charseq replace =
852 	  {
853 	     .nbytes = 1,
854 	     .bytes = "?",
855 	  };
856 	ctype->mboutdigits[cnt] = (struct charseq *) &replace;
857       }
858 
859   warned = 0;
860   for (cnt = 0; cnt < 10; ++cnt)
861     if (ctype->wcoutdigits[cnt] == 0)
862       {
863 	if (!warned)
864 	  {
865 	    record_error (0, 0, _("\
866 not all characters used in `outdigit' are available in the repertoire"));
867 	    warned = 1;
868 	  }
869 
870 	ctype->wcoutdigits[cnt] = L'?';
871       }
872 
873   /* Sort the entries in the translit_ignore list.  */
874   if (ctype->translit_ignore != NULL)
875     {
876       struct translit_ignore_t *firstp = ctype->translit_ignore;
877       struct translit_ignore_t *runp;
878 
879       ctype->ntranslit_ignore = 1;
880 
881       for (runp = firstp->next; runp != NULL; runp = runp->next)
882 	{
883 	  struct translit_ignore_t *lastp = NULL;
884 	  struct translit_ignore_t *cmpp;
885 
886 	  ++ctype->ntranslit_ignore;
887 
888 	  for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
889 	    if (runp->from < cmpp->from)
890 	      break;
891 
892 	  runp->next = lastp;
893 	  if (lastp == NULL)
894 	    firstp = runp;
895 	}
896 
897       ctype->translit_ignore = firstp;
898     }
899 }
900 
901 
902 void
ctype_output(struct localedef_t * locale,const struct charmap_t * charmap,const char * output_path)903 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
904 	      const char *output_path)
905 {
906   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
907   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
908 			 + ctype->nr_charclass + ctype->map_collection_nr);
909   struct locale_file file;
910   uint32_t default_missing_len;
911   size_t elem, cnt;
912 
913   /* Now prepare the output: Find the sizes of the table we can use.  */
914   allocate_arrays (ctype, charmap, ctype->repertoire);
915 
916   default_missing_len = (ctype->default_missing
917 			 ? wcslen ((wchar_t *) ctype->default_missing)
918 			 : 0);
919 
920   init_locale_data (&file, nelems);
921   for (elem = 0; elem < nelems; ++elem)
922     {
923       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
924 	switch (elem)
925 	  {
926 #define CTYPE_EMPTY(name) \
927 	  case name:							      \
928 	    add_locale_empty (&file);					      \
929 	    break
930 
931 	  CTYPE_EMPTY(_NL_CTYPE_GAP1);
932 	  CTYPE_EMPTY(_NL_CTYPE_GAP2);
933 	  CTYPE_EMPTY(_NL_CTYPE_GAP3);
934 	  CTYPE_EMPTY(_NL_CTYPE_GAP4);
935 	  CTYPE_EMPTY(_NL_CTYPE_GAP5);
936 	  CTYPE_EMPTY(_NL_CTYPE_GAP6);
937 
938 #define CTYPE_RAW_DATA(name, base, size)				      \
939 	  case _NL_ITEM_INDEX (name):					      \
940 	    add_locale_raw_data (&file, base, size);			      \
941 	    break
942 
943 	  CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
944 			  ctype->ctype_b,
945 			  (256 + 128) * sizeof (char_class_t));
946 
947 #define CTYPE_UINT32_ARRAY(name, base, n_elems)				      \
948 	  case _NL_ITEM_INDEX (name):					      \
949 	    add_locale_uint32_array (&file, base, n_elems);		      \
950 	    break
951 
952 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
953 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
954 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
955 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
956 	  CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
957 			  ctype->ctype32_b,
958 			  256 * sizeof (char_class32_t));
959 
960 #define CTYPE_UINT32(name, value)					      \
961 	  case _NL_ITEM_INDEX (name):					      \
962 	    add_locale_uint32 (&file, value);				      \
963 	    break
964 
965 	  CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
966 	  CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
967 	  CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
968 
969 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
970 			      ctype->translit_from_idx,
971 			      ctype->translit_idx_size);
972 
973 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
974 			      ctype->translit_from_tbl,
975 			      ctype->translit_from_tbl_size
976 			      / sizeof (uint32_t));
977 
978 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
979 			      ctype->translit_to_idx,
980 			      ctype->translit_idx_size);
981 
982 	  CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
983 			      ctype->translit_to_tbl,
984 			      ctype->translit_to_tbl_size / sizeof (uint32_t));
985 
986 	  case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
987 	    /* The class name array.  */
988 	    start_locale_structure (&file);
989 	    for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
990 	      add_locale_string (&file, ctype->classnames[cnt]);
991 	    add_locale_char (&file, 0);
992 	    align_locale_data (&file, LOCFILE_ALIGN);
993 	    end_locale_structure (&file);
994 	    break;
995 
996 	  case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
997 	    /* The class name array.  */
998 	    start_locale_structure (&file);
999 	    for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1000 	      add_locale_string (&file, ctype->mapnames[cnt]);
1001 	    add_locale_char (&file, 0);
1002 	    align_locale_data (&file, LOCFILE_ALIGN);
1003 	    end_locale_structure (&file);
1004 	    break;
1005 
1006 	  case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1007 	    add_locale_wcwidth_table (&file, &ctype->width);
1008 	    break;
1009 
1010 	  CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1011 
1012 	  case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1013 	    add_locale_string (&file, ctype->codeset_name);
1014 	    break;
1015 
1016 	  CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1017 
1018 	  CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1019 
1020 	  case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1021 	    add_locale_uint32 (&file, ctype->mbdigits_act / 10);
1022 	    break;
1023 
1024 	  case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1025 	    add_locale_uint32 (&file, ctype->wcdigits_act / 10);
1026 	    break;
1027 
1028 	  case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1029 	    start_locale_structure (&file);
1030 	    for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1031 		 cnt < ctype->mbdigits_act; cnt += 10)
1032 	      {
1033 		add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1034 				     ctype->mbdigits[cnt]->nbytes);
1035 		add_locale_char (&file, 0);
1036 	      }
1037 	    end_locale_structure (&file);
1038 	    break;
1039 
1040 	  case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1041 	    start_locale_structure (&file);
1042 	    cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1043 	    add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1044 				 ctype->mboutdigits[cnt]->nbytes);
1045 	    add_locale_char (&file, 0);
1046 	    end_locale_structure (&file);
1047 	    break;
1048 
1049 	  case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1050 	    start_locale_structure (&file);
1051 	    for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1052 		 cnt < ctype->wcdigits_act; cnt += 10)
1053 	      add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1054 	    end_locale_structure (&file);
1055 	    break;
1056 
1057 	  case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1058 	    cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1059 	    add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1060 	    break;
1061 
1062 	  case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1063 	    add_locale_uint32 (&file, default_missing_len);
1064 	    break;
1065 
1066 	  case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1067 	    add_locale_uint32_array (&file, ctype->default_missing,
1068 				     default_missing_len);
1069 	    break;
1070 
1071 	  case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1072 	    add_locale_uint32 (&file, ctype->ntranslit_ignore);
1073 	    break;
1074 
1075 	  case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1076 	    start_locale_structure (&file);
1077 	    {
1078 	      struct translit_ignore_t *runp;
1079 	      for (runp = ctype->translit_ignore; runp != NULL;
1080 		   runp = runp->next)
1081 		{
1082 		  add_locale_uint32 (&file, runp->from);
1083 		  add_locale_uint32 (&file, runp->to);
1084 		  add_locale_uint32 (&file, runp->step);
1085 		}
1086 	    }
1087 	    end_locale_structure (&file);
1088 	    break;
1089 
1090 	  default:
1091 	    assert (! "unknown CTYPE element");
1092 	  }
1093       else
1094 	{
1095 	  /* Handle extra maps.  */
1096 	  size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1097 	  if (nr < ctype->nr_charclass)
1098 	    {
1099 	      start_locale_prelude (&file);
1100 	      add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1101 	      end_locale_prelude (&file);
1102 	      add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1103 	    }
1104 	  else
1105 	    {
1106 	      nr -= ctype->nr_charclass;
1107 	      assert (nr < ctype->map_collection_nr);
1108 	      add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1109 	    }
1110 	}
1111     }
1112 
1113   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1114 }
1115 
1116 
1117 /* Local functions.  */
1118 static void
ctype_class_new(struct linereader * lr,struct locale_ctype_t * ctype,const char * name)1119 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1120 		 const char *name)
1121 {
1122   size_t cnt;
1123 
1124   for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1125     if (strcmp (ctype->classnames[cnt], name) == 0)
1126       break;
1127 
1128   if (cnt < ctype->nr_charclass)
1129     {
1130       lr_error (lr, _("character class `%s' already defined"), name);
1131       return;
1132     }
1133 
1134   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1135     /* Exit code 2 is prescribed in P1003.2b.  */
1136     record_error (2, 0, _("\
1137 implementation limit: no more than %Zd character classes allowed"),
1138 		  MAX_NR_CHARCLASS);
1139 
1140   ctype->classnames[ctype->nr_charclass++] = name;
1141 }
1142 
1143 
1144 static void
ctype_map_new(struct linereader * lr,struct locale_ctype_t * ctype,const char * name,const struct charmap_t * charmap)1145 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1146 	       const char *name, const struct charmap_t *charmap)
1147 {
1148   size_t max_chars = 0;
1149   size_t cnt;
1150 
1151   for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1152     {
1153       if (strcmp (ctype->mapnames[cnt], name) == 0)
1154 	break;
1155 
1156       if (max_chars < ctype->map_collection_max[cnt])
1157 	max_chars = ctype->map_collection_max[cnt];
1158     }
1159 
1160   if (cnt < ctype->map_collection_nr)
1161     {
1162       lr_error (lr, _("character map `%s' already defined"), name);
1163       return;
1164     }
1165 
1166   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1167     /* Exit code 2 is prescribed in P1003.2b.  */
1168     record_error (2, 0, _("\
1169 implementation limit: no more than %d character maps allowed"),
1170 		  MAX_NR_CHARMAP);
1171 
1172   ctype->mapnames[cnt] = name;
1173 
1174   if (max_chars == 0)
1175     ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1176   else
1177     ctype->map_collection_max[cnt] = max_chars;
1178 
1179   ctype->map_collection[cnt] = (uint32_t *)
1180     xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1181   ctype->map_collection_act[cnt] = 256;
1182 
1183   ++ctype->map_collection_nr;
1184 }
1185 
1186 
1187 /* We have to be prepared that TABLE, MAX, and ACT can be NULL.  This
1188    is possible if we only want to extend the name array.  */
1189 static uint32_t *
find_idx(struct locale_ctype_t * ctype,uint32_t ** table,size_t * max,size_t * act,uint32_t idx)1190 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1191 	  size_t *act, uint32_t idx)
1192 {
1193   size_t cnt;
1194 
1195   if (idx < 256)
1196     return table == NULL ? NULL : &(*table)[idx];
1197 
1198   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1199 #if 1
1200   cnt = idx_table_get (&ctype->charnames_idx, idx);
1201   if (cnt == EMPTY)
1202     /* Not found.  */
1203     cnt = ctype->charnames_act;
1204 #else
1205   for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1206     if (ctype->charnames[cnt] == idx)
1207       break;
1208 #endif
1209 
1210   /* We have to distinguish two cases: the name is found or not.  */
1211   if (cnt == ctype->charnames_act)
1212     {
1213       /* Extend the name array.  */
1214       if (ctype->charnames_act == ctype->charnames_max)
1215 	{
1216 	  ctype->charnames_max *= 2;
1217 	  ctype->charnames = (uint32_t *)
1218 	    xrealloc (ctype->charnames,
1219 		      sizeof (uint32_t) * ctype->charnames_max);
1220 	}
1221       ctype->charnames[ctype->charnames_act++] = idx;
1222       idx_table_add (&ctype->charnames_idx, idx, cnt);
1223     }
1224 
1225   if (table == NULL)
1226     /* We have done everything we are asked to do.  */
1227     return NULL;
1228 
1229   if (max == NULL)
1230     /* The caller does not want to extend the table.  */
1231     return (cnt >= *act ? NULL : &(*table)[cnt]);
1232 
1233   if (cnt >= *act)
1234     {
1235       if (cnt >= *max)
1236 	{
1237 	  size_t old_max = *max;
1238 	  do
1239 	    *max *= 2;
1240 	  while (*max <= cnt);
1241 
1242 	  *table =
1243 	    (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1244 	  memset (&(*table)[old_max], '\0',
1245 		  (*max - old_max) * sizeof (uint32_t));
1246 	}
1247 
1248       *act = cnt + 1;
1249     }
1250 
1251   return &(*table)[cnt];
1252 }
1253 
1254 
1255 static int
get_character(struct token * now,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct charseq ** seqp,uint32_t * wchp)1256 get_character (struct token *now, const struct charmap_t *charmap,
1257 	       struct repertoire_t *repertoire,
1258 	       struct charseq **seqp, uint32_t *wchp)
1259 {
1260   if (now->tok == tok_bsymbol)
1261     {
1262       /* This will hopefully be the normal case.  */
1263       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1264 				     now->val.str.lenmb);
1265       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1266 				  now->val.str.lenmb);
1267     }
1268   else if (now->tok == tok_ucs4)
1269     {
1270       char utmp[10];
1271 
1272       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1273       *seqp = charmap_find_value (charmap, utmp, 9);
1274 
1275       if (*seqp == NULL)
1276 	*seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1277 
1278       if (*seqp == NULL)
1279 	{
1280 	  /* Compute the value in the charmap from the UCS value.  */
1281 	  const char *symbol = repertoire_find_symbol (repertoire,
1282 						       now->val.ucs4);
1283 
1284 	  if (symbol == NULL)
1285 	    *seqp = NULL;
1286 	  else
1287 	    *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1288 
1289 	  if (*seqp == NULL)
1290 	    {
1291 	      if (repertoire != NULL)
1292 		{
1293 		  /* Insert a negative entry.  */
1294 		  static const struct charseq negative
1295 		    = { .ucs4 = ILLEGAL_CHAR_VALUE };
1296 		  uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1297 						  sizeof (uint32_t));
1298 		  *newp = now->val.ucs4;
1299 
1300 		  insert_entry (&repertoire->seq_table, newp,
1301 				sizeof (uint32_t), (void *) &negative);
1302 		}
1303 	    }
1304 	  else
1305 	    (*seqp)->ucs4 = now->val.ucs4;
1306 	}
1307       else if ((*seqp)->ucs4 != now->val.ucs4)
1308 	*seqp = NULL;
1309 
1310       *wchp = now->val.ucs4;
1311     }
1312   else if (now->tok == tok_charcode)
1313     {
1314       /* We must map from the byte code to UCS4.  */
1315       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1316 				   now->val.str.lenmb);
1317 
1318       if (*seqp == NULL)
1319 	*wchp = ILLEGAL_CHAR_VALUE;
1320       else
1321 	{
1322 	  if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1323 	    (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1324 						   strlen ((*seqp)->name));
1325 	  *wchp = (*seqp)->ucs4;
1326 	}
1327     }
1328   else
1329     return 1;
1330 
1331   return 0;
1332 }
1333 
1334 
1335 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1336    the .(2). counterparts.  */
1337 static void
charclass_symbolic_ellipsis(struct linereader * ldfile,struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct token * now,const char * last_str,unsigned long int class256_bit,unsigned long int class_bit,int base,int ignore_content,int handle_digits,int step)1338 charclass_symbolic_ellipsis (struct linereader *ldfile,
1339 			     struct locale_ctype_t *ctype,
1340 			     const struct charmap_t *charmap,
1341 			     struct repertoire_t *repertoire,
1342 			     struct token *now,
1343 			     const char *last_str,
1344 			     unsigned long int class256_bit,
1345 			     unsigned long int class_bit, int base,
1346 			     int ignore_content, int handle_digits, int step)
1347 {
1348   const char *nowstr = now->val.str.startmb;
1349   char tmp[now->val.str.lenmb + 1];
1350   const char *cp;
1351   char *endp;
1352   unsigned long int from;
1353   unsigned long int to;
1354 
1355   /* We have to compute the ellipsis values using the symbolic names.  */
1356   assert (last_str != NULL);
1357 
1358   if (strlen (last_str) != now->val.str.lenmb)
1359     {
1360     invalid_range:
1361       lr_error (ldfile,
1362 		_("`%s' and `%.*s' are not valid names for symbolic range"),
1363 		last_str, (int) now->val.str.lenmb, nowstr);
1364       return;
1365     }
1366 
1367   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1368     /* Nothing to do, the names are the same.  */
1369     return;
1370 
1371   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1372     ;
1373 
1374   errno = 0;
1375   from = strtoul (cp, &endp, base);
1376   if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1377     goto invalid_range;
1378 
1379   to = strtoul (nowstr + (cp - last_str), &endp, base);
1380   if ((to == UINT_MAX && errno == ERANGE)
1381       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1382     goto invalid_range;
1383 
1384   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1385   if (!ignore_content)
1386     {
1387       now->val.str.startmb = tmp;
1388       while ((from += step) <= to)
1389 	{
1390 	  struct charseq *seq;
1391 	  uint32_t wch;
1392 
1393 	  sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1394 		   (int) (cp - last_str), last_str,
1395 		   (int) (now->val.str.lenmb - (cp - last_str)),
1396 		   from);
1397 
1398 	  if (get_character (now, charmap, repertoire, &seq, &wch))
1399 	    goto invalid_range;
1400 
1401 	  if (seq != NULL && seq->nbytes == 1)
1402 	    /* Yep, we can store information about this byte sequence.  */
1403 	    ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1404 
1405 	  if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1406 	    /* We have the UCS4 position.  */
1407 	    *find_idx (ctype, &ctype->class_collection,
1408 		       &ctype->class_collection_max,
1409 		       &ctype->class_collection_act, wch) |= class_bit;
1410 
1411 	  if (handle_digits == 1)
1412 	    {
1413 	      /* We must store the digit values.  */
1414 	      if (ctype->mbdigits_act == ctype->mbdigits_max)
1415 		{
1416 		  ctype->mbdigits_max *= 2;
1417 		  ctype->mbdigits = xrealloc (ctype->mbdigits,
1418 					      (ctype->mbdigits_max
1419 					       * sizeof (char *)));
1420 		  ctype->wcdigits_max *= 2;
1421 		  ctype->wcdigits = xrealloc (ctype->wcdigits,
1422 					      (ctype->wcdigits_max
1423 					       * sizeof (uint32_t)));
1424 		}
1425 
1426 	      ctype->mbdigits[ctype->mbdigits_act++] = seq;
1427 	      ctype->wcdigits[ctype->wcdigits_act++] = wch;
1428 	    }
1429 	  else if (handle_digits == 2)
1430 	    {
1431 	      /* We must store the digit values.  */
1432 	      if (ctype->outdigits_act >= 10)
1433 		{
1434 		  lr_error (ldfile, _("\
1435 %s: field `%s' does not contain exactly ten entries"),
1436 			    "LC_CTYPE", "outdigit");
1437 		  return;
1438 		}
1439 
1440 	      ctype->mboutdigits[ctype->outdigits_act] = seq;
1441 	      ctype->wcoutdigits[ctype->outdigits_act] = wch;
1442 	      ++ctype->outdigits_act;
1443 	    }
1444 	}
1445     }
1446 }
1447 
1448 
1449 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1450 static void
charclass_ucs4_ellipsis(struct linereader * ldfile,struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct token * now,uint32_t last_wch,unsigned long int class256_bit,unsigned long int class_bit,int ignore_content,int handle_digits,int step)1451 charclass_ucs4_ellipsis (struct linereader *ldfile,
1452 			 struct locale_ctype_t *ctype,
1453 			 const struct charmap_t *charmap,
1454 			 struct repertoire_t *repertoire,
1455 			 struct token *now, uint32_t last_wch,
1456 			 unsigned long int class256_bit,
1457 			 unsigned long int class_bit, int ignore_content,
1458 			 int handle_digits, int step)
1459 {
1460   if (last_wch > now->val.ucs4)
1461     {
1462       lr_error (ldfile, _("\
1463 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1464 		(now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1465 		(now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1466       return;
1467     }
1468 
1469   if (!ignore_content)
1470     while ((last_wch += step) <= now->val.ucs4)
1471       {
1472 	/* We have to find out whether there is a byte sequence corresponding
1473 	   to this UCS4 value.  */
1474 	struct charseq *seq;
1475 	char utmp[10];
1476 
1477 	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1478 	seq = charmap_find_value (charmap, utmp, 9);
1479 	if (seq == NULL)
1480 	  {
1481 	    snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1482 	    seq = charmap_find_value (charmap, utmp, 5);
1483 	  }
1484 
1485 	if (seq == NULL)
1486 	  /* Try looking in the repertoire map.  */
1487 	  seq = repertoire_find_seq (repertoire, last_wch);
1488 
1489 	/* If this is the first time we look for this sequence create a new
1490 	   entry.  */
1491 	if (seq == NULL)
1492 	  {
1493 	    static const struct charseq negative
1494 	      = { .ucs4 = ILLEGAL_CHAR_VALUE };
1495 
1496 	    /* Find the symbolic name for this UCS4 value.  */
1497 	    if (repertoire != NULL)
1498 	      {
1499 		const char *symbol = repertoire_find_symbol (repertoire,
1500 							     last_wch);
1501 		uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1502 						sizeof (uint32_t));
1503 		*newp = last_wch;
1504 
1505 		if (symbol != NULL)
1506 		  /* We have a name, now search the multibyte value.  */
1507 		  seq = charmap_find_value (charmap, symbol, strlen (symbol));
1508 
1509 		if (seq == NULL)
1510 		  /* We have to create a fake entry.  */
1511 		  seq = (struct charseq *) &negative;
1512 		else
1513 		  seq->ucs4 = last_wch;
1514 
1515 		insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1516 			      seq);
1517 	      }
1518 	    else
1519 	      /* We have to create a fake entry.  */
1520 	      seq = (struct charseq *) &negative;
1521 	  }
1522 
1523 	/* We have a name, now search the multibyte value.  */
1524 	if (seq->ucs4 == last_wch && seq->nbytes == 1)
1525 	  /* Yep, we can store information about this byte sequence.  */
1526 	  ctype->class256_collection[(size_t) seq->bytes[0]]
1527 	    |= class256_bit;
1528 
1529 	/* And of course we have the UCS4 position.  */
1530 	if (class_bit != 0)
1531 	  *find_idx (ctype, &ctype->class_collection,
1532 		     &ctype->class_collection_max,
1533 		     &ctype->class_collection_act, last_wch) |= class_bit;
1534 
1535 	if (handle_digits == 1)
1536 	  {
1537 	    /* We must store the digit values.  */
1538 	    if (ctype->mbdigits_act == ctype->mbdigits_max)
1539 	      {
1540 		ctype->mbdigits_max *= 2;
1541 		ctype->mbdigits = xrealloc (ctype->mbdigits,
1542 					    (ctype->mbdigits_max
1543 					     * sizeof (char *)));
1544 		ctype->wcdigits_max *= 2;
1545 		ctype->wcdigits = xrealloc (ctype->wcdigits,
1546 					    (ctype->wcdigits_max
1547 					     * sizeof (uint32_t)));
1548 	      }
1549 
1550 	    ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1551 						      ? seq : NULL);
1552 	    ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1553 	  }
1554 	else if (handle_digits == 2)
1555 	  {
1556 	    /* We must store the digit values.  */
1557 	    if (ctype->outdigits_act >= 10)
1558 	      {
1559 		lr_error (ldfile, _("\
1560 %s: field `%s' does not contain exactly ten entries"),
1561 			  "LC_CTYPE", "outdigit");
1562 		return;
1563 	      }
1564 
1565 	    ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1566 							? seq : NULL);
1567 	    ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1568 	    ++ctype->outdigits_act;
1569 	  }
1570       }
1571 }
1572 
1573 
1574 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1575 static void
charclass_charcode_ellipsis(struct linereader * ldfile,struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct token * now,char * last_charcode,uint32_t last_charcode_len,unsigned long int class256_bit,unsigned long int class_bit,int ignore_content,int handle_digits)1576 charclass_charcode_ellipsis (struct linereader *ldfile,
1577 			     struct locale_ctype_t *ctype,
1578 			     const struct charmap_t *charmap,
1579 			     struct repertoire_t *repertoire,
1580 			     struct token *now, char *last_charcode,
1581 			     uint32_t last_charcode_len,
1582 			     unsigned long int class256_bit,
1583 			     unsigned long int class_bit, int ignore_content,
1584 			     int handle_digits)
1585 {
1586   /* First check whether the to-value is larger.  */
1587   if (now->val.charcode.nbytes != last_charcode_len)
1588     {
1589       lr_error (ldfile, _("\
1590 start and end character sequence of range must have the same length"));
1591       return;
1592     }
1593 
1594   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1595     {
1596       lr_error (ldfile, _("\
1597 to-value character sequence is smaller than from-value sequence"));
1598       return;
1599     }
1600 
1601   if (!ignore_content)
1602     {
1603       do
1604 	{
1605 	  /* Increment the byte sequence value.  */
1606 	  struct charseq *seq;
1607 	  uint32_t wch;
1608 	  int i;
1609 
1610 	  for (i = last_charcode_len - 1; i >= 0; --i)
1611 	    if (++last_charcode[i] != 0)
1612 	      break;
1613 
1614 	  if (last_charcode_len == 1)
1615 	    /* Of course we have the charcode value.  */
1616 	    ctype->class256_collection[(size_t) last_charcode[0]]
1617 	      |= class256_bit;
1618 
1619 	  /* Find the symbolic name.  */
1620 	  seq = charmap_find_symbol (charmap, last_charcode,
1621 				     last_charcode_len);
1622 	  if (seq != NULL)
1623 	    {
1624 	      if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1625 		seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1626 						   strlen (seq->name));
1627 	      wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1628 
1629 	      if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1630 		*find_idx (ctype, &ctype->class_collection,
1631 			   &ctype->class_collection_max,
1632 			   &ctype->class_collection_act, wch) |= class_bit;
1633 	    }
1634 	  else
1635 	    wch = ILLEGAL_CHAR_VALUE;
1636 
1637 	  if (handle_digits == 1)
1638 	    {
1639 	      /* We must store the digit values.  */
1640 	      if (ctype->mbdigits_act == ctype->mbdigits_max)
1641 		{
1642 		  ctype->mbdigits_max *= 2;
1643 		  ctype->mbdigits = xrealloc (ctype->mbdigits,
1644 					      (ctype->mbdigits_max
1645 					       * sizeof (char *)));
1646 		  ctype->wcdigits_max *= 2;
1647 		  ctype->wcdigits = xrealloc (ctype->wcdigits,
1648 					      (ctype->wcdigits_max
1649 					       * sizeof (uint32_t)));
1650 		}
1651 
1652 	      seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1653 	      memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1654 	      seq->nbytes = last_charcode_len;
1655 
1656 	      ctype->mbdigits[ctype->mbdigits_act++] = seq;
1657 	      ctype->wcdigits[ctype->wcdigits_act++] = wch;
1658 	    }
1659 	  else if (handle_digits == 2)
1660 	    {
1661 	      struct charseq *seq;
1662 	      /* We must store the digit values.  */
1663 	      if (ctype->outdigits_act >= 10)
1664 		{
1665 		  lr_error (ldfile, _("\
1666 %s: field `%s' does not contain exactly ten entries"),
1667 			    "LC_CTYPE", "outdigit");
1668 		  return;
1669 		}
1670 
1671 	      seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1672 	      memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1673 	      seq->nbytes = last_charcode_len;
1674 
1675 	      ctype->mboutdigits[ctype->outdigits_act] = seq;
1676 	      ctype->wcoutdigits[ctype->outdigits_act] = wch;
1677 	      ++ctype->outdigits_act;
1678 	    }
1679 	}
1680       while (memcmp (last_charcode, now->val.charcode.bytes,
1681 		     last_charcode_len) != 0);
1682     }
1683 }
1684 
1685 
1686 static uint32_t *
find_translit2(struct locale_ctype_t * ctype,const struct charmap_t * charmap,uint32_t wch)1687 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1688 		uint32_t wch)
1689 {
1690   struct translit_t *trunp = ctype->translit;
1691   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1692 
1693   while (trunp != NULL)
1694     {
1695       /* XXX We simplify things here.  The transliterations we look
1696 	 for are only allowed to have one character.  */
1697       if (trunp->from[0] == wch && trunp->from[1] == 0)
1698 	{
1699 	  /* Found it.  Now look for a transliteration which can be
1700 	     represented with the character set.  */
1701 	  struct translit_to_t *torunp = trunp->to;
1702 
1703 	  while (torunp != NULL)
1704 	    {
1705 	      int i;
1706 
1707 	      for (i = 0; torunp->str[i] != 0; ++i)
1708 		{
1709 		  char utmp[10];
1710 
1711 		  snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1712 		  if (charmap_find_value (charmap, utmp, 9) == NULL)
1713 		    /* This character cannot be represented.  */
1714 		    break;
1715 		}
1716 
1717 	      if (torunp->str[i] == 0)
1718 		return torunp->str;
1719 
1720 	      torunp = torunp->next;
1721 	    }
1722 
1723 	  break;
1724 	}
1725 
1726       trunp = trunp->next;
1727     }
1728 
1729   /* Check for ignored chars.  */
1730   while (tirunp != NULL)
1731     {
1732       if (tirunp->from <= wch && tirunp->to >= wch)
1733 	{
1734 	  uint32_t wi;
1735 
1736 	  for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1737 	    if (wi == wch)
1738 	      return no_str;
1739 	}
1740     }
1741 
1742   /* Nothing found.  */
1743   return NULL;
1744 }
1745 
1746 
1747 uint32_t *
find_translit(struct localedef_t * locale,const struct charmap_t * charmap,uint32_t wch)1748 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1749 	       uint32_t wch)
1750 {
1751   struct locale_ctype_t *ctype;
1752   uint32_t *result = NULL;
1753 
1754   assert (locale != NULL);
1755   ctype = locale->categories[LC_CTYPE].ctype;
1756 
1757   if (ctype == NULL)
1758     return NULL;
1759 
1760   if (ctype->translit != NULL)
1761     result = find_translit2 (ctype, charmap, wch);
1762 
1763   if (result == NULL)
1764     {
1765       struct translit_include_t *irunp = ctype->translit_include;
1766 
1767       while (irunp != NULL && result == NULL)
1768 	{
1769 	  result = find_translit (find_locale (CTYPE_LOCALE,
1770 					       irunp->copy_locale,
1771 					       irunp->copy_repertoire,
1772 					       charmap),
1773 				  charmap, wch);
1774 	  irunp = irunp->next;
1775 	}
1776     }
1777 
1778   return result;
1779 }
1780 
1781 
1782 /* Read one transliteration entry.  */
1783 static uint32_t *
read_widestring(struct linereader * ldfile,struct token * now,const struct charmap_t * charmap,struct repertoire_t * repertoire)1784 read_widestring (struct linereader *ldfile, struct token *now,
1785 		 const struct charmap_t *charmap,
1786 		 struct repertoire_t *repertoire)
1787 {
1788   uint32_t *wstr;
1789 
1790   if (now->tok == tok_default_missing)
1791     /* The special name "" will denote this case.  */
1792     wstr = no_str;
1793   else if (now->tok == tok_bsymbol)
1794     {
1795       /* Get the value from the repertoire.  */
1796       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1797       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1798 				       now->val.str.lenmb);
1799       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1800 	{
1801 	  /* We cannot proceed, we don't know the UCS4 value.  */
1802 	  free (wstr);
1803 	  return NULL;
1804 	}
1805 
1806       wstr[1] = 0;
1807     }
1808   else if (now->tok == tok_ucs4)
1809     {
1810       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1811       wstr[0] = now->val.ucs4;
1812       wstr[1] = 0;
1813     }
1814   else if (now->tok == tok_charcode)
1815     {
1816       /* Argh, we have to convert to the symbol name first and then to the
1817 	 UCS4 value.  */
1818       struct charseq *seq = charmap_find_symbol (charmap,
1819 						 now->val.str.startmb,
1820 						 now->val.str.lenmb);
1821       if (seq == NULL)
1822 	/* Cannot find the UCS4 value.  */
1823 	return NULL;
1824 
1825       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1826 	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1827 					   strlen (seq->name));
1828       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1829 	/* We cannot proceed, we don't know the UCS4 value.  */
1830 	return NULL;
1831 
1832       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1833       wstr[0] = seq->ucs4;
1834       wstr[1] = 0;
1835     }
1836   else if (now->tok == tok_string)
1837     {
1838       wstr = now->val.str.startwc;
1839       if (wstr == NULL || wstr[0] == 0)
1840 	return NULL;
1841     }
1842   else
1843     {
1844       if (now->tok != tok_eol && now->tok != tok_eof)
1845 	lr_ignore_rest (ldfile, 0);
1846       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1847       return (uint32_t *) -1l;
1848     }
1849 
1850   return wstr;
1851 }
1852 
1853 
1854 static void
read_translit_entry(struct linereader * ldfile,struct locale_ctype_t * ctype,struct token * now,const struct charmap_t * charmap,struct repertoire_t * repertoire)1855 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1856 		     struct token *now, const struct charmap_t *charmap,
1857 		     struct repertoire_t *repertoire)
1858 {
1859   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1860   struct translit_t *result;
1861   struct translit_to_t **top;
1862   struct obstack *ob = &ctype->mempool;
1863   int first;
1864   int ignore;
1865 
1866   if (from_wstr == NULL)
1867     /* There is no valid from string.  */
1868     return;
1869 
1870   result = (struct translit_t *) obstack_alloc (ob,
1871 						sizeof (struct translit_t));
1872   result->from = from_wstr;
1873   result->fname = ldfile->fname;
1874   result->lineno = ldfile->lineno;
1875   result->next = NULL;
1876   result->to = NULL;
1877   top = &result->to;
1878   first = 1;
1879   ignore = 0;
1880 
1881   while (1)
1882     {
1883       uint32_t *to_wstr;
1884 
1885       /* Next we have one or more transliterations.  They are
1886 	 separated by semicolons.  */
1887       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1888 
1889       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1890 	{
1891 	  /* One string read.  */
1892 	  const uint32_t zero = 0;
1893 
1894 	  if (!ignore)
1895 	    {
1896 	      obstack_grow (ob, &zero, 4);
1897 	      to_wstr = obstack_finish (ob);
1898 
1899 	      *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1900 	      (*top)->str = to_wstr;
1901 	      (*top)->next = NULL;
1902 	    }
1903 
1904 	  if (now->tok == tok_eol)
1905 	    {
1906 	      result->next = ctype->translit;
1907 	      ctype->translit = result;
1908 	      return;
1909 	    }
1910 
1911 	  if (!ignore)
1912 	    top = &(*top)->next;
1913 	  ignore = 0;
1914 	}
1915       else
1916 	{
1917 	  to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1918 	  if (to_wstr == (uint32_t *) -1l)
1919 	    {
1920 	      /* An error occurred.  */
1921 	      obstack_free (ob, result);
1922 	      return;
1923 	    }
1924 
1925 	  if (to_wstr == NULL)
1926 	    ignore = 1;
1927 	  else
1928 	    /* This value is usable.  */
1929 	    obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1930 
1931 	  first = 0;
1932 	}
1933     }
1934 }
1935 
1936 
1937 static void
read_translit_ignore_entry(struct linereader * ldfile,struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire)1938 read_translit_ignore_entry (struct linereader *ldfile,
1939 			    struct locale_ctype_t *ctype,
1940 			    const struct charmap_t *charmap,
1941 			    struct repertoire_t *repertoire)
1942 {
1943   /* We expect a semicolon-separated list of characters we ignore.  We are
1944      only interested in the wide character definitions.  These must be
1945      single characters, possibly defining a range when an ellipsis is used.  */
1946   while (1)
1947     {
1948       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1949 				    verbose);
1950       struct translit_ignore_t *newp;
1951       uint32_t from;
1952 
1953       if (now->tok == tok_eol || now->tok == tok_eof)
1954 	{
1955 	  lr_error (ldfile,
1956 		    _("premature end of `translit_ignore' definition"));
1957 	  return;
1958 	}
1959 
1960       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1961 	{
1962 	  lr_error (ldfile, _("syntax error"));
1963 	  lr_ignore_rest (ldfile, 0);
1964 	  return;
1965 	}
1966 
1967       if (now->tok == tok_ucs4)
1968 	from = now->val.ucs4;
1969       else
1970 	/* Try to get the value.  */
1971 	from = repertoire_find_value (repertoire, now->val.str.startmb,
1972 				      now->val.str.lenmb);
1973 
1974       if (from == ILLEGAL_CHAR_VALUE)
1975 	{
1976 	  lr_error (ldfile, "invalid character name");
1977 	  newp = NULL;
1978 	}
1979       else
1980 	{
1981 	  newp = (struct translit_ignore_t *)
1982 	    obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1983 	  newp->from = from;
1984 	  newp->to = from;
1985 	  newp->step = 1;
1986 
1987 	  newp->next = ctype->translit_ignore;
1988 	  ctype->translit_ignore = newp;
1989 	}
1990 
1991       /* Now we expect either a semicolon, an ellipsis, or the end of the
1992 	 line.  */
1993       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1994 
1995       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
1996 	{
1997 	  /* XXX Should we bother implementing `....'?  `...' certainly
1998 	     will not be implemented.  */
1999 	  uint32_t to;
2000 	  int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2001 
2002 	  now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003 
2004 	  if (now->tok == tok_eol || now->tok == tok_eof)
2005 	    {
2006 	      lr_error (ldfile,
2007 			_("premature end of `translit_ignore' definition"));
2008 	      return;
2009 	    }
2010 
2011 	  if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2012 	    {
2013 	      lr_error (ldfile, _("syntax error"));
2014 	      lr_ignore_rest (ldfile, 0);
2015 	      return;
2016 	    }
2017 
2018 	  if (now->tok == tok_ucs4)
2019 	    to = now->val.ucs4;
2020 	  else
2021 	    /* Try to get the value.  */
2022 	    to = repertoire_find_value (repertoire, now->val.str.startmb,
2023 					now->val.str.lenmb);
2024 
2025 	  if (to == ILLEGAL_CHAR_VALUE)
2026 	    lr_error (ldfile, "invalid character name");
2027 	  else
2028 	    {
2029 	      /* Make sure the `to'-value is larger.  */
2030 	      if (to >= from)
2031 		{
2032 		  newp->to = to;
2033 		  newp->step = step;
2034 		}
2035 	      else
2036 		lr_error (ldfile, _("\
2037 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2038 			  (to | from) < 65536 ? 4 : 8, to,
2039 			  (to | from) < 65536 ? 4 : 8, from);
2040 	    }
2041 
2042 	  /* And the next token.  */
2043 	  now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2044 	}
2045 
2046       if (now->tok == tok_eol || now->tok == tok_eof)
2047 	/* We are done.  */
2048 	return;
2049 
2050       if (now->tok == tok_semicolon)
2051 	/* Next round.  */
2052 	continue;
2053 
2054       /* If we come here something is wrong.  */
2055       lr_error (ldfile, _("syntax error"));
2056       lr_ignore_rest (ldfile, 0);
2057       return;
2058     }
2059 }
2060 
2061 
2062 /* The parser for the LC_CTYPE section of the locale definition.  */
2063 void
ctype_read(struct linereader * ldfile,struct localedef_t * result,const struct charmap_t * charmap,const char * repertoire_name,int ignore_content)2064 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2065 	    const struct charmap_t *charmap, const char *repertoire_name,
2066 	    int ignore_content)
2067 {
2068   struct repertoire_t *repertoire = NULL;
2069   struct locale_ctype_t *ctype;
2070   struct token *now;
2071   enum token_t nowtok;
2072   size_t cnt;
2073   uint32_t last_wch = 0;
2074   enum token_t last_token;
2075   enum token_t ellipsis_token;
2076   int step;
2077   char last_charcode[16];
2078   size_t last_charcode_len = 0;
2079   const char *last_str = NULL;
2080   int mapidx;
2081   struct localedef_t *copy_locale = NULL;
2082 
2083   /* Get the repertoire we have to use.  */
2084   if (repertoire_name != NULL)
2085     repertoire = repertoire_read (repertoire_name);
2086 
2087   /* The rest of the line containing `LC_CTYPE' must be free.  */
2088   lr_ignore_rest (ldfile, 1);
2089 
2090 
2091   do
2092     {
2093       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2094       nowtok = now->tok;
2095     }
2096   while (nowtok == tok_eol);
2097 
2098   /* If we see `copy' now we are almost done.  */
2099   if (nowtok == tok_copy)
2100     {
2101       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2102       if (now->tok != tok_string)
2103 	{
2104 	  SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2105 
2106 	skip_category:
2107 	  do
2108 	    now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2109 	  while (now->tok != tok_eof && now->tok != tok_end);
2110 
2111 	  if (now->tok != tok_eof
2112 	      || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2113 		  now->tok == tok_eof))
2114 	    lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2115 	  else if (now->tok != tok_lc_ctype)
2116 	    {
2117 	      lr_error (ldfile, _("\
2118 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2119 	      lr_ignore_rest (ldfile, 0);
2120 	    }
2121 	  else
2122 	    lr_ignore_rest (ldfile, 1);
2123 
2124 	  return;
2125 	}
2126 
2127       if (! ignore_content)
2128 	{
2129 	  /* Get the locale definition.  */
2130 	  copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2131 				     repertoire_name, charmap, NULL);
2132 	  if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2133 	    {
2134 	      /* Not yet loaded.  So do it now.  */
2135 	      if (locfile_read (copy_locale, charmap) != 0)
2136 		goto skip_category;
2137 	    }
2138 
2139 	  if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2140 	    return;
2141 	}
2142 
2143       lr_ignore_rest (ldfile, 1);
2144 
2145       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2146       nowtok = now->tok;
2147     }
2148 
2149   /* Prepare the data structures.  */
2150   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2151   ctype = result->categories[LC_CTYPE].ctype;
2152 
2153   /* Remember the repertoire we use.  */
2154   if (!ignore_content)
2155     ctype->repertoire = repertoire;
2156 
2157   while (1)
2158     {
2159       unsigned long int class_bit = 0;
2160       unsigned long int class256_bit = 0;
2161       int handle_digits = 0;
2162 
2163       /* Of course we don't proceed beyond the end of file.  */
2164       if (nowtok == tok_eof)
2165 	break;
2166 
2167       /* Ingore empty lines.  */
2168       if (nowtok == tok_eol)
2169 	{
2170 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2171 	  nowtok = now->tok;
2172 	  continue;
2173 	}
2174 
2175       switch (nowtok)
2176 	{
2177 	case tok_charclass:
2178 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2179 	  while (now->tok == tok_ident || now->tok == tok_string)
2180 	    {
2181 	      ctype_class_new (ldfile, ctype, now->val.str.startmb);
2182 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2183 	      if (now->tok != tok_semicolon)
2184 		break;
2185 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2186 	    }
2187 	  if (now->tok != tok_eol)
2188 	    SYNTAX_ERROR (_("\
2189 %s: syntax error in definition of new character class"), "LC_CTYPE");
2190 	  break;
2191 
2192 	case tok_charconv:
2193 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194 	  while (now->tok == tok_ident || now->tok == tok_string)
2195 	    {
2196 	      ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2197 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2198 	      if (now->tok != tok_semicolon)
2199 		break;
2200 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2201 	    }
2202 	  if (now->tok != tok_eol)
2203 	    SYNTAX_ERROR (_("\
2204 %s: syntax error in definition of new character map"), "LC_CTYPE");
2205 	  break;
2206 
2207 	case tok_class:
2208 	  /* Ignore the rest of the line if we don't need the input of
2209 	     this line.  */
2210 	  if (ignore_content)
2211 	    {
2212 	      lr_ignore_rest (ldfile, 0);
2213 	      break;
2214 	    }
2215 
2216 	  /* We simply forget the `class' keyword and use the following
2217 	     operand to determine the bit.  */
2218 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2219 	  if (now->tok == tok_ident || now->tok == tok_string)
2220 	    {
2221 	      /* Must can be one of the predefined class names.  */
2222 	      for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2223 		if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2224 		  break;
2225 	      if (cnt >= ctype->nr_charclass)
2226 		{
2227 		  /* OK, it's a new class.  */
2228 		  ctype_class_new (ldfile, ctype, now->val.str.startmb);
2229 
2230 		  class_bit = _ISwbit (ctype->nr_charclass - 1);
2231 		}
2232 	      else
2233 		{
2234 		  class_bit = _ISwbit (cnt);
2235 
2236 		  free (now->val.str.startmb);
2237 		}
2238 	    }
2239 	  else if (now->tok == tok_digit)
2240 	    goto handle_tok_digit;
2241 	  else if (now->tok < tok_upper || now->tok > tok_blank)
2242 	    goto err_label;
2243 	  else
2244 	    {
2245 	      class_bit = BITw (now->tok);
2246 	      class256_bit = BIT (now->tok);
2247 	    }
2248 
2249 	  /* The next character must be a semicolon.  */
2250 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2251 	  if (now->tok != tok_semicolon)
2252 	    goto err_label;
2253 	  goto read_charclass;
2254 
2255 	case tok_upper:
2256 	case tok_lower:
2257 	case tok_alpha:
2258 	case tok_alnum:
2259 	case tok_space:
2260 	case tok_cntrl:
2261 	case tok_punct:
2262 	case tok_graph:
2263 	case tok_print:
2264 	case tok_xdigit:
2265 	case tok_blank:
2266 	  /* Ignore the rest of the line if we don't need the input of
2267 	     this line.  */
2268 	  if (ignore_content)
2269 	    {
2270 	      lr_ignore_rest (ldfile, 0);
2271 	      break;
2272 	    }
2273 
2274 	  class_bit = BITw (now->tok);
2275 	  class256_bit = BIT (now->tok);
2276 	  handle_digits = 0;
2277 	read_charclass:
2278 	  ctype->class_done |= class_bit;
2279 	  last_token = tok_none;
2280 	  ellipsis_token = tok_none;
2281 	  step = 1;
2282 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2283 	  while (now->tok != tok_eol && now->tok != tok_eof)
2284 	    {
2285 	      uint32_t wch;
2286 	      struct charseq *seq;
2287 
2288 	      if (ellipsis_token == tok_none)
2289 		{
2290 		  if (get_character (now, charmap, repertoire, &seq, &wch))
2291 		    goto err_label;
2292 
2293 		  if (!ignore_content && seq != NULL && seq->nbytes == 1)
2294 		    /* Yep, we can store information about this byte
2295 		       sequence.  */
2296 		    ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2297 
2298 		  if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2299 		      && class_bit != 0)
2300 		    /* We have the UCS4 position.  */
2301 		    *find_idx (ctype, &ctype->class_collection,
2302 			       &ctype->class_collection_max,
2303 			       &ctype->class_collection_act, wch) |= class_bit;
2304 
2305 		  last_token = now->tok;
2306 		  /* Terminate the string.  */
2307 		  if (last_token == tok_bsymbol)
2308 		    {
2309 		      now->val.str.startmb[now->val.str.lenmb] = '\0';
2310 		      last_str = now->val.str.startmb;
2311 		    }
2312 		  else
2313 		    last_str = NULL;
2314 		  last_wch = wch;
2315 		  memcpy (last_charcode, now->val.charcode.bytes, 16);
2316 		  last_charcode_len = now->val.charcode.nbytes;
2317 
2318 		  if (!ignore_content && handle_digits == 1)
2319 		    {
2320 		      /* We must store the digit values.  */
2321 		      if (ctype->mbdigits_act == ctype->mbdigits_max)
2322 			{
2323 			  ctype->mbdigits_max += 10;
2324 			  ctype->mbdigits = xrealloc (ctype->mbdigits,
2325 						      (ctype->mbdigits_max
2326 						       * sizeof (char *)));
2327 			  ctype->wcdigits_max += 10;
2328 			  ctype->wcdigits = xrealloc (ctype->wcdigits,
2329 						      (ctype->wcdigits_max
2330 						       * sizeof (uint32_t)));
2331 			}
2332 
2333 		      ctype->mbdigits[ctype->mbdigits_act++] = seq;
2334 		      ctype->wcdigits[ctype->wcdigits_act++] = wch;
2335 		    }
2336 		  else if (!ignore_content && handle_digits == 2)
2337 		    {
2338 		      /* We must store the digit values.  */
2339 		      if (ctype->outdigits_act >= 10)
2340 			{
2341 			  lr_error (ldfile, _("\
2342 %s: field `%s' does not contain exactly ten entries"),
2343 			    "LC_CTYPE", "outdigit");
2344 			  lr_ignore_rest (ldfile, 0);
2345 			  break;
2346 			}
2347 
2348 		      ctype->mboutdigits[ctype->outdigits_act] = seq;
2349 		      ctype->wcoutdigits[ctype->outdigits_act] = wch;
2350 		      ++ctype->outdigits_act;
2351 		    }
2352 		}
2353 	      else
2354 		{
2355 		  /* Now it gets complicated.  We have to resolve the
2356 		     ellipsis problem.  First we must distinguish between
2357 		     the different kind of ellipsis and this must match the
2358 		     tokens we have seen.  */
2359 		  assert (last_token != tok_none);
2360 
2361 		  if (last_token != now->tok)
2362 		    {
2363 		      lr_error (ldfile, _("\
2364 ellipsis range must be marked by two operands of same type"));
2365 		      lr_ignore_rest (ldfile, 0);
2366 		      break;
2367 		    }
2368 
2369 		  if (last_token == tok_bsymbol)
2370 		    {
2371 		      if (ellipsis_token == tok_ellipsis3)
2372 			lr_error (ldfile, _("with symbolic name range values \
2373 the absolute ellipsis `...' must not be used"));
2374 
2375 		      charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2376 						   repertoire, now, last_str,
2377 						   class256_bit, class_bit,
2378 						   (ellipsis_token
2379 						    == tok_ellipsis4
2380 						    ? 10 : 16),
2381 						   ignore_content,
2382 						   handle_digits, step);
2383 		    }
2384 		  else if (last_token == tok_ucs4)
2385 		    {
2386 		      if (ellipsis_token != tok_ellipsis2)
2387 			lr_error (ldfile, _("\
2388 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2389 
2390 		      charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2391 					       repertoire, now, last_wch,
2392 					       class256_bit, class_bit,
2393 					       ignore_content, handle_digits,
2394 					       step);
2395 		    }
2396 		  else
2397 		    {
2398 		      assert (last_token == tok_charcode);
2399 
2400 		      if (ellipsis_token != tok_ellipsis3)
2401 			lr_error (ldfile, _("\
2402 with character code range values one must use the absolute ellipsis `...'"));
2403 
2404 		      charclass_charcode_ellipsis (ldfile, ctype, charmap,
2405 						   repertoire, now,
2406 						   last_charcode,
2407 						   last_charcode_len,
2408 						   class256_bit, class_bit,
2409 						   ignore_content,
2410 						   handle_digits);
2411 		    }
2412 
2413 		  /* Now we have used the last value.  */
2414 		  last_token = tok_none;
2415 		}
2416 
2417 	      /* Next we expect a semicolon or the end of the line.  */
2418 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2419 	      if (now->tok == tok_eol || now->tok == tok_eof)
2420 		break;
2421 
2422 	      if (last_token != tok_none
2423 		  && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2424 		{
2425 		  if (now->tok == tok_ellipsis2_2)
2426 		    {
2427 		      now->tok = tok_ellipsis2;
2428 		      step = 2;
2429 		    }
2430 		  else if (now->tok == tok_ellipsis4_2)
2431 		    {
2432 		      now->tok = tok_ellipsis4;
2433 		      step = 2;
2434 		    }
2435 
2436 		  ellipsis_token = now->tok;
2437 
2438 		  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2439 		  continue;
2440 		}
2441 
2442 	      if (now->tok != tok_semicolon)
2443 		goto err_label;
2444 
2445 	      /* And get the next character.  */
2446 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2447 
2448 	      ellipsis_token = tok_none;
2449 	      step = 1;
2450 	    }
2451 	  break;
2452 
2453 	case tok_digit:
2454 	  /* Ignore the rest of the line if we don't need the input of
2455 	     this line.  */
2456 	  if (ignore_content)
2457 	    {
2458 	      lr_ignore_rest (ldfile, 0);
2459 	      break;
2460 	    }
2461 
2462 	handle_tok_digit:
2463 	  class_bit = _ISwdigit;
2464 	  class256_bit = _ISdigit;
2465 	  handle_digits = 1;
2466 	  goto read_charclass;
2467 
2468 	case tok_outdigit:
2469 	  /* Ignore the rest of the line if we don't need the input of
2470 	     this line.  */
2471 	  if (ignore_content)
2472 	    {
2473 	      lr_ignore_rest (ldfile, 0);
2474 	      break;
2475 	    }
2476 
2477 	  if (ctype->outdigits_act != 0)
2478 	    lr_error (ldfile, _("\
2479 %s: field `%s' declared more than once"),
2480 		      "LC_CTYPE", "outdigit");
2481 	  class_bit = 0;
2482 	  class256_bit = 0;
2483 	  handle_digits = 2;
2484 	  goto read_charclass;
2485 
2486 	case tok_toupper:
2487 	  /* Ignore the rest of the line if we don't need the input of
2488 	     this line.  */
2489 	  if (ignore_content)
2490 	    {
2491 	      lr_ignore_rest (ldfile, 0);
2492 	      break;
2493 	    }
2494 
2495 	  mapidx = 0;
2496 	  goto read_mapping;
2497 
2498 	case tok_tolower:
2499 	  /* Ignore the rest of the line if we don't need the input of
2500 	     this line.  */
2501 	  if (ignore_content)
2502 	    {
2503 	      lr_ignore_rest (ldfile, 0);
2504 	      break;
2505 	    }
2506 
2507 	  mapidx = 1;
2508 	  goto read_mapping;
2509 
2510 	case tok_map:
2511 	  /* Ignore the rest of the line if we don't need the input of
2512 	     this line.  */
2513 	  if (ignore_content)
2514 	    {
2515 	      lr_ignore_rest (ldfile, 0);
2516 	      break;
2517 	    }
2518 
2519 	  /* We simply forget the `map' keyword and use the following
2520 	     operand to determine the mapping.  */
2521 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2522 	  if (now->tok == tok_ident || now->tok == tok_string)
2523 	    {
2524 	      size_t cnt;
2525 
2526 	      for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2527 		if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2528 		  break;
2529 
2530 	      if (cnt < ctype->map_collection_nr)
2531 		free (now->val.str.startmb);
2532 	      else
2533 		/* OK, it's a new map.  */
2534 		ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2535 
2536 	      mapidx = cnt;
2537 	    }
2538 	  else if (now->tok < tok_toupper || now->tok > tok_tolower)
2539 	    goto err_label;
2540 	  else
2541 	    mapidx = now->tok - tok_toupper;
2542 
2543 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2544 	  /* This better should be a semicolon.  */
2545 	  if (now->tok != tok_semicolon)
2546 	    goto err_label;
2547 
2548 	read_mapping:
2549 	  /* Test whether this mapping was already defined.  */
2550 	  if (ctype->tomap_done[mapidx])
2551 	    {
2552 	      lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2553 			ctype->mapnames[mapidx]);
2554 	      lr_ignore_rest (ldfile, 0);
2555 	      break;
2556 	    }
2557 	  ctype->tomap_done[mapidx] = 1;
2558 
2559 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2560 	  while (now->tok != tok_eol && now->tok != tok_eof)
2561 	    {
2562 	      struct charseq *from_seq;
2563 	      uint32_t from_wch;
2564 	      struct charseq *to_seq;
2565 	      uint32_t to_wch;
2566 
2567 	      /* Every pair starts with an opening brace.  */
2568 	      if (now->tok != tok_open_brace)
2569 		goto err_label;
2570 
2571 	      /* Next comes the from-value.  */
2572 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2573 	      if (get_character (now, charmap, repertoire, &from_seq,
2574 				 &from_wch) != 0)
2575 		goto err_label;
2576 
2577 	      /* The next is a comma.  */
2578 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2579 	      if (now->tok != tok_comma)
2580 		goto err_label;
2581 
2582 	      /* And the other value.  */
2583 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584 	      if (get_character (now, charmap, repertoire, &to_seq,
2585 				 &to_wch) != 0)
2586 		goto err_label;
2587 
2588 	      /* And the last thing is the closing brace.  */
2589 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590 	      if (now->tok != tok_close_brace)
2591 		goto err_label;
2592 
2593 	      if (!ignore_content)
2594 		{
2595 		  /* Check whether the mapping converts from an ASCII value
2596 		     to a non-ASCII value.  */
2597 		  if (from_seq != NULL && from_seq->nbytes == 1
2598 		      && isascii (from_seq->bytes[0])
2599 		      && to_seq != NULL && (to_seq->nbytes != 1
2600 					    || !isascii (to_seq->bytes[0])))
2601 		    ctype->to_nonascii = 1;
2602 
2603 		  if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2604 		      && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2605 		    /* We can use this value.  */
2606 		    ctype->map256_collection[mapidx][from_seq->bytes[0]]
2607 		      = to_seq->bytes[0];
2608 
2609 		  if (from_wch != ILLEGAL_CHAR_VALUE
2610 		      && to_wch != ILLEGAL_CHAR_VALUE)
2611 		    /* Both correct values.  */
2612 		    *find_idx (ctype, &ctype->map_collection[mapidx],
2613 			       &ctype->map_collection_max[mapidx],
2614 			       &ctype->map_collection_act[mapidx],
2615 			       from_wch) = to_wch;
2616 		}
2617 
2618 	      /* Now comes a semicolon or the end of the line/file.  */
2619 	      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2620 	      if (now->tok == tok_semicolon)
2621 		now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2622 	    }
2623 	  break;
2624 
2625 	case tok_translit_start:
2626 	  /* Ignore the entire translit section with its peculiar syntax
2627 	     if we don't need the input.  */
2628 	  if (ignore_content)
2629 	    {
2630 	      do
2631 		{
2632 		  lr_ignore_rest (ldfile, 0);
2633 		  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2634 		}
2635 	      while (now->tok != tok_translit_end && now->tok != tok_eof);
2636 
2637 	      if (now->tok == tok_eof)
2638 		lr_error (ldfile, _(\
2639 "%s: `translit_start' section does not end with `translit_end'"),
2640 			  "LC_CTYPE");
2641 
2642 	      break;
2643 	    }
2644 
2645 	  /* The rest of the line better should be empty.  */
2646 	  lr_ignore_rest (ldfile, 1);
2647 
2648 	  /* We count here the number of allocated entries in the `translit'
2649 	     array.  */
2650 	  cnt = 0;
2651 
2652 	  ldfile->translate_strings = 1;
2653 	  ldfile->return_widestr = 1;
2654 
2655 	  /* We proceed until we see the `translit_end' token.  */
2656 	  while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2657 		 now->tok != tok_translit_end && now->tok != tok_eof)
2658 	    {
2659 	      if (now->tok == tok_eol)
2660 		/* Ignore empty lines.  */
2661 		continue;
2662 
2663 	      if (now->tok == tok_include)
2664 		{
2665 		  /* We have to include locale.  */
2666 		  const char *locale_name;
2667 		  const char *repertoire_name;
2668 		  struct translit_include_t *include_stmt, **include_ptr;
2669 
2670 		  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2671 		  /* This should be a string or an identifier.  In any
2672 		     case something to name a locale.  */
2673 		  if (now->tok != tok_string && now->tok != tok_ident)
2674 		    {
2675 		    translit_syntax:
2676 		      lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2677 		      lr_ignore_rest (ldfile, 0);
2678 		      continue;
2679 		    }
2680 		  locale_name = now->val.str.startmb;
2681 
2682 		  /* Next should be a semicolon.  */
2683 		  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2684 		  if (now->tok != tok_semicolon)
2685 		    goto translit_syntax;
2686 
2687 		  /* Now the repertoire name.  */
2688 		  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2689 		  if ((now->tok != tok_string && now->tok != tok_ident)
2690 		      || now->val.str.startmb == NULL)
2691 		    goto translit_syntax;
2692 		  repertoire_name = now->val.str.startmb;
2693 		  if (repertoire_name[0] == '\0')
2694 		    /* Ignore the empty string.  */
2695 		    repertoire_name = NULL;
2696 
2697 		  /* Save the include statement for later processing.  */
2698 		  include_stmt = (struct translit_include_t *)
2699 		    xmalloc (sizeof (struct translit_include_t));
2700 		  include_stmt->copy_locale = locale_name;
2701 		  include_stmt->copy_repertoire = repertoire_name;
2702 		  include_stmt->next = NULL;
2703 
2704 		  include_ptr = &ctype->translit_include;
2705 		  while (*include_ptr != NULL)
2706 		    include_ptr = &(*include_ptr)->next;
2707 		  *include_ptr = include_stmt;
2708 
2709 		  /* The rest of the line must be empty.  */
2710 		  lr_ignore_rest (ldfile, 1);
2711 
2712 		  /* Make sure the locale is read.  */
2713 		  add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2714 				   1, NULL);
2715 		  continue;
2716 		}
2717 	      else if (now->tok == tok_default_missing)
2718 		{
2719 		  uint32_t *wstr;
2720 
2721 		  while (1)
2722 		    {
2723 		      /* We expect a single character or string as the
2724 			 argument.  */
2725 		      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2726 		      wstr = read_widestring (ldfile, now, charmap,
2727 					      repertoire);
2728 
2729 		      if (wstr != NULL)
2730 			{
2731 			  if (ctype->default_missing != NULL)
2732 			    {
2733 			      lr_error (ldfile, _("\
2734 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2735 			      record_error_at_line (0, 0,
2736 						    ctype->default_missing_file,
2737 						    ctype->default_missing_lineno,
2738 						    _("\
2739 previous definition was here"));
2740 			    }
2741 			  else
2742 			    {
2743 			      ctype->default_missing = wstr;
2744 			      ctype->default_missing_file = ldfile->fname;
2745 			      ctype->default_missing_lineno = ldfile->lineno;
2746 			    }
2747 			  /* We can have more entries, ignore them.  */
2748 			  lr_ignore_rest (ldfile, 0);
2749 			  break;
2750 			}
2751 		      else if (wstr == (uint32_t *) -1l)
2752 			/* This was an syntax error.  */
2753 			break;
2754 
2755 		      /* Maybe there is another replacement we can use.  */
2756 		      now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2757 		      if (now->tok == tok_eol || now->tok == tok_eof)
2758 			{
2759 			  /* Nothing found.  We tell the user.  */
2760 			  lr_error (ldfile, _("\
2761 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2762 			  break;
2763 			}
2764 		      if (now->tok != tok_semicolon)
2765 			goto translit_syntax;
2766 		    }
2767 
2768 		  continue;
2769 		}
2770 	      else if (now->tok == tok_translit_ignore)
2771 		{
2772 		  read_translit_ignore_entry (ldfile, ctype, charmap,
2773 					      repertoire);
2774 		  continue;
2775 		}
2776 
2777 	      read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2778 	    }
2779 	  ldfile->return_widestr = 0;
2780 
2781 	  if (now->tok == tok_eof)
2782 	    lr_error (ldfile, _(\
2783 "%s: `translit_start' section does not end with `translit_end'"),
2784 		      "LC_CTYPE");
2785 
2786 	  break;
2787 
2788 	case tok_ident:
2789 	  /* Ignore the rest of the line if we don't need the input of
2790 	     this line.  */
2791 	  if (ignore_content)
2792 	    {
2793 	      lr_ignore_rest (ldfile, 0);
2794 	      break;
2795 	    }
2796 
2797 	  /* This could mean one of several things.  First test whether
2798 	     it's a character class name.  */
2799 	  for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2800 	    if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2801 	      break;
2802 	  if (cnt < ctype->nr_charclass)
2803 	    {
2804 	      class_bit = _ISwbit (cnt);
2805 	      class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2806 	      free (now->val.str.startmb);
2807 	      goto read_charclass;
2808 	    }
2809 	  for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2810 	    if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2811 	      break;
2812 	  if (cnt < ctype->map_collection_nr)
2813 	    {
2814 	      mapidx = cnt;
2815 	      free (now->val.str.startmb);
2816 	      goto read_mapping;
2817             }
2818 	  break;
2819 
2820 	case tok_end:
2821 	  /* Next we assume `LC_CTYPE'.  */
2822 	  now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2823 	  if (now->tok == tok_eof)
2824 	    break;
2825 	  if (now->tok == tok_eol)
2826 	    lr_error (ldfile, _("%s: incomplete `END' line"),
2827 		      "LC_CTYPE");
2828 	  else if (now->tok != tok_lc_ctype)
2829 	    lr_error (ldfile, _("\
2830 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2831 	  lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2832 	  return;
2833 
2834 	default:
2835 	err_label:
2836 	  if (now->tok != tok_eof)
2837 	    SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2838 	}
2839 
2840       /* Prepare for the next round.  */
2841       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2842       nowtok = now->tok;
2843     }
2844 
2845   /* When we come here we reached the end of the file.  */
2846   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2847 }
2848 
2849 
2850 /* Subroutine of set_class_defaults, below.  */
2851 static void
set_one_default(struct locale_ctype_t * ctype,const struct charmap_t * charmap,int bitpos,int from,int to)2852 set_one_default (struct locale_ctype_t *ctype,
2853                  const struct charmap_t *charmap,
2854                  int bitpos, int from, int to)
2855 {
2856   char tmp[2];
2857   int ch;
2858   int bit = _ISbit (bitpos);
2859   int bitw = _ISwbit (bitpos);
2860   /* Define string.  */
2861   strcpy (tmp, "?");
2862 
2863   for (ch = from; ch <= to; ++ch)
2864     {
2865       struct charseq *seq;
2866       tmp[0] = ch;
2867 
2868       seq = charmap_find_value (charmap, tmp, 1);
2869       if (seq == NULL)
2870         {
2871           char buf[10];
2872           sprintf (buf, "U%08X", ch);
2873           seq = charmap_find_value (charmap, buf, 9);
2874         }
2875       if (seq == NULL)
2876         {
2877           record_error (0, 0, _("\
2878 %s: character `%s' not defined while needed as default value"),
2879 			"LC_CTYPE", tmp);
2880         }
2881       else if (seq->nbytes != 1)
2882 	record_error (0, 0, _("\
2883 %s: character `%s' in charmap not representable with one byte"),
2884 		      "LC_CTYPE", tmp);
2885       else
2886         ctype->class256_collection[seq->bytes[0]] |= bit;
2887 
2888       /* No need to search here, the ASCII value is also the Unicode
2889          value.  */
2890       ELEM (ctype, class_collection, , ch) |= bitw;
2891     }
2892 }
2893 
2894 static void
set_class_defaults(struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire)2895 set_class_defaults (struct locale_ctype_t *ctype,
2896 		    const struct charmap_t *charmap,
2897 		    struct repertoire_t *repertoire)
2898 {
2899 #define set_default(bitpos, from, to) \
2900   set_one_default (ctype, charmap, bitpos, from, to)
2901 
2902   /* These function defines the default values for the classes and conversions
2903      according to POSIX.2 2.5.2.1.
2904      It may seem that the order of these if-blocks is arbitrary but it is NOT.
2905      Don't move them unless you know what you do!  */
2906 
2907   /* Set default values if keyword was not present.  */
2908   if ((ctype->class_done & BITw (tok_upper)) == 0)
2909     /* "If this keyword [lower] is not specified, the lowercase letters
2910         `A' through `Z', ..., shall automatically belong to this class,
2911 	with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2912     set_default (BITPOS (tok_upper), 'A', 'Z');
2913 
2914   if ((ctype->class_done & BITw (tok_lower)) == 0)
2915     /* "If this keyword [lower] is not specified, the lowercase letters
2916         `a' through `z', ..., shall automatically belong to this class,
2917 	with implementation defined character values."  [P1003.2, 2.5.2.1]  */
2918     set_default (BITPOS (tok_lower), 'a', 'z');
2919 
2920   if ((ctype->class_done & BITw (tok_alpha)) == 0)
2921     {
2922       /* Table 2-6 in P1003.2 says that characters in class `upper' or
2923 	 class `lower' *must* be in class `alpha'.  */
2924       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2925       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2926 
2927       for (size_t cnt = 0; cnt < 256; ++cnt)
2928 	if ((ctype->class256_collection[cnt] & mask) != 0)
2929 	  ctype->class256_collection[cnt] |= BIT (tok_alpha);
2930 
2931       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2932 	if ((ctype->class_collection[cnt] & maskw) != 0)
2933 	  ctype->class_collection[cnt] |= BITw (tok_alpha);
2934     }
2935 
2936   if ((ctype->class_done & BITw (tok_digit)) == 0)
2937     /* "If this keyword [digit] is not specified, the digits `0' through
2938         `9', ..., shall automatically belong to this class, with
2939 	implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2940     set_default (BITPOS (tok_digit), '0', '9');
2941 
2942   /* "Only characters specified for the `alpha' and `digit' keyword
2943      shall be specified.  Characters specified for the keyword `alpha'
2944      and `digit' are automatically included in this class.  */
2945   {
2946     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2947     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2948 
2949     for (size_t cnt = 0; cnt < 256; ++cnt)
2950       if ((ctype->class256_collection[cnt] & mask) != 0)
2951 	ctype->class256_collection[cnt] |= BIT (tok_alnum);
2952 
2953     for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2954       if ((ctype->class_collection[cnt] & maskw) != 0)
2955 	ctype->class_collection[cnt] |= BITw (tok_alnum);
2956   }
2957 
2958   if ((ctype->class_done & BITw (tok_space)) == 0)
2959     /* "If this keyword [space] is not specified, the characters <space>,
2960         <form-feed>, <newline>, <carriage-return>, <tab>, and
2961 	<vertical-tab>, ..., shall automatically belong to this class,
2962 	with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
2963     {
2964       struct charseq *seq;
2965 
2966       seq = charmap_find_value (charmap, "space", 5);
2967       if (seq == NULL)
2968 	seq = charmap_find_value (charmap, "SP", 2);
2969       if (seq == NULL)
2970 	seq = charmap_find_value (charmap, "U00000020", 9);
2971       if (seq == NULL)
2972 	{
2973 	  record_error (0, 0, _("\
2974 %s: character `%s' not defined while needed as default value"),
2975 			"LC_CTYPE", "<space>");
2976 	}
2977       else if (seq->nbytes != 1)
2978 	record_error (0, 0, _("\
2979 %s: character `%s' in charmap not representable with one byte"),
2980 		      "LC_CTYPE", "<space>");
2981       else
2982 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2983 
2984       /* No need to search.  */
2985       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2986 
2987       seq = charmap_find_value (charmap, "form-feed", 9);
2988       if (seq == NULL)
2989 	seq = charmap_find_value (charmap, "U0000000C", 9);
2990       if (seq == NULL)
2991 	{
2992 	  record_error (0, 0, _("\
2993 %s: character `%s' not defined while needed as default value"),
2994 				    "LC_CTYPE", "<form-feed>");
2995 	}
2996       else if (seq->nbytes != 1)
2997 	record_error (0, 0, _("\
2998 %s: character `%s' in charmap not representable with one byte"),
2999 		      "LC_CTYPE", "<form-feed>");
3000       else
3001 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3002 
3003       /* No need to search.  */
3004       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3005 
3006 
3007       seq = charmap_find_value (charmap, "newline", 7);
3008       if (seq == NULL)
3009 	seq = charmap_find_value (charmap, "U0000000A", 9);
3010       if (seq == NULL)
3011 	{
3012 	  record_error (0, 0, _("\
3013 %s: character `%s' not defined while needed as default value"),
3014 			"LC_CTYPE", "<newline>");
3015 	}
3016       else if (seq->nbytes != 1)
3017 	record_error (0, 0, _("\
3018 %s: character `%s' in charmap not representable with one byte"),
3019 		      "LC_CTYPE", "<newline>");
3020       else
3021 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3022 
3023       /* No need to search.  */
3024       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3025 
3026 
3027       seq = charmap_find_value (charmap, "carriage-return", 15);
3028       if (seq == NULL)
3029 	seq = charmap_find_value (charmap, "U0000000D", 9);
3030       if (seq == NULL)
3031 	{
3032 	  record_error (0, 0, _("\
3033 %s: character `%s' not defined while needed as default value"),
3034 			"LC_CTYPE", "<carriage-return>");
3035 	}
3036       else if (seq->nbytes != 1)
3037 	record_error (0, 0, _("\
3038 %s: character `%s' in charmap not representable with one byte"),
3039 		      "LC_CTYPE", "<carriage-return>");
3040       else
3041 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3042 
3043       /* No need to search.  */
3044       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3045 
3046 
3047       seq = charmap_find_value (charmap, "tab", 3);
3048       if (seq == NULL)
3049 	seq = charmap_find_value (charmap, "U00000009", 9);
3050       if (seq == NULL)
3051 	{
3052 	  record_error (0, 0, _("\
3053 %s: character `%s' not defined while needed as default value"),
3054 			"LC_CTYPE", "<tab>");
3055 	}
3056       else if (seq->nbytes != 1)
3057 	record_error (0, 0, _("\
3058 %s: character `%s' in charmap not representable with one byte"),
3059 		      "LC_CTYPE", "<tab>");
3060       else
3061 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3062 
3063       /* No need to search.  */
3064       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3065 
3066 
3067       seq = charmap_find_value (charmap, "vertical-tab", 12);
3068       if (seq == NULL)
3069 	seq = charmap_find_value (charmap, "U0000000B", 9);
3070       if (seq == NULL)
3071 	{
3072 	  record_error (0, 0, _("\
3073 %s: character `%s' not defined while needed as default value"),
3074 			"LC_CTYPE", "<vertical-tab>");
3075 	}
3076       else if (seq->nbytes != 1)
3077 	record_error (0, 0, _("\
3078 %s: character `%s' in charmap not representable with one byte"),
3079 		      "LC_CTYPE", "<vertical-tab>");
3080       else
3081 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3082 
3083       /* No need to search.  */
3084       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3085     }
3086 
3087   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3088     /* "If this keyword is not specified, the digits `0' to `9', the
3089         uppercase letters `A' through `F', and the lowercase letters `a'
3090 	through `f', ..., shell automatically belong to this class, with
3091 	implementation defined character values."  [P1003.2, 2.5.2.1]  */
3092     {
3093       set_default (BITPOS (tok_xdigit), '0', '9');
3094       set_default (BITPOS (tok_xdigit), 'A', 'F');
3095       set_default (BITPOS (tok_xdigit), 'a', 'f');
3096     }
3097 
3098   if ((ctype->class_done & BITw (tok_blank)) == 0)
3099     /* "If this keyword [blank] is unspecified, the characters <space> and
3100        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3101    {
3102       struct charseq *seq;
3103 
3104       seq = charmap_find_value (charmap, "space", 5);
3105       if (seq == NULL)
3106 	seq = charmap_find_value (charmap, "SP", 2);
3107       if (seq == NULL)
3108 	seq = charmap_find_value (charmap, "U00000020", 9);
3109       if (seq == NULL)
3110 	{
3111 	  record_error (0, 0, _("\
3112 %s: character `%s' not defined while needed as default value"),
3113 			"LC_CTYPE", "<space>");
3114 	}
3115       else if (seq->nbytes != 1)
3116 	record_error (0, 0, _("\
3117 %s: character `%s' in charmap not representable with one byte"),
3118 		      "LC_CTYPE", "<space>");
3119       else
3120 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3121 
3122       /* No need to search.  */
3123       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3124 
3125 
3126       seq = charmap_find_value (charmap, "tab", 3);
3127       if (seq == NULL)
3128 	seq = charmap_find_value (charmap, "U00000009", 9);
3129       if (seq == NULL)
3130 	{
3131 	   record_error (0, 0, _("\
3132 %s: character `%s' not defined while needed as default value"),
3133 		         "LC_CTYPE", "<tab>");
3134 	}
3135       else if (seq->nbytes != 1)
3136 	record_error (0, 0, _("\
3137 %s: character `%s' in charmap not representable with one byte"),
3138 		      "LC_CTYPE", "<tab>");
3139       else
3140 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3141 
3142       /* No need to search.  */
3143       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3144     }
3145 
3146   if ((ctype->class_done & BITw (tok_graph)) == 0)
3147     /* "If this keyword [graph] is not specified, characters specified for
3148         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3149 	shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3150     {
3151       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3152 	| BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3153 	| BIT (tok_punct);
3154       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3155 	| BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3156 	| BITw (tok_punct);
3157 
3158       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3159 	if ((ctype->class_collection[cnt] & maskw) != 0)
3160 	  ctype->class_collection[cnt] |= BITw (tok_graph);
3161 
3162       for (size_t cnt = 0; cnt < 256; ++cnt)
3163 	if ((ctype->class256_collection[cnt] & mask) != 0)
3164 	  ctype->class256_collection[cnt] |= BIT (tok_graph);
3165     }
3166 
3167   if ((ctype->class_done & BITw (tok_print)) == 0)
3168     /* "If this keyword [print] is not provided, characters specified for
3169         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3170 	and the <space> character shall belong to this character class."
3171 	[P1003.2, 2.5.2.1]  */
3172     {
3173       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower)
3174 	| BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit)
3175 	| BIT (tok_punct);
3176       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower)
3177 	| BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit)
3178 	| BITw (tok_punct);
3179       struct charseq *seq;
3180 
3181       for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3182 	if ((ctype->class_collection[cnt] & maskw) != 0)
3183 	  ctype->class_collection[cnt] |= BITw (tok_print);
3184 
3185       for (size_t cnt = 0; cnt < 256; ++cnt)
3186 	if ((ctype->class256_collection[cnt] & mask) != 0)
3187 	  ctype->class256_collection[cnt] |= BIT (tok_print);
3188 
3189 
3190       seq = charmap_find_value (charmap, "space", 5);
3191       if (seq == NULL)
3192 	seq = charmap_find_value (charmap, "SP", 2);
3193       if (seq == NULL)
3194 	seq = charmap_find_value (charmap, "U00000020", 9);
3195       if (seq == NULL)
3196 	{
3197 	  record_error (0, 0, _("\
3198 %s: character `%s' not defined while needed as default value"),
3199 			"LC_CTYPE", "<space>");
3200 	}
3201       else if (seq->nbytes != 1)
3202 	record_error (0, 0, _("\
3203 %s: character `%s' in charmap not representable with one byte"),
3204 		      "LC_CTYPE", "<space>");
3205       else
3206 	ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3207 
3208       /* No need to search.  */
3209       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3210     }
3211 
3212   if (ctype->tomap_done[0] == 0)
3213     /* "If this keyword [toupper] is not specified, the lowercase letters
3214         `a' through `z', and their corresponding uppercase letters `A' to
3215 	`Z', ..., shall automatically be included, with implementation-
3216 	defined character values."  [P1003.2, 2.5.2.1]  */
3217     {
3218       char tmp[4];
3219       int ch;
3220 
3221       strcpy (tmp, "<?>");
3222 
3223       for (ch = 'a'; ch <= 'z'; ++ch)
3224 	{
3225 	  struct charseq *seq_from, *seq_to;
3226 
3227 	  tmp[1] = (char) ch;
3228 
3229 	  seq_from = charmap_find_value (charmap, &tmp[1], 1);
3230 	  if (seq_from == NULL)
3231 	    {
3232 	      char buf[10];
3233 	      sprintf (buf, "U%08X", ch);
3234 	      seq_from = charmap_find_value (charmap, buf, 9);
3235 	    }
3236 	  if (seq_from == NULL)
3237 	    {
3238 	      record_error (0, 0, _("\
3239 %s: character `%s' not defined while needed as default value"),
3240 			    "LC_CTYPE", tmp);
3241 	    }
3242 	  else if (seq_from->nbytes != 1)
3243 	    {
3244 	      record_error (0, 0, _("\
3245 %s: character `%s' needed as default value not representable with one byte"),
3246 			    "LC_CTYPE", tmp);
3247 	    }
3248 	  else
3249 	    {
3250 	      /* This conversion is implementation defined.  */
3251 	      tmp[1] = (char) (ch + ('A' - 'a'));
3252 	      seq_to = charmap_find_value (charmap, &tmp[1], 1);
3253 	      if (seq_to == NULL)
3254 		{
3255 		  char buf[10];
3256 		  sprintf (buf, "U%08X", ch + ('A' - 'a'));
3257 		  seq_to = charmap_find_value (charmap, buf, 9);
3258 		}
3259 	      if (seq_to == NULL)
3260 		{
3261 		  record_error (0, 0, _("\
3262 %s: character `%s' not defined while needed as default value"),
3263 				"LC_CTYPE", tmp);
3264 		}
3265 	      else if (seq_to->nbytes != 1)
3266 		{
3267 		  record_error (0, 0, _("\
3268 %s: character `%s' needed as default value not representable with one byte"),
3269 				"LC_CTYPE", tmp);
3270 		}
3271 	      else
3272 		/* The index [0] is determined by the order of the
3273 		   `ctype_map_newP' calls in `ctype_startup'.  */
3274 		ctype->map256_collection[0][seq_from->bytes[0]]
3275 		  = seq_to->bytes[0];
3276 	    }
3277 
3278 	  /* No need to search.  */
3279 	  ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3280 	}
3281     }
3282 
3283   if (ctype->tomap_done[1] == 0)
3284     /* "If this keyword [tolower] is not specified, the mapping shall be
3285        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3286     {
3287       for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3288 	if (ctype->map_collection[0][cnt] != 0)
3289 	  ELEM (ctype, map_collection, [1],
3290 		ctype->map_collection[0][cnt])
3291 	    = ctype->charnames[cnt];
3292 
3293       for (size_t cnt = 0; cnt < 256; ++cnt)
3294 	if (ctype->map256_collection[0][cnt] != 0)
3295 	  ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3296     }
3297 
3298   if (ctype->outdigits_act != 10)
3299     {
3300       if (ctype->outdigits_act != 0)
3301 	record_error (0, 0, _("\
3302 %s: field `%s' does not contain exactly ten entries"),
3303 		      "LC_CTYPE", "outdigit");
3304 
3305       for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3306 	{
3307 	  ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3308 							 (char *) digits + cnt,
3309 							 1);
3310 
3311 	  if (ctype->mboutdigits[cnt] == NULL)
3312 	    ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3313 							   longnames[cnt],
3314 							   strlen (longnames[cnt]));
3315 
3316 	  if (ctype->mboutdigits[cnt] == NULL)
3317 	    ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3318 							   uninames[cnt], 9);
3319 
3320 	  if (ctype->mboutdigits[cnt] == NULL)
3321 	    {
3322 	      /* Provide a replacement.  */
3323 	      record_error (0, 0, _("\
3324 no output digits defined and none of the standard names in the charmap"));
3325 
3326 	      ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3327 						       sizeof (struct charseq)
3328 						       + 1);
3329 
3330 	      /* This is better than nothing.  */
3331 	      ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3332 	      ctype->mboutdigits[cnt]->nbytes = 1;
3333 	    }
3334 
3335 	  ctype->wcoutdigits[cnt] = L'0' + cnt;
3336 	}
3337 
3338       ctype->outdigits_act = 10;
3339     }
3340 
3341 #undef set_default
3342 }
3343 
3344 
3345 /* Initialize.  Assumes t->p and t->q have already been set.  */
3346 static inline void
wctype_table_init(struct wctype_table * t)3347 wctype_table_init (struct wctype_table *t)
3348 {
3349   t->level1 = NULL;
3350   t->level1_alloc = t->level1_size = 0;
3351   t->level2 = NULL;
3352   t->level2_alloc = t->level2_size = 0;
3353   t->level3 = NULL;
3354   t->level3_alloc = t->level3_size = 0;
3355 }
3356 
3357 /* Add one entry.  */
3358 static void
wctype_table_add(struct wctype_table * t,uint32_t wc)3359 wctype_table_add (struct wctype_table *t, uint32_t wc)
3360 {
3361   uint32_t index1 = wc >> (t->q + t->p + 5);
3362   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3363   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3364   uint32_t index4 = wc & 0x1f;
3365   size_t i, i1, i2;
3366 
3367   if (index1 >= t->level1_size)
3368     {
3369       if (index1 >= t->level1_alloc)
3370 	{
3371 	  size_t alloc = 2 * t->level1_alloc;
3372 	  if (alloc <= index1)
3373 	    alloc = index1 + 1;
3374 	  t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3375 					     alloc * sizeof (uint32_t));
3376 	  t->level1_alloc = alloc;
3377 	}
3378       while (index1 >= t->level1_size)
3379 	t->level1[t->level1_size++] = EMPTY;
3380     }
3381 
3382   if (t->level1[index1] == EMPTY)
3383     {
3384       if (t->level2_size == t->level2_alloc)
3385 	{
3386 	  size_t alloc = 2 * t->level2_alloc + 1;
3387 	  t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3388 					     (alloc << t->q) * sizeof (uint32_t));
3389 	  t->level2_alloc = alloc;
3390 	}
3391       i1 = t->level2_size << t->q;
3392       i2 = (t->level2_size + 1) << t->q;
3393       for (i = i1; i < i2; i++)
3394 	t->level2[i] = EMPTY;
3395       t->level1[index1] = t->level2_size++;
3396     }
3397 
3398   index2 += t->level1[index1] << t->q;
3399 
3400   if (t->level2[index2] == EMPTY)
3401     {
3402       if (t->level3_size == t->level3_alloc)
3403 	{
3404 	  size_t alloc = 2 * t->level3_alloc + 1;
3405 	  t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3406 					     (alloc << t->p) * sizeof (uint32_t));
3407 	  t->level3_alloc = alloc;
3408 	}
3409       i1 = t->level3_size << t->p;
3410       i2 = (t->level3_size + 1) << t->p;
3411       for (i = i1; i < i2; i++)
3412 	t->level3[i] = 0;
3413       t->level2[index2] = t->level3_size++;
3414     }
3415 
3416   index3 += t->level2[index2] << t->p;
3417 
3418   t->level3[index3] |= (uint32_t)1 << index4;
3419 }
3420 
3421 /* Finalize and shrink.  */
3422 static void
add_locale_wctype_table(struct locale_file * file,struct wctype_table * t)3423 add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
3424 {
3425   size_t i, j, k;
3426   uint32_t reorder3[t->level3_size];
3427   uint32_t reorder2[t->level2_size];
3428   uint32_t level2_offset, level3_offset;
3429 
3430   /* Uniquify level3 blocks.  */
3431   k = 0;
3432   for (j = 0; j < t->level3_size; j++)
3433     {
3434       for (i = 0; i < k; i++)
3435 	if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3436 		    (1 << t->p) * sizeof (uint32_t)) == 0)
3437 	  break;
3438       /* Relocate block j to block i.  */
3439       reorder3[j] = i;
3440       if (i == k)
3441 	{
3442 	  if (i != j)
3443 	    memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3444 		    (1 << t->p) * sizeof (uint32_t));
3445 	  k++;
3446 	}
3447     }
3448   t->level3_size = k;
3449 
3450   for (i = 0; i < (t->level2_size << t->q); i++)
3451     if (t->level2[i] != EMPTY)
3452       t->level2[i] = reorder3[t->level2[i]];
3453 
3454   /* Uniquify level2 blocks.  */
3455   k = 0;
3456   for (j = 0; j < t->level2_size; j++)
3457     {
3458       for (i = 0; i < k; i++)
3459 	if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3460 		    (1 << t->q) * sizeof (uint32_t)) == 0)
3461 	  break;
3462       /* Relocate block j to block i.  */
3463       reorder2[j] = i;
3464       if (i == k)
3465 	{
3466 	  if (i != j)
3467 	    memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3468 		    (1 << t->q) * sizeof (uint32_t));
3469 	  k++;
3470 	}
3471     }
3472   t->level2_size = k;
3473 
3474   for (i = 0; i < t->level1_size; i++)
3475     if (t->level1[i] != EMPTY)
3476       t->level1[i] = reorder2[t->level1[i]];
3477 
3478   t->result_size =
3479     5 * sizeof (uint32_t)
3480     + t->level1_size * sizeof (uint32_t)
3481     + (t->level2_size << t->q) * sizeof (uint32_t)
3482     + (t->level3_size << t->p) * sizeof (uint32_t);
3483 
3484   level2_offset =
3485     5 * sizeof (uint32_t)
3486     + t->level1_size * sizeof (uint32_t);
3487   level3_offset =
3488     5 * sizeof (uint32_t)
3489     + t->level1_size * sizeof (uint32_t)
3490     + (t->level2_size << t->q) * sizeof (uint32_t);
3491 
3492   start_locale_structure (file);
3493   add_locale_uint32 (file, t->q + t->p + 5);
3494   add_locale_uint32 (file, t->level1_size);
3495   add_locale_uint32 (file, t->p + 5);
3496   add_locale_uint32 (file, (1 << t->q) - 1);
3497   add_locale_uint32 (file, (1 << t->p) - 1);
3498 
3499   for (i = 0; i < t->level1_size; i++)
3500     add_locale_uint32
3501       (file,
3502        t->level1[i] == EMPTY
3503        ? 0
3504        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3505 
3506   for (i = 0; i < (t->level2_size << t->q); i++)
3507     add_locale_uint32
3508       (file,
3509        t->level2[i] == EMPTY
3510        ? 0
3511        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3512 
3513   add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3514   end_locale_structure (file);
3515 
3516   if (t->level1_alloc > 0)
3517     free (t->level1);
3518   if (t->level2_alloc > 0)
3519     free (t->level2);
3520   if (t->level3_alloc > 0)
3521     free (t->level3);
3522 }
3523 
3524 /* Flattens the included transliterations into a translit list.
3525    Inserts them in the list at `cursor', and returns the new cursor.  */
3526 static struct translit_t **
translit_flatten(struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct translit_t ** cursor)3527 translit_flatten (struct locale_ctype_t *ctype,
3528 		  const struct charmap_t *charmap,
3529 		  struct translit_t **cursor)
3530 {
3531   while (ctype->translit_include != NULL)
3532     {
3533       const char *copy_locale = ctype->translit_include->copy_locale;
3534       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3535       struct localedef_t *other;
3536 
3537       /* Unchain the include statement.  During the depth-first traversal
3538 	 we don't want to visit any locale more than once.  */
3539       ctype->translit_include = ctype->translit_include->next;
3540 
3541       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3542 
3543       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3544 	{
3545 	  record_error (0, 0, _("\
3546 %s: transliteration data from locale `%s' not available"),
3547 			"LC_CTYPE", copy_locale);
3548 	}
3549       else
3550 	{
3551 	  struct locale_ctype_t *other_ctype =
3552 	    other->categories[LC_CTYPE].ctype;
3553 
3554 	  cursor = translit_flatten (other_ctype, charmap, cursor);
3555 	  assert (other_ctype->translit_include == NULL);
3556 
3557 	  if (other_ctype->translit != NULL)
3558 	    {
3559 	      /* Insert the other_ctype->translit list at *cursor.  */
3560 	      struct translit_t *endp = other_ctype->translit;
3561 	      while (endp->next != NULL)
3562 		endp = endp->next;
3563 
3564 	      endp->next = *cursor;
3565 	      *cursor = other_ctype->translit;
3566 
3567 	      /* Avoid any risk of circular lists.  */
3568 	      other_ctype->translit = NULL;
3569 
3570 	      cursor = &endp->next;
3571 	    }
3572 
3573 	  if (ctype->default_missing == NULL)
3574 	    ctype->default_missing = other_ctype->default_missing;
3575 	}
3576     }
3577 
3578   return cursor;
3579 }
3580 
3581 static void
allocate_arrays(struct locale_ctype_t * ctype,const struct charmap_t * charmap,struct repertoire_t * repertoire)3582 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3583 		 struct repertoire_t *repertoire)
3584 {
3585   size_t idx, nr;
3586   const void *key;
3587   size_t len;
3588   void *vdata;
3589   void *curs;
3590 
3591   /* You wonder about this amount of memory?  This is only because some
3592      users do not manage to address the array with unsigned values or
3593      data types with range >= 256.  '\200' would result in the array
3594      index -128.  To help these poor people we duplicate the entries for
3595      128 up to 255 below the entry for \0.  */
3596   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3597   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3598   ctype->class_b = (uint32_t **)
3599     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3600   ctype->class_3level = (struct wctype_table *)
3601     xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3602 
3603   /* This is the array accessed using the multibyte string elements.  */
3604   for (idx = 0; idx < 256; ++idx)
3605     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3606 
3607   /* Mirror first 127 entries.  We must take care that entry -1 is not
3608      mirrored because EOF == -1.  */
3609   for (idx = 0; idx < 127; ++idx)
3610     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3611 
3612   /* The 32 bit array contains all characters < 0x100.  */
3613   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3614     if (ctype->charnames[idx] < 0x100)
3615       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3616 
3617   for (nr = 0; nr < ctype->nr_charclass; nr++)
3618     {
3619       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3620 
3621       /* We only set CLASS_B for the bits in the ISO C classes, not
3622 	 the user defined classes.  The number should not change but
3623 	 who knows.  */
3624 #define LAST_ISO_C_BIT 11
3625       if (nr <= LAST_ISO_C_BIT)
3626 	for (idx = 0; idx < 256; ++idx)
3627 	  if (ctype->class256_collection[idx] & _ISbit (nr))
3628 	    ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3629     }
3630 
3631   for (nr = 0; nr < ctype->nr_charclass; nr++)
3632     {
3633       struct wctype_table *t;
3634 
3635       t = &ctype->class_3level[nr];
3636       t->p = 4; /* or: 5 */
3637       t->q = 7; /* or: 6 */
3638       wctype_table_init (t);
3639 
3640       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3641 	if (ctype->class_collection[idx] & _ISwbit (nr))
3642 	  wctype_table_add (t, ctype->charnames[idx]);
3643 
3644       record_verbose (stderr, _("\
3645 %s: table for class \"%s\": %lu bytes"),
3646 		      "LC_CTYPE", ctype->classnames[nr],
3647 		      (unsigned long int) t->result_size);
3648     }
3649 
3650   /* Room for table of mappings.  */
3651   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3652   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3653 					  * sizeof (uint32_t *));
3654   ctype->map_3level = (struct wctrans_table *)
3655     xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3656 
3657   /* Fill in all mappings.  */
3658   for (idx = 0; idx < 2; ++idx)
3659     {
3660       unsigned int idx2;
3661 
3662       /* Allocate table.  */
3663       ctype->map_b[idx] = (uint32_t *)
3664 	xmalloc ((256 + 128) * sizeof (uint32_t));
3665 
3666       /* Copy values from collection.  */
3667       for (idx2 = 0; idx2 < 256; ++idx2)
3668 	ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3669 
3670       /* Mirror first 127 entries.  We must take care not to map entry
3671 	 -1 because EOF == -1.  */
3672       for (idx2 = 0; idx2 < 127; ++idx2)
3673 	ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3674 
3675       /* EOF must map to EOF.  */
3676       ctype->map_b[idx][127] = EOF;
3677     }
3678 
3679   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3680     {
3681       unsigned int idx2;
3682 
3683       /* Allocate table.  */
3684       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3685 
3686       /* Copy values from collection.  Default is identity mapping.  */
3687       for (idx2 = 0; idx2 < 256; ++idx2)
3688 	ctype->map32_b[idx][idx2] =
3689 	  (ctype->map_collection[idx][idx2] != 0
3690 	   ? ctype->map_collection[idx][idx2]
3691 	   : idx2);
3692     }
3693 
3694   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3695     {
3696       struct wctrans_table *t;
3697 
3698       t = &ctype->map_3level[nr];
3699       t->p = 7;
3700       t->q = 9;
3701       wctrans_table_init (t);
3702 
3703       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3704 	if (ctype->map_collection[nr][idx] != 0)
3705 	  wctrans_table_add (t, ctype->charnames[idx],
3706 			     ctype->map_collection[nr][idx]);
3707 
3708       record_verbose (stderr, _("\
3709 %s: table for map \"%s\": %lu bytes"),
3710 		      "LC_CTYPE", ctype->mapnames[nr],
3711 		      (unsigned long int) t->result_size);
3712     }
3713 
3714   /* Extra array for class and map names.  */
3715   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3716 						* sizeof (uint32_t));
3717   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3718 					      * sizeof (uint32_t));
3719 
3720   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3721   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3722 
3723   /* Array for width information.  Because the expected widths are very
3724      small (never larger than 2) we use only one single byte.  This
3725      saves space.
3726      We put only printable characters in the table.  wcwidth is specified
3727      to return -1 for non-printable characters.  Doing the check here
3728      saves a run-time check.
3729      But we put L'\0' in the table.  This again saves a run-time check.  */
3730   {
3731     struct wcwidth_table *t;
3732 
3733     t = &ctype->width;
3734     t->p = 7;
3735     t->q = 9;
3736     wcwidth_table_init (t);
3737 
3738     /* First set all the printable characters of the character set to
3739        the default width.  */
3740     curs = NULL;
3741     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3742       {
3743 	struct charseq *data = (struct charseq *) vdata;
3744 
3745 	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3746 	  data->ucs4 = repertoire_find_value (ctype->repertoire,
3747 					      data->name, len);
3748 
3749 	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3750 	  {
3751 	    uint32_t *class_bits =
3752 	      find_idx (ctype, &ctype->class_collection, NULL,
3753 			&ctype->class_collection_act, data->ucs4);
3754 
3755 	    if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3756 	      wcwidth_table_add (t, data->ucs4, charmap->width_default);
3757 	  }
3758       }
3759 
3760     /* Now add the explicitly specified widths.  */
3761     if (charmap->width_rules != NULL)
3762       for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3763         {
3764           unsigned char bytes[charmap->mb_cur_max];
3765           int nbytes = charmap->width_rules[cnt].from->nbytes;
3766 
3767           /* We have the range of characters for which the width is
3768              specified described using byte sequences of the multibyte
3769              charset.  We have to convert this to UCS4 now.  And we
3770              cannot simply convert the beginning and the end of the
3771              sequence, we have to iterate over the byte sequence and
3772              convert it for every single character.  */
3773           memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3774 
3775           while (nbytes < charmap->width_rules[cnt].to->nbytes
3776                  || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3777                             nbytes) <= 0)
3778             {
3779               /* Find the UCS value for `bytes'.  */
3780               int inner;
3781               uint32_t wch;
3782               struct charseq *seq =
3783                 charmap_find_symbol (charmap, (char *) bytes, nbytes);
3784 
3785               if (seq == NULL)
3786                 wch = ILLEGAL_CHAR_VALUE;
3787               else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3788                 wch = seq->ucs4;
3789               else
3790                 wch = repertoire_find_value (ctype->repertoire, seq->name,
3791                                              strlen (seq->name));
3792 
3793               if (wch != ILLEGAL_CHAR_VALUE)
3794                 {
3795                   /* Store the value.  */
3796                   uint32_t *class_bits =
3797                     find_idx (ctype, &ctype->class_collection, NULL,
3798                               &ctype->class_collection_act, wch);
3799 
3800                   if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3801                     wcwidth_table_add (t, wch,
3802                                        charmap->width_rules[cnt].width);
3803                 }
3804 
3805               /* "Increment" the bytes sequence.  */
3806               inner = nbytes - 1;
3807               while (inner >= 0 && bytes[inner] == 0xff)
3808                 --inner;
3809 
3810               if (inner < 0)
3811                 {
3812                   /* We have to extend the byte sequence.  */
3813                   if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3814                     break;
3815 
3816                   bytes[0] = 1;
3817                   memset (&bytes[1], 0, nbytes);
3818                   ++nbytes;
3819                 }
3820               else
3821                 {
3822                   ++bytes[inner];
3823                   while (++inner < nbytes)
3824                     bytes[inner] = 0;
3825                 }
3826             }
3827         }
3828 
3829     /* Set the width of L'\0' to 0.  */
3830     wcwidth_table_add (t, 0, 0);
3831 
3832     record_verbose (stderr, _("%s: table for width: %lu bytes"),
3833 		    "LC_CTYPE", (unsigned long int) t->result_size);
3834   }
3835 
3836   /* Set MB_CUR_MAX.  */
3837   ctype->mb_cur_max = charmap->mb_cur_max;
3838 
3839   /* Now determine the table for the transliteration information.
3840 
3841      XXX It is not yet clear to me whether it is worth implementing a
3842      complicated algorithm which uses a hash table to locate the entries.
3843      For now I'll use a simple array which can be searched using binary
3844      search.  */
3845   if (ctype->translit_include != NULL)
3846     /* Traverse the locales mentioned in the `include' statements in a
3847        depth-first way and fold in their transliteration information.  */
3848     translit_flatten (ctype, charmap, &ctype->translit);
3849 
3850   if (ctype->translit != NULL)
3851     {
3852       /* First count how many entries we have.  This is the upper limit
3853 	 since some entries from the included files might be overwritten.  */
3854       size_t number = 0;
3855       struct translit_t *runp = ctype->translit;
3856       struct translit_t **sorted;
3857       size_t from_len, to_len;
3858 
3859       while (runp != NULL)
3860 	{
3861 	  ++number;
3862 	  runp = runp->next;
3863 	}
3864 
3865       /* Next we allocate an array large enough and fill in the values.  */
3866       sorted = (struct translit_t **) alloca (number
3867 					      * sizeof (struct translit_t **));
3868       runp = ctype->translit;
3869       number = 0;
3870       do
3871 	{
3872 	  /* Search for the place where to insert this string.
3873 	     XXX Better use a real sorting algorithm later.  */
3874 	  size_t idx = 0;
3875 	  int replace = 0;
3876 
3877 	  while (idx < number)
3878 	    {
3879 	      int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3880 				(const wchar_t *) runp->from);
3881 	      if (res == 0)
3882 		{
3883 		  replace = 1;
3884 		  break;
3885 		}
3886 	      if (res > 0)
3887 		break;
3888 	      ++idx;
3889 	    }
3890 
3891 	  if (replace)
3892 	    sorted[idx] = runp;
3893 	  else
3894 	    {
3895 	      memmove (&sorted[idx + 1], &sorted[idx],
3896 		       (number - idx) * sizeof (struct translit_t *));
3897 	      sorted[idx] = runp;
3898 	      ++number;
3899 	    }
3900 
3901 	  runp = runp->next;
3902 	}
3903       while (runp != NULL);
3904 
3905       /* The next step is putting all the possible transliteration
3906 	 strings in one memory block so that we can write it out.
3907 	 We need several different blocks:
3908 	 - index to the from-string array
3909 	 - from-string array
3910 	 - index to the to-string array
3911 	 - to-string array.
3912       */
3913       from_len = to_len = 0;
3914       for (size_t cnt = 0; cnt < number; ++cnt)
3915 	{
3916 	  struct translit_to_t *srunp;
3917 	  from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3918 	  srunp = sorted[cnt]->to;
3919 	  while (srunp != NULL)
3920 	    {
3921 	      to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3922 	      srunp = srunp->next;
3923 	    }
3924 	  /* Plus one for the extra NUL character marking the end of
3925 	     the list for the current entry.  */
3926 	  ++to_len;
3927 	}
3928 
3929       /* We can allocate the arrays for the results.  */
3930       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3931       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3932       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3933       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3934 
3935       from_len = 0;
3936       to_len = 0;
3937       for (size_t cnt = 0; cnt < number; ++cnt)
3938 	{
3939 	  size_t len;
3940 	  struct translit_to_t *srunp;
3941 
3942 	  ctype->translit_from_idx[cnt] = from_len;
3943 	  ctype->translit_to_idx[cnt] = to_len;
3944 
3945 	  len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3946 	  wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
3947 		   (const wchar_t *) sorted[cnt]->from, len);
3948 	  from_len += len;
3949 
3950 	  ctype->translit_to_idx[cnt] = to_len;
3951 	  srunp = sorted[cnt]->to;
3952 	  while (srunp != NULL)
3953 	    {
3954 	      len = wcslen ((const wchar_t *) srunp->str) + 1;
3955 	      wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
3956 		       (const wchar_t *) srunp->str, len);
3957 	      to_len += len;
3958 	      srunp = srunp->next;
3959 	    }
3960 	  ctype->translit_to_tbl[to_len++] = L'\0';
3961 	}
3962 
3963       /* Store the information about the length.  */
3964       ctype->translit_idx_size = number;
3965       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
3966       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
3967     }
3968   else
3969     {
3970       ctype->translit_from_idx = no_str;
3971       ctype->translit_from_tbl = no_str;
3972       ctype->translit_to_tbl = no_str;
3973       ctype->translit_idx_size = 0;
3974       ctype->translit_from_tbl_size = 0;
3975       ctype->translit_to_tbl_size = 0;
3976     }
3977 }
3978