1 /* Copyright (C) 1995-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published
6    by the Free Software Foundation; version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <wchar.h>
24 #include <stdint.h>
25 #include <sys/param.h>
26 #include <array_length.h>
27 
28 #include "localedef.h"
29 #include "charmap.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
32 #include "locfile.h"
33 #include "elem-hash.h"
34 
35 /* Uncomment the following line in the production version.  */
36 /* #define NDEBUG 1 */
37 #include <assert.h>
38 
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
41 
/* Append DATA, after passing it through maybe_swap_uint32, as a 32-bit
   quantity to the object currently growing in OBSTACK.  The object
   built so far has to satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  int32_t value;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  value = maybe_swap_uint32 (data);

  /* The dedicated int primitive is only usable when int is exactly as
     wide as int32_t; otherwise append the raw bytes.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &value, sizeof (int32_t));
  else
    obstack_int_grow (obstack, value);
}
53 
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when int is
   exactly as wide as int32_t.  NOTE(review): the fast primitive
   presumably expects the caller to have reserved room in OBSTACK
   beforehand -- confirm against the obstack documentation.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  int32_t value;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  value = maybe_swap_uint32 (data);

  /* No fast primitive is used for the odd-sized case; fall back to the
     ordinary byte-wise grow there.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &value, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, value);
}
65 
66 /* Forward declaration.  */
67 struct element_t;
68 
/* Description of one section of the collation definition.  Every
   section can be a member of two independent singly linked lists.  */
struct section_list
{
  /* Link to the next entry of the known_sections list.  */
  struct section_list *def_next;
  /* Link to the next entry of the sections list.  */
  struct section_list *next;
  /* The section's name.  */
  const char *name;
  /* First element which belongs to this section.  */
  struct element_t *first;
  /* Last element which belongs to this section.  */
  struct element_t *last;
  /* Sorting rules which apply to this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87 
88 struct element_t;
89 
/* Counted vector of pointers to collating elements.  */
struct element_list_t
{
  /* Number of entries stored in W.  */
  int cnt;

  /* The entries themselves.  */
  struct element_t **w;
};
97 
/* Everything known about one collating element.  */
struct element_t
{
  /* Symbolic name, or NULL for an unnamed element.  */
  const char *name;

  /* Multibyte sequence and its length in bytes (NULL/0 if unknown).  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and the number of wide
     characters in it (NULL/0 if unknown).  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask with one bit per level; a bit is set if the element is
     used in the corresponding level.  Interesting for the singlebyte
     weight computation.

     XXX Being an unsigned int, this limits the number of levels to 32.
     It could be widened if that ever becomes necessary.  */
  unsigned int used_in_level;

  /* Weight lists of this element.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Position of the character in the sequence; this information is
     used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* File name and line of the definition.  */
  const char *file;
  size_t line;

  /* The section this element belongs to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Links of the multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Links of the wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147 
/* Special element values.  These are small integers cast to element
   pointers, not addresses of real elements; they serve as markers
   inside weight lists.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152 
/* Everything known about one collating symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Element which marks the symbol's place in the order list, or NULL
     as long as none has been assigned.  */
  struct element_t *order;

  /* File name and line of the definition.  */
  const char *file;
  size_t line;
};
165 
166 /* Sparse table of struct element_t *.  */
167 #define TABLE wchead_table
168 #define ELEMENT struct element_t *
169 #define DEFAULT NULL
170 #define ITERATE
171 #define NO_ADD_LOCALE
172 #include "3level.h"
173 
174 /* Sparse table of int32_t.  */
175 #define TABLE collidx_table
176 #define ELEMENT int32_t
177 #define DEFAULT 0
178 #include "3level.h"
179 
180 /* Sparse table of uint32_t.  */
181 #define TABLE collseq_table
182 #define ELEMENT uint32_t
183 #define DEFAULT ~((uint32_t) 0)
184 #include "3level.h"
185 
186 
/* Simple name list for the preprocessor.  Each node is allocated with
   enough extra room behind the header to hold the name itself, which
   is stored in the trailing flexible array member STR.  */
struct name_list
{
  /* Next name in the list.  */
  struct name_list *next;
  /* The name; storage is provided by over-allocating the node.  */
  char str[];
};
193 
194 
195 /* The real definition of the struct for the LC_COLLATE locale.  */
196 struct locale_collate_t
197 {
198   /* Does the locale use code points to compare the encoding?  */
199   bool codepoint_collation;
200 
201   int col_weight_max;
202   int cur_weight_max;
203 
204   /* List of known scripts.  */
205   struct section_list *known_sections;
206   /* List of used sections.  */
207   struct section_list *sections;
208   /* Current section using definition.  */
209   struct section_list *current_section;
210   /* There always can be an unnamed section.  */
211   struct section_list unnamed_section;
212   /* Flag whether the unnamed section has been defined.  */
213   bool unnamed_section_defined;
214   /* To make handling of errors easier we have another section.  */
215   struct section_list error_section;
216   /* Sometimes we are defining the values for collating symbols before
217      the first actual section.  */
218   struct section_list symbol_section;
219 
220   /* Start of the order list.  */
221   struct element_t *start;
222 
223   /* The undefined element.  */
224   struct element_t undefined;
225 
226   /* This is the cursor for `reorder_after' insertions.  */
227   struct element_t *cursor;
228 
229   /* This value is used when handling ellipsis.  */
230   struct element_t ellipsis_weight;
231 
232   /* Known collating elements.  */
233   hash_table elem_table;
234 
235   /* Known collating symbols.  */
236   hash_table sym_table;
237 
238   /* Known collation sequences.  */
239   hash_table seq_table;
240 
241   struct obstack mempool;
242 
243   /* The LC_COLLATE category is a bit special as it is sometimes possible
244      that the definitions from more than one input file contains information.
245      Therefore we keep all relevant input in a list.  */
246   struct locale_collate_t *next;
247 
248   /* Arrays with heads of the list for each of the leading bytes in
249      the multibyte sequences.  */
250   struct element_t *mbheads[256];
251 
252   /* Arrays with heads of the list for each of the leading bytes in
253      the multibyte sequences.  */
254   struct wchead_table wcheads;
255 
256   /* The arrays with the collation sequence order.  */
257   unsigned char mbseqorder[256];
258   struct collseq_table wcseqorder;
259 
260   /* State of the preprocessor.  */
261   enum
262     {
263       else_none = 0,
264       else_ignore,
265       else_seen
266     }
267     else_action;
268 };
269 
270 
/* Global state shared while reading the LC_COLLATE category
   descriptions of all files.  */

/* Number of sorting rules; zero until the first direction
   specification has been read.  */
static uint32_t nrules = 0;

/* List of defined preprocessor symbols.  */
static struct name_list *defined = NULL;
277 
278 
/* We need UTF-8 encoding of numbers.

   Store the encoding of VAL at BUF and return the number of bytes
   written, which is between 1 and 6.  No terminating NUL byte is
   added; BUF must provide room for the longest sequence.  Every value
   below 0x80 (this includes negative values) is stored as one single
   byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes can carry 5 * STEP + 1 payload bits.
         Select the shortest sequence for which no higher bit is set;
         everything else needs the maximum of six bytes.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
         Derive the pattern from an unsigned constant so the result
         does not depend on how the implementation shifts negative
         values to the right.  */
      *buf = (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes back to front, six payload bits
         each.  */
      --step;
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);

      /* Whatever is left belongs into the lead byte.  */
      *buf |= val;
    }

  return retval;
}
313 
314 
315 static struct section_list *
make_seclist_elem(struct locale_collate_t * collate,const char * string,struct section_list * next)316 make_seclist_elem (struct locale_collate_t *collate, const char *string,
317 		   struct section_list *next)
318 {
319   struct section_list *newp;
320 
321   newp = (struct section_list *) obstack_alloc (&collate->mempool,
322 						sizeof (*newp));
323   newp->next = next;
324   newp->name = string;
325   newp->first = NULL;
326   newp->last = NULL;
327 
328   return newp;
329 }
330 
331 
332 static struct element_t *
new_element(struct locale_collate_t * collate,const char * mbs,size_t mbslen,const uint32_t * wcs,const char * name,size_t namelen,int is_character)333 new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
334 	     const uint32_t *wcs, const char *name, size_t namelen,
335 	     int is_character)
336 {
337   struct element_t *newp;
338 
339   newp = (struct element_t *) obstack_alloc (&collate->mempool,
340 					     sizeof (*newp));
341   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
342 						    name, namelen);
343   if (mbs != NULL)
344     {
345       newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
346       newp->nmbs = mbslen;
347     }
348   else
349     {
350       newp->mbs = NULL;
351       newp->nmbs = 0;
352     }
353   if (wcs != NULL)
354     {
355       size_t nwcs = wcslen ((wchar_t *) wcs);
356       uint32_t zero = 0;
357       /* Handle <U0000> as a single character.  */
358       if (nwcs == 0)
359 	nwcs = 1;
360       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
361       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
362       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
363       newp->nwcs = nwcs;
364     }
365   else
366     {
367       newp->wcs = NULL;
368       newp->nwcs = 0;
369     }
370   newp->mborder = NULL;
371   newp->wcorder = 0;
372   newp->used_in_level = 0;
373   newp->is_character = is_character;
374 
375   /* Will be assigned later.  XXX  */
376   newp->mbseqorder = 0;
377   newp->wcseqorder = 0;
378 
379   /* Will be allocated later.  */
380   newp->weights = NULL;
381 
382   newp->file = NULL;
383   newp->line = 0;
384 
385   newp->section = collate->current_section;
386 
387   newp->last = NULL;
388   newp->next = NULL;
389 
390   newp->mbnext = NULL;
391   newp->mblast = NULL;
392 
393   newp->wcnext = NULL;
394   newp->wclast = NULL;
395 
396   return newp;
397 }
398 
399 
400 static struct symbol_t *
new_symbol(struct locale_collate_t * collate,const char * name,size_t len)401 new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
402 {
403   struct symbol_t *newp;
404 
405   newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
406 
407   newp->name = obstack_copy0 (&collate->mempool, name, len);
408   newp->order = NULL;
409 
410   newp->file = NULL;
411   newp->line = 0;
412 
413   return newp;
414 }
415 
416 
417 /* Test whether this name is already defined somewhere.  */
418 static int
check_duplicate(struct linereader * ldfile,struct locale_collate_t * collate,const struct charmap_t * charmap,struct repertoire_t * repertoire,const char * symbol,size_t symbol_len)419 check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
420 		 const struct charmap_t *charmap,
421 		 struct repertoire_t *repertoire, const char *symbol,
422 		 size_t symbol_len)
423 {
424   void *ignore = NULL;
425 
426   if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
427     {
428       lr_error (ldfile, _("`%.*s' already defined in charmap"),
429 		(int) symbol_len, symbol);
430       return 1;
431     }
432 
433   if (repertoire != NULL
434       && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
435 	  == 0))
436     {
437       lr_error (ldfile, _("`%.*s' already defined in repertoire"),
438 		(int) symbol_len, symbol);
439       return 1;
440     }
441 
442   if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
443     {
444       lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
445 		(int) symbol_len, symbol);
446       return 1;
447     }
448 
449   if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
450     {
451       lr_error (ldfile, _("`%.*s' already defined as collating element"),
452 		(int) symbol_len, symbol);
453       return 1;
454     }
455 
456   return 0;
457 }
458 
459 
460 /* Read the direction specification.  */
461 static void
read_directions(struct linereader * ldfile,struct token * arg,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct localedef_t * result)462 read_directions (struct linereader *ldfile, struct token *arg,
463 		 const struct charmap_t *charmap,
464 		 struct repertoire_t *repertoire, struct localedef_t *result)
465 {
466   int cnt = 0;
467   int max = nrules ?: 10;
468   enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
469   int warned = 0;
470   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
471 
472   while (1)
473     {
474       int valid = 0;
475 
476       if (arg->tok == tok_forward)
477 	{
478 	  if (rules[cnt] & sort_backward)
479 	    {
480 	      if (! warned)
481 		{
482 		  lr_error (ldfile, _("\
483 %s: `forward' and `backward' are mutually excluding each other"),
484 			    "LC_COLLATE");
485 		  warned = 1;
486 		}
487 	    }
488 	  else if (rules[cnt] & sort_forward)
489 	    {
490 	      if (! warned)
491 		{
492 		  lr_error (ldfile, _("\
493 %s: `%s' mentioned more than once in definition of weight %d"),
494 			    "LC_COLLATE", "forward", cnt + 1);
495 		}
496 	    }
497 	  else
498 	    rules[cnt] |= sort_forward;
499 
500 	  valid = 1;
501 	}
502       else if (arg->tok == tok_backward)
503 	{
504 	  if (rules[cnt] & sort_forward)
505 	    {
506 	      if (! warned)
507 		{
508 		  lr_error (ldfile, _("\
509 %s: `forward' and `backward' are mutually excluding each other"),
510 			    "LC_COLLATE");
511 		  warned = 1;
512 		}
513 	    }
514 	  else if (rules[cnt] & sort_backward)
515 	    {
516 	      if (! warned)
517 		{
518 		  lr_error (ldfile, _("\
519 %s: `%s' mentioned more than once in definition of weight %d"),
520 			    "LC_COLLATE", "backward", cnt + 1);
521 		}
522 	    }
523 	  else
524 	    rules[cnt] |= sort_backward;
525 
526 	  valid = 1;
527 	}
528       else if (arg->tok == tok_position)
529 	{
530 	  if (rules[cnt] & sort_position)
531 	    {
532 	      if (! warned)
533 		{
534 		  lr_error (ldfile, _("\
535 %s: `%s' mentioned more than once in definition of weight %d"),
536 			    "LC_COLLATE", "position", cnt + 1);
537 		}
538 	    }
539 	  else
540 	    rules[cnt] |= sort_position;
541 
542 	  valid = 1;
543 	}
544 
545       if (valid)
546 	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
547 
548       if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
549 	  || arg->tok == tok_semicolon)
550 	{
551 	  if (! valid && ! warned)
552 	    {
553 	      lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
554 	      warned = 1;
555 	    }
556 
557 	  /* See whether we have to increment the counter.  */
558 	  if (arg->tok != tok_comma && rules[cnt] != 0)
559 	    {
560 	      /* Add the default `forward' if we have seen only `position'.  */
561 	      if (rules[cnt] == sort_position)
562 		rules[cnt] = sort_position | sort_forward;
563 
564 	      ++cnt;
565 	    }
566 
567 	  if (arg->tok == tok_eof || arg->tok == tok_eol)
568 	    /* End of line or file, so we exit the loop.  */
569 	    break;
570 
571 	  if (nrules == 0)
572 	    {
573 	      /* See whether we have enough room in the array.  */
574 	      if (cnt == max)
575 		{
576 		  max += 10;
577 		  rules = (enum coll_sort_rule *) xrealloc (rules,
578 							    max
579 							    * sizeof (*rules));
580 		  memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
581 		}
582 	    }
583 	  else
584 	    {
585 	      if (cnt == nrules)
586 		{
587 		  /* There must not be any more rule.  */
588 		  if (! warned)
589 		    {
590 		      lr_error (ldfile, _("\
591 %s: too many rules; first entry only had %d"),
592 				"LC_COLLATE", nrules);
593 		      warned = 1;
594 		    }
595 
596 		  lr_ignore_rest (ldfile, 0);
597 		  break;
598 		}
599 	    }
600 	}
601       else
602 	{
603 	  if (! warned)
604 	    {
605 	      lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
606 	      warned = 1;
607 	    }
608 	}
609 
610       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
611     }
612 
613   if (nrules == 0)
614     {
615       /* Now we know how many rules we have.  */
616       nrules = cnt;
617       rules = (enum coll_sort_rule *) xrealloc (rules,
618 						nrules * sizeof (*rules));
619     }
620   else
621     {
622       if (cnt < nrules)
623 	{
624 	  /* Not enough rules in this specification.  */
625 	  if (! warned)
626 	    lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
627 
628 	  do
629 	    rules[cnt] = sort_forward;
630 	  while (++cnt < nrules);
631 	}
632     }
633 
634   collate->current_section->rules = rules;
635 }
636 
637 
638 static struct element_t *
find_element(struct linereader * ldfile,struct locale_collate_t * collate,const char * str,size_t len)639 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
640 	      const char *str, size_t len)
641 {
642   void *result = NULL;
643 
644   /* Search for the entries among the collation sequences already define.  */
645   if (find_entry (&collate->seq_table, str, len, &result) != 0)
646     {
647       /* Nope, not define yet.  So we see whether it is a
648 	 collation symbol.  */
649       void *ptr;
650 
651       if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
652 	{
653 	  /* It's a collation symbol.  */
654 	  struct symbol_t *sym = (struct symbol_t *) ptr;
655 	  result = sym->order;
656 
657 	  if (result == NULL)
658 	    result = sym->order = new_element (collate, NULL, 0, NULL,
659 					       NULL, 0, 0);
660 	}
661       else if (find_entry (&collate->elem_table, str, len, &result) != 0)
662 	{
663 	  /* It's also no collation element.  So it is a character
664 	     element defined later.  */
665 	  result = new_element (collate, NULL, 0, NULL, str, len, 1);
666 	  /* Insert it into the sequence table.  */
667 	  insert_entry (&collate->seq_table, str, len, result);
668 	}
669     }
670 
671   return (struct element_t *) result;
672 }
673 
674 
675 static void
unlink_element(struct locale_collate_t * collate)676 unlink_element (struct locale_collate_t *collate)
677 {
678   if (collate->cursor == collate->start)
679     {
680       assert (collate->cursor->next == NULL);
681       assert (collate->cursor->last == NULL);
682       collate->cursor = NULL;
683     }
684   else
685     {
686       if (collate->cursor->next != NULL)
687 	collate->cursor->next->last = collate->cursor->last;
688       if (collate->cursor->last != NULL)
689 	collate->cursor->last->next = collate->cursor->next;
690       collate->cursor = collate->cursor->last;
691     }
692 }
693 
694 
695 static void
insert_weights(struct linereader * ldfile,struct element_t * elem,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct localedef_t * result,enum token_t ellipsis)696 insert_weights (struct linereader *ldfile, struct element_t *elem,
697 		const struct charmap_t *charmap,
698 		struct repertoire_t *repertoire, struct localedef_t *result,
699 		enum token_t ellipsis)
700 {
701   int weight_cnt;
702   struct token *arg;
703   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
704 
705   /* Initialize all the fields.  */
706   elem->file = ldfile->fname;
707   elem->line = ldfile->lineno;
708 
709   elem->last = collate->cursor;
710   elem->next = collate->cursor ? collate->cursor->next : NULL;
711   if (collate->cursor != NULL && collate->cursor->next != NULL)
712     collate->cursor->next->last = elem;
713   if (collate->cursor != NULL)
714     collate->cursor->next = elem;
715   if (collate->start == NULL)
716     {
717       assert (collate->cursor == NULL);
718       collate->start = elem;
719     }
720 
721   elem->section = collate->current_section;
722 
723   if (collate->current_section->first == NULL)
724     collate->current_section->first = elem;
725   if (collate->current_section->last == collate->cursor)
726     collate->current_section->last = elem;
727 
728   collate->cursor = elem;
729 
730   elem->weights = (struct element_list_t *)
731     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
732   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
733 
734   weight_cnt = 0;
735 
736   arg = lr_token (ldfile, charmap, result, repertoire, verbose);
737   do
738     {
739       if (arg->tok == tok_eof || arg->tok == tok_eol)
740 	break;
741 
742       if (arg->tok == tok_ignore)
743 	{
744 	  /* The weight for this level has to be ignored.  We use the
745 	     null pointer to indicate this.  */
746 	  elem->weights[weight_cnt].w = (struct element_t **)
747 	    obstack_alloc (&collate->mempool, sizeof (struct element_t *));
748 	  elem->weights[weight_cnt].w[0] = NULL;
749 	  elem->weights[weight_cnt].cnt = 1;
750 	}
751       else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
752 	{
753 	  char ucs4str[10];
754 	  struct element_t *val;
755 	  char *symstr;
756 	  size_t symlen;
757 
758 	  if (arg->tok == tok_bsymbol)
759 	    {
760 	      symstr = arg->val.str.startmb;
761 	      symlen = arg->val.str.lenmb;
762 	    }
763 	  else
764 	    {
765 	      snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
766 	      symstr = ucs4str;
767 	      symlen = 9;
768 	    }
769 
770 	  val = find_element (ldfile, collate, symstr, symlen);
771 	  if (val == NULL)
772 	    break;
773 
774 	  elem->weights[weight_cnt].w = (struct element_t **)
775 	    obstack_alloc (&collate->mempool, sizeof (struct element_t *));
776 	  elem->weights[weight_cnt].w[0] = val;
777 	  elem->weights[weight_cnt].cnt = 1;
778 	}
779       else if (arg->tok == tok_string)
780 	{
781 	  /* Split the string up in the individual characters and put
782 	     the element definitions in the list.  */
783 	  const char *cp = arg->val.str.startmb;
784 	  int cnt = 0;
785 	  struct element_t *charelem;
786 	  struct element_t **weights = NULL;
787 	  int max = 0;
788 
789 	  if (*cp == '\0')
790 	    {
791 	      lr_error (ldfile, _("%s: empty weight string not allowed"),
792 			"LC_COLLATE");
793 	      lr_ignore_rest (ldfile, 0);
794 	      break;
795 	    }
796 
797 	  do
798 	    {
799 	      if (*cp == '<')
800 		{
801 		  /* Ahh, it's a bsymbol or an UCS4 value.  If it's
802 		     the latter we have to unify the name.  */
803 		  const char *startp = ++cp;
804 		  size_t len;
805 
806 		  while (*cp != '>')
807 		    {
808 		      if (*cp == ldfile->escape_char)
809 			++cp;
810 		      if (*cp == '\0')
811 			/* It's a syntax error.  */
812 			goto syntax;
813 
814 		      ++cp;
815 		    }
816 
817 		  if (cp - startp == 5 && startp[0] == 'U'
818 		      && isxdigit (startp[1]) && isxdigit (startp[2])
819 		      && isxdigit (startp[3]) && isxdigit (startp[4]))
820 		    {
821 		      unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
822 		      char *newstr;
823 
824 		      newstr = (char *) xmalloc (10);
825 		      snprintf (newstr, 10, "U%08X", ucs4);
826 		      startp = newstr;
827 
828 		      len = 9;
829 		    }
830 		  else
831 		    len = cp - startp;
832 
833 		  charelem = find_element (ldfile, collate, startp, len);
834 		  ++cp;
835 		}
836 	      else
837 		{
838 		  /* People really shouldn't use characters directly in
839 		     the string.  Especially since it's not really clear
840 		     what this means.  We interpret all characters in the
841 		     string as if that would be bsymbols.  Otherwise we
842 		     would have to match back to bsymbols somehow and this
843 		     is normally not what people normally expect.  */
844 		  charelem = find_element (ldfile, collate, cp++, 1);
845 		}
846 
847 	      if (charelem == NULL)
848 		{
849 		  /* We ignore the rest of the line.  */
850 		  lr_ignore_rest (ldfile, 0);
851 		  break;
852 		}
853 
854 	      /* Add the pointer.  */
855 	      if (cnt >= max)
856 		{
857 		  struct element_t **newp;
858 		  max += 10;
859 		  newp = (struct element_t **)
860 		    alloca (max * sizeof (struct element_t *));
861 		  memcpy (newp, weights, cnt * sizeof (struct element_t *));
862 		  weights = newp;
863 		}
864 	      weights[cnt++] = charelem;
865 	    }
866 	  while (*cp != '\0');
867 
868 	  /* Now store the information.  */
869 	  elem->weights[weight_cnt].w = (struct element_t **)
870 	    obstack_alloc (&collate->mempool,
871 			   cnt * sizeof (struct element_t *));
872 	  memcpy (elem->weights[weight_cnt].w, weights,
873 		  cnt * sizeof (struct element_t *));
874 	  elem->weights[weight_cnt].cnt = cnt;
875 
876 	  /* We don't need the string anymore.  */
877 	  free (arg->val.str.startmb);
878 	}
879       else if (ellipsis != tok_none
880 	       && (arg->tok == tok_ellipsis2
881 		   || arg->tok == tok_ellipsis3
882 		   || arg->tok == tok_ellipsis4))
883 	{
884 	  /* It must be the same ellipsis as used in the initial column.  */
885 	  if (arg->tok != ellipsis)
886 	    lr_error (ldfile, _("\
887 %s: weights must use the same ellipsis symbol as the name"),
888 		      "LC_COLLATE");
889 
890 	  /* The weight for this level will depend on the element
891 	     iterating over the range.  Put a placeholder.  */
892 	  elem->weights[weight_cnt].w = (struct element_t **)
893 	    obstack_alloc (&collate->mempool, sizeof (struct element_t *));
894 	  elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
895 	  elem->weights[weight_cnt].cnt = 1;
896 	}
897       else
898 	{
899 	syntax:
900 	  /* It's a syntax error.  */
901 	  lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
902 	  lr_ignore_rest (ldfile, 0);
903 	  break;
904 	}
905 
906       arg = lr_token (ldfile, charmap, result, repertoire, verbose);
907       /* This better should be the end of the line or a semicolon.  */
908       if (arg->tok == tok_semicolon)
909 	/* OK, ignore this and read the next token.  */
910 	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
911       else if (arg->tok != tok_eof && arg->tok != tok_eol)
912 	{
913 	  /* It's a syntax error.  */
914 	  lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
915 	  lr_ignore_rest (ldfile, 0);
916 	  break;
917 	}
918     }
919   while (++weight_cnt < nrules);
920 
921   if (weight_cnt < nrules)
922     {
923       /* This means the rest of the line uses the current element as
924 	 the weight.  */
925       do
926 	{
927 	  elem->weights[weight_cnt].w = (struct element_t **)
928 	    obstack_alloc (&collate->mempool, sizeof (struct element_t *));
929 	  if (ellipsis == tok_none)
930 	    elem->weights[weight_cnt].w[0] = elem;
931 	  else
932 	    elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
933 	  elem->weights[weight_cnt].cnt = 1;
934 	}
935       while (++weight_cnt < nrules);
936     }
937   else
938     {
939       if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
940 	{
941 	  /* Too many rule values.  */
942 	  lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
943 	  lr_ignore_rest (ldfile, 0);
944 	}
945       else
946 	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
947     }
948 }
949 
950 
951 static int
insert_value(struct linereader * ldfile,const char * symstr,size_t symlen,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct localedef_t * result)952 insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
953 	      const struct charmap_t *charmap, struct repertoire_t *repertoire,
954 	      struct localedef_t *result)
955 {
956   /* First find out what kind of symbol this is.  */
957   struct charseq *seq;
958   uint32_t wc;
959   struct element_t *elem = NULL;
960   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
961 
962   /* Try to find the character in the charmap.  */
963   seq = charmap_find_value (charmap, symstr, symlen);
964 
965   /* Determine the wide character.  */
966   if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
967     {
968       wc = repertoire_find_value (repertoire, symstr, symlen);
969       if (seq != NULL)
970 	seq->ucs4 = wc;
971     }
972   else
973     wc = seq->ucs4;
974 
975   if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
976     {
977       /* It's no character, so look through the collation elements and
978 	 symbol list.  */
979       void *ptr = elem;
980       if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
981 	{
982 	  void *result;
983 	  struct symbol_t *sym = NULL;
984 
985 	  /* It's also collation element.  Therefore it's either a
986 	     collating symbol or it's a character which is not
987 	     supported by the character set.  In the later case we
988 	     simply create a dummy entry.  */
989 	  if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
990 	    {
991 	      /* It's a collation symbol.  */
992 	      sym = (struct symbol_t *) result;
993 
994 	      elem = sym->order;
995 	    }
996 
997 	  if (elem == NULL)
998 	    {
999 	      elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
1000 
1001 	      if (sym != NULL)
1002 		sym->order = elem;
1003 	      else
1004 		/* Enter a fake element in the sequence table.  This
1005 		   won't cause anything in the output since there is
1006 		   no multibyte or wide character associated with
1007 		   it.  */
1008 		insert_entry (&collate->seq_table, symstr, symlen, elem);
1009 	    }
1010 	}
1011       else
1012 	/* Copy the result back.  */
1013 	elem = ptr;
1014     }
1015   else
1016     {
1017       /* Otherwise the symbols stands for a character.  */
1018       void *ptr = elem;
1019       if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1020 	{
1021 	  uint32_t wcs[2] = { wc, 0 };
1022 
1023 	  /* We have to allocate an entry.  */
1024 	  elem = new_element (collate,
1025 			      seq != NULL ? (char *) seq->bytes : NULL,
1026 			      seq != NULL ? seq->nbytes : 0,
1027 			      wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1028 			      symstr, symlen, 1);
1029 
1030 	  /* And add it to the table.  */
1031 	  if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1032 	    /* This cannot happen.  */
1033 	    assert (! "Internal error");
1034 	}
1035       else
1036 	{
1037 	  /* Copy the result back.  */
1038 	  elem = ptr;
1039 
1040 	  /* Maybe the character was used before the definition.  In this case
1041 	     we have to insert the byte sequences now.  */
1042 	  if (elem->mbs == NULL && seq != NULL)
1043 	    {
1044 	      elem->mbs = obstack_copy0 (&collate->mempool,
1045 					 seq->bytes, seq->nbytes);
1046 	      elem->nmbs = seq->nbytes;
1047 	    }
1048 
1049 	  if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1050 	    {
1051 	      uint32_t wcs[2] = { wc, 0 };
1052 
1053 	      elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1054 	      elem->nwcs = 1;
1055 	    }
1056 	}
1057     }
1058 
1059   /* Test whether this element is not already in the list.  */
1060   if (elem->next != NULL || elem == collate->cursor)
1061     {
1062       lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1063 		(int) symlen, symstr, elem->file, elem->line);
1064       lr_ignore_rest (ldfile, 0);
1065       return 1;
1066     }
1067 
1068   insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1069 
1070   return 0;
1071 }
1072 
1073 
1074 static void
handle_ellipsis(struct linereader * ldfile,const char * symstr,size_t symlen,enum token_t ellipsis,const struct charmap_t * charmap,struct repertoire_t * repertoire,struct localedef_t * result)1075 handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1076 		 enum token_t ellipsis, const struct charmap_t *charmap,
1077 		 struct repertoire_t *repertoire,
1078 		 struct localedef_t *result)
1079 {
1080   struct element_t *startp;
1081   struct element_t *endp;
1082   struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1083 
1084   /* Unlink the entry added for the ellipsis.  */
1085   unlink_element (collate);
1086   startp = collate->cursor;
1087 
1088   /* Process and add the end-entry.  */
1089   if (symstr != NULL
1090       && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1091     /* Something went wrong with inserting the to-value.  This means
1092        we cannot process the ellipsis.  */
1093     return;
1094 
1095   /* Reset the cursor.  */
1096   collate->cursor = startp;
1097 
1098   /* Now we have to handle many different situations:
1099      - we have to distinguish between the three different ellipsis forms
1100      - the is the ellipsis at the beginning, in the middle, or at the end.
1101   */
1102   endp = collate->cursor->next;
1103   assert (symstr == NULL || endp != NULL);
1104 
1105   /* XXX The following is probably very wrong since also collating symbols
1106      can appear in ranges.  But do we want/can refine the test for that?  */
1107 #if 0
1108   /* Both, the start and the end symbol, must stand for characters.  */
1109   if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1110       || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1111     {
1112       lr_error (ldfile, _("\
1113 %s: the start and the end symbol of a range must stand for characters"),
1114 		"LC_COLLATE");
1115       return;
1116     }
1117 #endif
1118 
1119   if (ellipsis == tok_ellipsis3)
1120     {
1121       /* One requirement we make here: the length of the byte
1122 	 sequences for the first and end character must be the same.
1123 	 This is mainly to prevent unwanted effects and this is often
1124 	 not what is wanted.  */
1125       size_t len = (startp->mbs != NULL ? startp->nmbs
1126 		    : (endp->mbs != NULL ? endp->nmbs : 0));
1127       char mbcnt[len + 1];
1128       char mbend[len + 1];
1129 
1130       /* Well, this should be caught somewhere else already.  Just to
1131 	 make sure.  */
1132       assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1133       assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1134 
1135       if (startp != NULL && endp != NULL
1136 	  && startp->mbs != NULL && endp->mbs != NULL
1137 	  && startp->nmbs != endp->nmbs)
1138 	{
1139 	  lr_error (ldfile, _("\
1140 %s: byte sequences of first and last character must have the same length"),
1141 		    "LC_COLLATE");
1142 	  return;
1143 	}
1144 
1145       /* Determine whether we have to generate multibyte sequences.  */
1146       if ((startp == NULL || startp->mbs != NULL)
1147 	  && (endp == NULL || endp->mbs != NULL))
1148 	{
1149 	  int cnt;
1150 	  int ret;
1151 
1152 	  /* Prepare the beginning byte sequence.  This is either from the
1153 	     beginning byte sequence or it is all nulls if it was an
1154 	     initial ellipsis.  */
1155 	  if (startp == NULL || startp->mbs == NULL)
1156 	    memset (mbcnt, '\0', len);
1157 	  else
1158 	    {
1159 	      memcpy (mbcnt, startp->mbs, len);
1160 
1161 	      /* And increment it so that the value is the first one we will
1162 		 try to insert.  */
1163 	      for (cnt = len - 1; cnt >= 0; --cnt)
1164 		if (++mbcnt[cnt] != '\0')
1165 		  break;
1166 	    }
1167 	  mbcnt[len] = '\0';
1168 
1169 	  /* And the end sequence.  */
1170 	  if (endp == NULL || endp->mbs == NULL)
1171 	    memset (mbend, '\0', len);
1172 	  else
1173 	    memcpy (mbend, endp->mbs, len);
1174 	  mbend[len] = '\0';
1175 
1176 	  /* Test whether we have a correct range.  */
1177 	  ret = memcmp (mbcnt, mbend, len);
1178 	  if (ret >= 0)
1179 	    {
1180 	      if (ret > 0)
1181 		lr_error (ldfile, _("%s: byte sequence of first character of \
1182 range is not lower than that of the last character"), "LC_COLLATE");
1183 	      return;
1184 	    }
1185 
1186 	  /* Generate the byte sequences data.  */
1187 	  while (1)
1188 	    {
1189 	      struct charseq *seq;
1190 
1191 	      /* Quite a bit of work ahead.  We have to find the character
1192 		 definition for the byte sequence and then determine the
1193 		 wide character belonging to it.  */
1194 	      seq = charmap_find_symbol (charmap, mbcnt, len);
1195 	      if (seq != NULL)
1196 		{
1197 		  struct element_t *elem;
1198 		  size_t namelen;
1199 
1200 		  /* I don't think this can ever happen.  */
1201 		  assert (seq->name != NULL);
1202 		  namelen = strlen (seq->name);
1203 
1204 		  if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1205 		    seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1206 						       namelen);
1207 
1208 		  /* Now we are ready to insert the new value in the
1209 		     sequence.  Find out whether the element is
1210 		     already known.  */
1211 		  void *ptr;
1212 		  if (find_entry (&collate->seq_table, seq->name, namelen,
1213 				  &ptr) != 0)
1214 		    {
1215 		      uint32_t wcs[2] = { seq->ucs4, 0 };
1216 
1217 		      /* We have to allocate an entry.  */
1218 		      elem = new_element (collate, mbcnt, len,
1219 					  seq->ucs4 == ILLEGAL_CHAR_VALUE
1220 					  ? NULL : wcs, seq->name,
1221 					  namelen, 1);
1222 
1223 		      /* And add it to the table.  */
1224 		      if (insert_entry (&collate->seq_table, seq->name,
1225 					namelen, elem) != 0)
1226 			/* This cannot happen.  */
1227 			assert (! "Internal error");
1228 		    }
1229 		  else
1230 		    /* Copy the result.  */
1231 		    elem = ptr;
1232 
1233 		  /* Test whether this element is not already in the list.  */
1234 		  if (elem->next != NULL || (collate->cursor != NULL
1235 					     && elem->next == collate->cursor))
1236 		    {
1237 		      lr_error (ldfile, _("\
1238 order for `%.*s' already defined at %s:%Zu"),
1239 				(int) namelen, seq->name,
1240 				elem->file, elem->line);
1241 		      goto increment;
1242 		    }
1243 
1244 		  /* Enqueue the new element.  */
1245 		  elem->last = collate->cursor;
1246 		  if (collate->cursor == NULL)
1247 		    elem->next = NULL;
1248 		  else
1249 		    {
1250 		      elem->next = collate->cursor->next;
1251 		      elem->last->next = elem;
1252 		      if (elem->next != NULL)
1253 			elem->next->last = elem;
1254 		    }
1255 		  if (collate->start == NULL)
1256 		    {
1257 		      assert (collate->cursor == NULL);
1258 		      collate->start = elem;
1259 		    }
1260 		  collate->cursor = elem;
1261 
1262 		 /* Add the weight value.  We take them from the
1263 		    `ellipsis_weights' member of `collate'.  */
1264 		  elem->weights = (struct element_list_t *)
1265 		    obstack_alloc (&collate->mempool,
1266 				   nrules * sizeof (struct element_list_t));
1267 		  for (cnt = 0; cnt < nrules; ++cnt)
1268 		    if (collate->ellipsis_weight.weights[cnt].cnt == 1
1269 			&& (collate->ellipsis_weight.weights[cnt].w[0]
1270 			    == ELEMENT_ELLIPSIS2))
1271 		      {
1272 			elem->weights[cnt].w = (struct element_t **)
1273 			  obstack_alloc (&collate->mempool,
1274 					 sizeof (struct element_t *));
1275 			elem->weights[cnt].w[0] = elem;
1276 			elem->weights[cnt].cnt = 1;
1277 		      }
1278 		    else
1279 		      {
1280 			/* Simply use the weight from `ellipsis_weight'.  */
1281 			elem->weights[cnt].w =
1282 			  collate->ellipsis_weight.weights[cnt].w;
1283 			elem->weights[cnt].cnt =
1284 			  collate->ellipsis_weight.weights[cnt].cnt;
1285 		      }
1286 		}
1287 
1288 	      /* Increment for the next round.  */
1289 	    increment:
1290 	      for (cnt = len - 1; cnt >= 0; --cnt)
1291 		if (++mbcnt[cnt] != '\0')
1292 		  break;
1293 
1294 	      /* Find out whether this was all.  */
1295 	      if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1296 		/* Yep, that's all.  */
1297 		break;
1298 	    }
1299 	}
1300     }
1301   else
1302     {
1303       /* For symbolic range we naturally must have a beginning and an
1304 	 end specified by the user.  */
1305       if (startp == NULL)
1306 	lr_error (ldfile, _("\
1307 %s: symbolic range ellipsis must not directly follow `order_start'"),
1308 		  "LC_COLLATE");
1309       else if (endp == NULL)
1310 	lr_error (ldfile, _("\
1311 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1312 		  "LC_COLLATE");
1313       else
1314 	{
1315 	  /* Determine the range.  To do so we have to determine the
1316 	     common prefix of the both names and then the numeric
1317 	     values of both ends.  */
1318 	  size_t lenfrom = strlen (startp->name);
1319 	  size_t lento = strlen (endp->name);
1320 	  char buf[lento + 1];
1321 	  int preflen = 0;
1322 	  long int from;
1323 	  long int to;
1324 	  char *cp;
1325 	  int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1326 
1327 	  if (lenfrom != lento)
1328 	    {
1329 	    invalid_range:
1330 	      lr_error (ldfile, _("\
1331 `%s' and `%.*s' are not valid names for symbolic range"),
1332 			startp->name, (int) lento, endp->name);
1333 	      return;
1334 	    }
1335 
1336 	  while (startp->name[preflen] == endp->name[preflen])
1337 	    if (startp->name[preflen] == '\0')
1338 	      /* Nothing to be done.  The start and end point are identical
1339 		 and while inserting the end point we have already given
1340 		 the user an error message.  */
1341 	      return;
1342 	    else
1343 	      ++preflen;
1344 
1345 	  errno = 0;
1346 	  from = strtol (startp->name + preflen, &cp, base);
1347 	  if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1348 	    goto invalid_range;
1349 
1350 	  errno = 0;
1351 	  to = strtol (endp->name + preflen, &cp, base);
1352 	  if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1353 	    goto invalid_range;
1354 
1355 	  /* Copy the prefix.  */
1356 	  memcpy (buf, startp->name, preflen);
1357 
1358 	  /* Loop over all values.  */
1359 	  for (++from; from < to; ++from)
1360 	    {
1361 	      struct element_t *elem = NULL;
1362 	      struct charseq *seq;
1363 	      uint32_t wc;
1364 	      int cnt;
1365 
1366 	      /* Generate the name.  */
1367 	      sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1368 		       (int) (lenfrom - preflen), from);
1369 
1370 	      /* Look whether this name is already defined.  */
1371 	      void *ptr;
1372 	      if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1373 		{
1374 		  /* Copy back the result.  */
1375 		  elem = ptr;
1376 
1377 		  if (elem->next != NULL || (collate->cursor != NULL
1378 					     && elem->next == collate->cursor))
1379 		    {
1380 		      lr_error (ldfile, _("\
1381 %s: order for `%.*s' already defined at %s:%Zu"),
1382 				"LC_COLLATE", (int) lenfrom, buf,
1383 				elem->file, elem->line);
1384 		      continue;
1385 		    }
1386 
1387 		  if (elem->name == NULL)
1388 		    {
1389 		      lr_error (ldfile, _("%s: `%s' must be a character"),
1390 				"LC_COLLATE", buf);
1391 		      continue;
1392 		    }
1393 		}
1394 
1395 	      if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1396 		{
1397 		  /* Search for a character of this name.  */
1398 		  seq = charmap_find_value (charmap, buf, lenfrom);
1399 		  if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1400 		    {
1401 		      wc = repertoire_find_value (repertoire, buf, lenfrom);
1402 
1403 		      if (seq != NULL)
1404 			seq->ucs4 = wc;
1405 		    }
1406 		  else
1407 		    wc = seq->ucs4;
1408 
1409 		  if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1410 		    /* We don't know anything about a character with this
1411 		       name.  XXX Should we warn?  */
1412 		    continue;
1413 
1414 		  if (elem == NULL)
1415 		    {
1416 		      uint32_t wcs[2] = { wc, 0 };
1417 
1418 		      /* We have to allocate an entry.  */
1419 		      elem = new_element (collate,
1420 					  seq != NULL
1421 					  ? (char *) seq->bytes : NULL,
1422 					  seq != NULL ? seq->nbytes : 0,
1423 					  wc == ILLEGAL_CHAR_VALUE
1424 					  ? NULL : wcs, buf, lenfrom, 1);
1425 		    }
1426 		  else
1427 		    {
1428 		      /* Update the element.  */
1429 		      if (seq != NULL)
1430 			{
1431 			  elem->mbs = obstack_copy0 (&collate->mempool,
1432 						     seq->bytes, seq->nbytes);
1433 			  elem->nmbs = seq->nbytes;
1434 			}
1435 
1436 		      if (wc != ILLEGAL_CHAR_VALUE)
1437 			{
1438 			  uint32_t zero = 0;
1439 
1440 			  obstack_grow (&collate->mempool,
1441 					&wc, sizeof (uint32_t));
1442 			  obstack_grow (&collate->mempool,
1443 					&zero, sizeof (uint32_t));
1444 			  elem->wcs = obstack_finish (&collate->mempool);
1445 			  elem->nwcs = 1;
1446 			}
1447 		    }
1448 
1449 		  elem->file = ldfile->fname;
1450 		  elem->line = ldfile->lineno;
1451 		  elem->section = collate->current_section;
1452 		}
1453 
1454 	      /* Enqueue the new element.  */
1455 	      elem->last = collate->cursor;
1456 	      elem->next = collate->cursor->next;
1457 	      elem->last->next = elem;
1458 	      if (elem->next != NULL)
1459 		elem->next->last = elem;
1460 	      collate->cursor = elem;
1461 
1462 	      /* Now add the weights.  They come from the `ellipsis_weights'
1463 		 member of `collate'.  */
1464 	      elem->weights = (struct element_list_t *)
1465 		obstack_alloc (&collate->mempool,
1466 			       nrules * sizeof (struct element_list_t));
1467 	      for (cnt = 0; cnt < nrules; ++cnt)
1468 		if (collate->ellipsis_weight.weights[cnt].cnt == 1
1469 		    && (collate->ellipsis_weight.weights[cnt].w[0]
1470 			== ELEMENT_ELLIPSIS2))
1471 		  {
1472 		    elem->weights[cnt].w = (struct element_t **)
1473 		      obstack_alloc (&collate->mempool,
1474 				     sizeof (struct element_t *));
1475 		    elem->weights[cnt].w[0] = elem;
1476 		    elem->weights[cnt].cnt = 1;
1477 		  }
1478 		else
1479 		  {
1480 		    /* Simly use the weight from `ellipsis_weight'.  */
1481 		    elem->weights[cnt].w =
1482 		      collate->ellipsis_weight.weights[cnt].w;
1483 		    elem->weights[cnt].cnt =
1484 		      collate->ellipsis_weight.weights[cnt].cnt;
1485 		  }
1486 	    }
1487 	}
1488     }
1489   /* Move the cursor to the last entry in the ellipsis.
1490      Subsequent operations need to start from the last entry.  */
1491   collate->cursor = endp;
1492 }
1493 
1494 
1495 static void
collate_startup(struct linereader * ldfile,struct localedef_t * locale,struct localedef_t * copy_locale,int ignore_content)1496 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1497 		 struct localedef_t *copy_locale, int ignore_content)
1498 {
1499   if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1500     {
1501       struct locale_collate_t *collate;
1502 
1503       if (copy_locale == NULL)
1504 	{
1505 	  collate = locale->categories[LC_COLLATE].collate =
1506 	    (struct locale_collate_t *)
1507 	    xcalloc (1, sizeof (struct locale_collate_t));
1508 
1509 	  /* Init the various data structures.  */
1510 	  init_hash (&collate->elem_table, 100);
1511 	  init_hash (&collate->sym_table, 100);
1512 	  init_hash (&collate->seq_table, 500);
1513 	  obstack_init (&collate->mempool);
1514 
1515 	  collate->col_weight_max = -1;
1516 	  collate->codepoint_collation = false;
1517 	}
1518       else
1519 	/* Reuse the copy_locale's data structures.  */
1520 	collate = locale->categories[LC_COLLATE].collate =
1521 	  copy_locale->categories[LC_COLLATE].collate;
1522     }
1523 
1524   ldfile->translate_strings = 0;
1525   ldfile->return_widestr = 0;
1526 }
1527 
1528 
/* Finish the LC_COLLATE data of LOCALE once the definition is read.

   The function walks the list of elements starting at `collate->start'
   several times: to record at which levels each element is used as a
   weight, to assign the per-level multibyte order values, the wide
   character order value and the collation sequence values, and to sort
   the elements into the `mbheads' lists and the `wcheads' table.  After
   that the UNDEFINED entry gets its order values and the sections get
   their rule set index.

   If LOCALE has no collation data a warning is recorded; if code point
   collation is selected nothing has to be done.  CHARMAP is not used
   in the body.  */
void
collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
{
  /* Now is the time when we can assign the individual collation
     values for all the symbols.  We have possibly different values
     for the wide- and the multibyte-character symbols.  This is done
     since it might make a difference in the encoding if there is in
     some cases no multibyte-character but there are wide-characters.
     (The other way around it is not important since the encoded
     collation value in the wide-character case is 32 bits wide and
     therefore requires no encoding).

     The lowest collation value assigned is 2.  Zero is reserved for
     the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
     functions and 1 is used to separate the individual passes for the
     different rules.

     We also have to construct a list with all the bytes/words which
     can come first in a sequence, followed by all the elements which
     also start with this byte/word.  The order is reverse which has
     among others the important effect that longer strings are located
     first in the list.  This is required for the output data since
     the algorithm used in `strcoll' etc depends on this.

     The multibyte case is easy.  We simply sort into an array with
     256 elements.  */
  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
  int mbact[nrules];
  int wcact;
  int mbseqact;
  int wcseqact;
  struct element_t *runp;
  int i;
  int need_undefined = 0;
  struct section_list *sect;
  int ruleidx;

  if (collate == NULL)
    {
      /* No data, no check. Issue a warning.  */
      record_warning (_("No definition for %s category found"),
		      "LC_COLLATE");
      return;
    }

  /* No data required.  */
  if (collate->codepoint_collation)
    return;

  /* If this assertion is hit change the type in `element_t'.  */
  assert (nrules <= sizeof (runp->used_in_level) * 8);

  /* Make sure that the `position' rule is used either in all sections
     or in none.  Every section is compared with the current section.
     The `break' only leaves the loop over the sections, so at most one
     error is recorded per level.  */
  for (i = 0; i < nrules; ++i)
    for (sect = collate->sections; sect != NULL; sect = sect->next)
      if (sect != collate->current_section
	  && sect->rules != NULL
	  && ((sect->rules[i] & sort_position)
	      != (collate->current_section->rules[i] & sort_position)))
	{
	  record_error (0, 0, _("\
%s: `position' must be used for a specific level in all sections or none"),
			"LC_COLLATE");
	  break;
	}

  /* Find out which elements are used at which level.  At the same
     time we find out whether we have any undefined symbols.  Only the
     weights of elements with a multibyte sequence are examined.  */
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL)
	{
	  for (i = 0; i < nrules; ++i)
	    {
	      int j;

	      for (j = 0; j < runp->weights[i].cnt; ++j)
		/* A NULL pointer as the weight means IGNORE.  */
		if (runp->weights[i].w[j] != NULL)
		  {
		    if (runp->weights[i].w[j]->weights == NULL)
		      {
			/* The symbol used as a weight never got weights
			   itself.  Report it and use UNDEFINED instead.  */
			record_error_at_line (0, 0, runp->file, runp->line,
					      _("symbol `%s' not defined"),
					      runp->weights[i].w[j]->name);

			need_undefined = 1;
			runp->weights[i].w[j] = &collate->undefined;
		      }
		    else
		      /* Set the bit for the level.  */
		      runp->weights[i].w[j]->used_in_level |= 1 << i;
		  }
	    }
	}

      /* Up to the next entry.  */
      runp = runp->next;
    }

  /* Walk through the list of defined sequences and assign weights.  Also
     create the data structure which will allow generating the single byte
     character based tables.

     Since at each time only the weights for each of the rules are
     only compared to other weights for this rule it is possible to
     assign more compact weight values than simply counting all
     weights in sequence.  We can assign weights individually for each
     rule (the counters below start at 2) and only for those elements
     which are actually used for this rule.

     Why is this important?  It is not for the wide char table.  But
     it is for the singlebyte output since here larger numbers have to
     be encoded to make it possible to emit the value as a byte
     string.  */
  for (i = 0; i < nrules; ++i)
    mbact[i] = 2;
  wcact = 2;
  mbseqact = 0;
  wcseqact = 0;
  runp = collate->start;
  while (runp != NULL)
    {
      /* Determine the order.  */
      if (runp->used_in_level != 0)
	{
	  runp->mborder = (int *) obstack_alloc (&collate->mempool,
						 nrules * sizeof (int));

	  /* Levels at which the element is not used get the value 0.  */
	  for (i = 0; i < nrules; ++i)
	    if ((runp->used_in_level & (1 << i)) != 0)
	      runp->mborder[i] = mbact[i]++;
	    else
	      runp->mborder[i] = 0;
	}

      if (runp->mbs != NULL)
	{
	  struct element_t **eptr;
	  struct element_t *lastp = NULL;

	  /* Find the point where to insert in the list.  The list is
	     selected by the first byte; longer sequences come first and
	     sequences of the same length are ordered by memcmp.  */
	  eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
	  while (*eptr != NULL)
	    {
	      if ((*eptr)->nmbs < runp->nmbs)
		break;

	      if ((*eptr)->nmbs == runp->nmbs)
		{
		  int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);

		  if (c == 0)
		    {
		      /* This should not happen.  It means that we have
			 two symbols with the same byte sequence.  It is
			 of course an error.  */
		      record_error_at_line (0, 0, (*eptr)->file,
					    (*eptr)->line,
					    _("\
symbol `%s' has the same encoding as"), (*eptr)->name);

		      record_error_at_line (0, 0, runp->file, runp->line,
					    _("symbol `%s'"), runp->name);
		      goto dont_insert;
		    }
		  else if (c < 0)
		    /* Insert it here.  */
		    break;
		}

	      /* To the next entry.  */
	      lastp = *eptr;
	      eptr = &(*eptr)->mbnext;
	    }

	  /* Set the pointers.  */
	  runp->mbnext = *eptr;
	  runp->mblast = lastp;
	  if (*eptr != NULL)
	    (*eptr)->mblast = runp;
	  *eptr = runp;
	dont_insert:
	  ;
	}

      /* The wide character order is a single counter for all levels.  */
      if (runp->used_in_level)
	runp->wcorder = wcact++;

      if (runp->is_character)
	{
	  /* Only single byte characters get a multibyte sequence
	     order.  */
	  if (runp->nmbs == 1)
	    collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;

	  runp->wcseqorder = wcseqact++;
	}
      else if (runp->mbs != NULL && runp->weights != NULL)
	/* This is for collation elements.  */
	runp->wcseqorder = wcseqact++;

      /* Up to the next entry.  */
      runp = runp->next;
    }

  /* Find out whether any of the `mbheads' entries is unset.  In this
     case we use the UNDEFINED entry.  The entry for byte 0 is not
     examined.  */
  for (i = 1; i < 256; ++i)
    if (collate->mbheads[i] == NULL)
      {
	need_undefined = 1;
	collate->mbheads[i] = &collate->undefined;
      }

  /* Now to the wide character case.  */
  collate->wcheads.p = 6;
  collate->wcheads.q = 10;
  wchead_table_init (&collate->wcheads);

  collate->wcseqorder.p = 6;
  collate->wcseqorder.q = 10;
  collseq_table_init (&collate->wcseqorder);

  /* Start adding.  */
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->wcs != NULL)
	{
	  struct element_t *e;
	  struct element_t **eptr;
	  struct element_t *lastp;

	  /* Insert the collation sequence value.  */
	  if (runp->is_character)
	    collseq_table_add (&collate->wcseqorder, runp->wcs[0],
			       runp->wcseqorder);

	  /* Find the point where to insert in the list.  The head of the
	     list is fetched into the local variable E; if the new
	     element becomes the head the table is updated below.  */
	  e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
	  eptr = &e;
	  lastp = NULL;
	  while (*eptr != NULL)
	    {
	      if ((*eptr)->nwcs < runp->nwcs)
		break;

	      if ((*eptr)->nwcs == runp->nwcs)
		{
		  int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
				   (wchar_t *) runp->wcs, runp->nwcs);

		  if (c == 0)
		    {
		      /* This should not happen.  It means that we have
			 two symbols with the same wide character
			 sequence.  It is of course an error.  */
		      record_error_at_line (0, 0, (*eptr)->file,
					    (*eptr)->line,
					    _("\
symbol `%s' has the same encoding as"), (*eptr)->name);

		      record_error_at_line (0, 0, runp->file, runp->line,
					    _("symbol `%s'"), runp->name);
		      goto dont_insertwc;
		    }
		  else if (c < 0)
		    /* Insert it here.  */
		    break;
		}

	      /* To the next entry.  */
	      lastp = *eptr;
	      eptr = &(*eptr)->wcnext;
	    }

	  /* Set the pointers.  */
	  runp->wcnext = *eptr;
	  runp->wclast = lastp;
	  if (*eptr != NULL)
	    (*eptr)->wclast = runp;
	  *eptr = runp;
	  /* The head of the list changed, store it in the table.  */
	  if (eptr == &e)
	    wchead_table_add (&collate->wcheads, runp->wcs[0], e);
	dont_insertwc:
	  ;
	}

      /* Up to the next entry.  */
      runp = runp->next;
    }

  /* Now determine whether the UNDEFINED entry is needed and if yes,
     whether it was defined.  */
  collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
  if (collate->undefined.file == NULL)
    {
      if (need_undefined)
	{
	  /* This seems not to be enforced by recent standards.  Don't
	     emit an error, simply append UNDEFINED at the end.  */
	  collate->undefined.mborder =
	    (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));

	  for (i = 0; i < nrules; ++i)
	    collate->undefined.mborder[i] = mbact[i]++;
	}

      /* In any case we will need the definition for the wide character
	 case.  But we will not complain that it is missing since the
	 specification strangely enough does not seem to account for
	 this.  */
      collate->undefined.wcorder = wcact++;
    }

  /* Finally, try to unify the rules for the sections.  Whenever the rules
     for a section are the same as those for another section give the
     ruleset the same index.  Since there are never many sections we can
     use an O(n^2) algorithm here.  */
  sect = collate->sections;
  while (sect != NULL && sect->rules == NULL)
    sect = sect->next;

  /* Bail out if we have no sections because of earlier errors.  */
  if (sect == NULL)
    {
      record_error (EXIT_FAILURE, 0, _("too many errors; giving up"));
      return;
    }

  ruleidx = 0;
  do
    {
      struct section_list *osect = collate->sections;

      /* Look for an earlier section with the same rules.  */
      while (osect != sect)
	if (osect->rules != NULL
	    && memcmp (osect->rules, sect->rules,
		       nrules * sizeof (osect->rules[0])) == 0)
	  break;
	else
	  osect = osect->next;

      /* No earlier section matched: use a new index.  */
      if (osect == sect)
	sect->ruleidx = ruleidx++;
      else
	sect->ruleidx = osect->ruleidx;

      /* Next section.  Sections without rules are skipped.  */
      do
	sect = sect->next;
      while (sect != NULL && sect->rules == NULL);
    }
  while (sect != NULL);
  /* We are currently not prepared for more than 128 rulesets.  But this
     should never really be a problem.  */
  assert (ruleidx <= 128);
}
1888 
1889 
1890 static int32_t
output_weight(struct obstack * pool,struct locale_collate_t * collate,struct element_t * elem)1891 output_weight (struct obstack *pool, struct locale_collate_t *collate,
1892 	       struct element_t *elem)
1893 {
1894   size_t cnt;
1895   int32_t retval;
1896 
1897   /* Optimize the use of UNDEFINED.  */
1898   if (elem == &collate->undefined)
1899     /* The weights are already inserted.  */
1900     return 0;
1901 
1902   /* This byte can start exactly one collation element and this is
1903      a single byte.  We can directly give the index to the weights.  */
1904   retval = obstack_object_size (pool);
1905 
1906   /* Construct the weight.  */
1907   for (cnt = 0; cnt < nrules; ++cnt)
1908     {
1909       char buf[elem->weights[cnt].cnt * 7];
1910       int len = 0;
1911       int i;
1912 
1913       for (i = 0; i < elem->weights[cnt].cnt; ++i)
1914 	/* Encode the weight value.  We do nothing for IGNORE entries.  */
1915 	if (elem->weights[cnt].w[i] != NULL)
1916 	  len += utf8_encode (&buf[len],
1917 			      elem->weights[cnt].w[i]->mborder[cnt]);
1918 
1919       /* And add the buffer content.  */
1920       obstack_1grow (pool, len);
1921       obstack_grow (pool, buf, len);
1922     }
1923 
1924   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1925 }
1926 
1927 
1928 static int32_t
output_weightwc(struct obstack * pool,struct locale_collate_t * collate,struct element_t * elem)1929 output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1930 		 struct element_t *elem)
1931 {
1932   size_t cnt;
1933   int32_t retval;
1934 
1935   /* Optimize the use of UNDEFINED.  */
1936   if (elem == &collate->undefined)
1937     /* The weights are already inserted.  */
1938     return 0;
1939 
1940   /* This byte can start exactly one collation element and this is
1941      a single byte.  We can directly give the index to the weights.  */
1942   retval = obstack_object_size (pool) / sizeof (int32_t);
1943 
1944   /* Construct the weight.  */
1945   for (cnt = 0; cnt < nrules; ++cnt)
1946     {
1947       int32_t buf[elem->weights[cnt].cnt];
1948       int i;
1949       int32_t j;
1950 
1951       for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1952 	if (elem->weights[cnt].w[i] != NULL)
1953 	  buf[j++] = elem->weights[cnt].w[i]->wcorder;
1954 
1955       /* And add the buffer content.  */
1956       obstack_int32_grow (pool, j);
1957 
1958       obstack_grow (pool, buf, j * sizeof (int32_t));
1959       maybe_swap_uint32_obstack (pool, j);
1960     }
1961 
1962   return retval | ((elem->section->ruleidx & 0x7f) << 24);
1963 }
1964 
/* State shared with add_to_tablewc, which only receives the character
   and the element as arguments.  If localedef is ever threaded, this
   would need to be a __thread variable.  */
static struct
{
  /* Pool handed to output_weightwc for the weight data.  */
  struct obstack *weightpool;
  /* Pool receiving the data for characters which need more than a
     plain weight index; the table stores the negated word offset.  */
  struct obstack *extrapool;
  /* Pool for the indirect table referenced from `extrapool'.  */
  struct obstack *indpool;
  /* Collation data passed on to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table the index for each character is added to.  */
  struct collidx_table *tablewc;
} atwc;

/* Forward declaration; the definition follows directly.  */
static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1976 
1977 static void
add_to_tablewc(uint32_t ch,struct element_t * runp)1978 add_to_tablewc (uint32_t ch, struct element_t *runp)
1979 {
1980   if (runp->wcnext == NULL && runp->nwcs == 1)
1981     {
1982       int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1983 					   runp);
1984       collidx_table_add (atwc.tablewc, ch, weigthidx);
1985     }
1986   else
1987     {
1988       /* As for the singlebyte table, we recognize sequences and
1989 	 compress them.  */
1990 
1991       collidx_table_add (atwc.tablewc, ch,
1992 			 -(obstack_object_size (atwc.extrapool)
1993 			 / sizeof (uint32_t)));
1994 
1995       do
1996 	{
1997 	  /* Store the current index in the weight table.  We know that
1998 	     the current position in the `extrapool' is aligned on a
1999 	     32-bit address.  */
2000 	  int32_t weightidx;
2001 	  int added;
2002 
2003 	  /* Find out wether this is a single entry or we have more than
2004 	     one consecutive entry.  */
2005 	  if (runp->wcnext != NULL
2006 	      && runp->nwcs == runp->wcnext->nwcs
2007 	      && wmemcmp ((wchar_t *) runp->wcs,
2008 			  (wchar_t *)runp->wcnext->wcs,
2009 			  runp->nwcs - 1) == 0
2010 	      && (runp->wcs[runp->nwcs - 1]
2011 		  == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2012 	    {
2013 	      int i;
2014 	      struct element_t *series_startp = runp;
2015 	      struct element_t *curp;
2016 
2017 	      /* Now add first the initial byte sequence.  */
2018 	      added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2019 	      if (sizeof (int32_t) == sizeof (int))
2020 		obstack_make_room (atwc.extrapool, added);
2021 
2022 	      /* More than one consecutive entry.  We mark this by having
2023 		 a negative index into the indirect table.  */
2024 	      obstack_int32_grow_fast (atwc.extrapool,
2025 				       -(obstack_object_size (atwc.indpool)
2026 					 / sizeof (int32_t)));
2027 	      obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2028 
2029 	      do
2030 		runp = runp->wcnext;
2031 	      while (runp->wcnext != NULL
2032 		     && runp->nwcs == runp->wcnext->nwcs
2033 		     && wmemcmp ((wchar_t *) runp->wcs,
2034 				 (wchar_t *)runp->wcnext->wcs,
2035 				 runp->nwcs - 1) == 0
2036 		     && (runp->wcs[runp->nwcs - 1]
2037 			 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2038 
2039 	      /* Now walk backward from here to the beginning.  */
2040 	      curp = runp;
2041 
2042 	      for (i = 1; i < runp->nwcs; ++i)
2043 		obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2044 
2045 	      /* Now find the end of the consecutive sequence and
2046 		 add all the indices in the indirect pool.  */
2047 	      do
2048 		{
2049 		  weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2050 					       curp);
2051 		  obstack_int32_grow (atwc.indpool, weightidx);
2052 
2053 		  curp = curp->wclast;
2054 		}
2055 	      while (curp != series_startp);
2056 
2057 	      /* Add the final weight.  */
2058 	      weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2059 					   curp);
2060 	      obstack_int32_grow (atwc.indpool, weightidx);
2061 
2062 	      /* And add the end byte sequence.  Without length this
2063 		 time.  */
2064 	      for (i = 1; i < curp->nwcs; ++i)
2065 		obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2066 	    }
2067 	  else
2068 	    {
2069 	      /* A single entry.  Simply add the index and the length and
2070 		 string (except for the first character which is already
2071 		 tested for).  */
2072 	      int i;
2073 
2074 	      /* Output the weight info.  */
2075 	      weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2076 					   runp);
2077 
2078 	      assert (runp->nwcs > 0);
2079 	      added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2080 	      if (sizeof (int) == sizeof (int32_t))
2081 		obstack_make_room (atwc.extrapool, added);
2082 
2083 	      obstack_int32_grow_fast (atwc.extrapool, weightidx);
2084 	      obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2085 	      for (i = 1; i < runp->nwcs; ++i)
2086 		obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2087 	    }
2088 
2089 	  /* Next entry.  */
2090 	  runp = runp->wcnext;
2091 	}
2092       while (runp != NULL);
2093     }
2094 }
2095 
2096 /* Include the C locale identity tables for _NL_COLLATE_COLLSEQMB and
2097    _NL_COLLATE_COLLSEQWC.  */
2098 #include "C-collate-seq.c"
2099 
/* Write the LC_COLLATE category of LOCALE to OUTPUT_PATH.

   The items are added to a locale_file in a fixed order, starting with
   the number of rules.  When there is no collation data, or the locale
   asked for codepoint collation, only a skeleton is written: zero for
   the symbol hash size and, provided the collation data is not NULL,
   the code set name from CHARMAP and the C locale collseqmb/collseqwc
   tables; every other item is empty.

   Otherwise the function emits, in this order: the rule sets, the
   multibyte index table with its weight, extra and indirect pools,
   three empty items, the wide character index table with its three
   pools, the collation element hash table with its string pool, both
   collation sequence tables and finally the code set name.  */
void
collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
		const char *output_path)
{
  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
  struct locale_file file;
  size_t ch;
  int32_t tablemb[256];
  struct obstack weightpool;
  struct obstack extrapool;
  struct obstack indirectpool;
  struct section_list *sect;
  struct collidx_table tablewc;
  uint32_t elem_size;
  uint32_t *elem_table;
  int i;
  struct element_t *runp;

  init_locale_data (&file, nelems);
  add_locale_uint32 (&file, nrules);

  /* If we have no LC_COLLATE data, or codepoint collation was requested,
     emit only the skeleton described above and return.  The first item
     (the number of rules) has been added already, hence IDX starts
     at 1.  */
  if (collate == NULL || collate->codepoint_collation)
    {
      size_t idx;
      for (idx = 1; idx < nelems; idx++)
	{
	  /* The words have to be handled specially.  */
	  if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
	    add_locale_uint32 (&file, 0);
	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_CODESET)
		   && collate != NULL)
	    /* A valid LC_COLLATE must have a code set name.  */
	    add_locale_string (&file, charmap->code_set_name);
	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB)
		   && collate != NULL)
	    add_locale_raw_data (&file, collseqmb, sizeof (collseqmb));
	  else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC)
		   && collate != NULL)
	    add_locale_uint32_array (&file, collseqwc,
				     array_length (collseqwc));
	  else
	    add_locale_empty (&file);
	}
      write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
      return;
    }

  obstack_init (&weightpool);
  obstack_init (&extrapool);
  obstack_init (&indirectpool);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Prepare the ruleset table.  I counts the rule sets written so far;
     a section contributes its NRULES rule bytes only when its rule
     index equals that count.  */
  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
    if (sect->rules != NULL && sect->ruleidx == i)
      {
	int j;

	obstack_make_room (&weightpool, nrules);

	for (j = 0; j < nrules; ++j)
	  obstack_1grow_fast (&weightpool, sect->rules[j]);
	++i;
      }
  /* And align the output.  From here on I is no longer the rule set
     count but the number of bytes beyond the last alignment boundary.  */
  i = (nrules * i) % LOCFILE_ALIGN;
  if (i > 0)
    do
      obstack_1grow (&weightpool, '\0');
    while (++i < LOCFILE_ALIGN);

  add_locale_raw_obstack (&file, &weightpool);

  /* Generate the 8-bit table.  Walk through the lists of sequences
     starting with the same byte and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same byte we have to use extra indirection.

     First add a record for the NUL byte.  This entry will never be used
     so it does not matter.  */
  tablemb[0] = 0;

  /* Now insert the `UNDEFINED' value if it is used.  Since this value
     will probably be used more than once it is good to store the
     weights only once.  */
  if (collate->undefined.used_in_level != 0)
    output_weight (&weightpool, collate, &collate->undefined);

  for (ch = 1; ch < 256; ++ch)
    if (collate->mbheads[ch]->mbnext == NULL
	&& collate->mbheads[ch]->nmbs <= 1)
      {
	/* Only one element of at most one byte starts with this byte:
	   store its weight index directly.  */
	tablemb[ch] = output_weight (&weightpool, collate,
				     collate->mbheads[ch]);
      }
    else
      {
	/* The entries in the list are sorted by length and then
	   alphabetically.  This is the order in which we will add the
	   elements to the collation table.  This allows simply walking
	   the table in sequence and stopping at the first matching
	   entry.  Since the longer sequences are coming first in the
	   list they have the possibility to match first, just as it
	   has to be.  In the worst case we are walking to the end of
	   the list where we put, if no singlebyte sequence is defined
	   in the locale definition, the weights for UNDEFINED.

	   To reduce the length of the search list we compress them a bit.
	   This happens by collecting sequences of consecutive byte
	   sequences in one entry (having a begin and an end byte sequence)
	   and adding only one index into the weight table.  We can find
	   the consecutive entries since they are also consecutive in the
	   list.  */
	struct element_t *runp = collate->mbheads[ch];
	struct element_t *lastp;

	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

	/* A negative entry is the (byte) offset into the extra pool.  */
	tablemb[ch] = -obstack_object_size (&extrapool);

	do
	  {
	    /* Store the current index in the weight table.  We know that
	       the current position in the `extrapool' is aligned on a
	       32-bit address.  */
	    int32_t weightidx;
	    int added;

	    /* Find out whether this is a single entry or we have more than
	       one consecutive entry.  Two neighbours are consecutive if
	       they have the same length, agree in all but the last byte,
	       and the last byte of this entry is one larger than that of
	       the next.  */
	    if (runp->mbnext != NULL
		&& runp->nmbs == runp->mbnext->nmbs
		&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
		&& (runp->mbs[runp->nmbs - 1]
		    == runp->mbnext->mbs[runp->nmbs - 1] + 1))
	      {
		int i;
		struct element_t *series_startp = runp;
		struct element_t *curp;

		/* Compute how much space we will need: the 32-bit index,
		   the length byte and both byte sequences without their
		   first byte.  The room is reserved up front because the
		   `_fast' grow variants used below do not check for
		   space.  */
		added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
					  + 2 * (runp->nmbs - 1));
		assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
		obstack_make_room (&extrapool, added);

		/* More than one consecutive entry.  We mark this by having
		   a negative index into the indirect table.  */
		obstack_int32_grow_fast (&extrapool,
					 -(obstack_object_size (&indirectpool)
					   / sizeof (int32_t)));

		/* Now search first the end of the series.  */
		do
		  runp = runp->mbnext;
		while (runp->mbnext != NULL
		       && runp->nmbs == runp->mbnext->nmbs
		       && memcmp (runp->mbs, runp->mbnext->mbs,
				  runp->nmbs - 1) == 0
		       && (runp->mbs[runp->nmbs - 1]
			   == runp->mbnext->mbs[runp->nmbs - 1] + 1));

		/* Now walk backward from here to the beginning.  */
		curp = runp;

		/* The length is stored in a single byte.  */
		assert (runp->nmbs <= 256);
		obstack_1grow_fast (&extrapool, curp->nmbs - 1);
		for (i = 1; i < curp->nmbs; ++i)
		  obstack_1grow_fast (&extrapool, curp->mbs[i]);

		/* Add the weight indices of all members of the series to
		   the indirect pool, following the `mblast' links until
		   the element which started the series is reached.  */
		do
		  {
		    weightidx = output_weight (&weightpool, collate, curp);
		    obstack_int32_grow (&indirectpool, weightidx);

		    curp = curp->mblast;
		  }
		while (curp != series_startp);

		/* Add the final weight.  */
		weightidx = output_weight (&weightpool, collate, curp);
		obstack_int32_grow (&indirectpool, weightidx);

		/* And add the end byte sequence.  Without length this
		   time.  */
		for (i = 1; i < curp->nmbs; ++i)
		  obstack_1grow_fast (&extrapool, curp->mbs[i]);
	      }
	    else
	      {
		/* A single entry.  Simply add the index and the length and
		   string (except for the first character which is already
		   tested for).  */
		int i;

		/* Output the weight info.  */
		weightidx = output_weight (&weightpool, collate, runp);

		added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
					  + runp->nmbs - 1);
		assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
		obstack_make_room (&extrapool, added);

		obstack_int32_grow_fast (&extrapool, weightidx);
		/* The length is stored in a single byte.  */
		assert (runp->nmbs <= 256);
		obstack_1grow_fast (&extrapool, runp->nmbs - 1);

		for (i = 1; i < runp->nmbs; ++i)
		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
	      }

	    /* Add alignment bytes if necessary.  */
	    while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
	      obstack_1grow_fast (&extrapool, '\0');

	    /* Next entry.  */
	    lastp = runp;
	    runp = runp->mbnext;
	  }
	while (runp != NULL);

	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));

	/* If the final entry in the list is not a single character we
	   add an UNDEFINED entry here.  */
	if (lastp->nmbs != 1)
	  {
	    int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
	    obstack_make_room (&extrapool, added);

	    obstack_int32_grow_fast (&extrapool, 0);
	    /* XXX What rule? We just pick the first.  */
	    obstack_1grow_fast (&extrapool, 0);
	    /* Length is zero.  */
	    obstack_1grow_fast (&extrapool, 0);

	    /* Add alignment bytes if necessary.  */
	    while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
	      obstack_1grow_fast (&extrapool, '\0');
	  }
      }

  /* Add padding to the tables if necessary.  */
  while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
    obstack_1grow (&weightpool, 0);

  /* Now add the four tables.  */
  add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Now the same for the wide character table.  We need to store some
     more information here.  The next three items are left empty.  */
  add_locale_empty (&file);
  add_locale_empty (&file);
  add_locale_empty (&file);

  /* Since we are using the sign of an integer to mark indirection the
     offsets in the arrays we are indirectly referring to must not be
     zero since -0 == 0.  Therefore we add a bit of dummy content.  */
  obstack_int32_grow (&extrapool, 0);
  obstack_int32_grow (&indirectpool, 0);

  /* Handle the `UNDEFINED' value.  output_weightwc returns zero for
     COLLATE->undefined without appending anything to the pool, so this
     call merely verifies that it still does.  */
  if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
    abort ();

  /* Generate the table.  Walk through the lists of sequences starting
     with the same wide character and add them one after the other to
     the table.  In case we have more than one sequence starting with
     the same byte we have to use extra indirection.  */
  tablewc.p = 6;
  tablewc.q = 10;
  collidx_table_init (&tablewc);

  /* add_to_tablewc is called with a character and an element only; the
     rest of what it needs is handed over in `atwc'.  */
  atwc.weightpool = &weightpool;
  atwc.extrapool = &extrapool;
  atwc.indpool = &indirectpool;
  atwc.collate = collate;
  atwc.tablewc = &tablewc;

  wchead_table_iterate (&collate->wcheads, add_to_tablewc);

  /* The pools are local to this function, do not leave pointers to them
     behind.  */
  memset (&atwc, 0, sizeof (atwc));

  /* Now add the four tables.  */
  add_locale_collidx_table (&file, &tablewc);
  add_locale_raw_obstack (&file, &weightpool);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_obstack (&file, &indirectpool);

  /* Finally write the table with collation element names out.  It is
     a hash table with a simple function which gets the name of the
     character as the input.  One character might have many names.  The
     value associated with the name is an index into the weight table
     where we are then interested in the first-level weight value.

     To determine how large the table should be we are counting the
     elements we have to put in.  Since we are using internal chaining
     using a secondary hash function we have to make the table a bit
     larger to avoid extremely long search times.  The code below makes
     the table at least 50% larger than the number of entries.  */
  elem_size = 0;
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
	/* Yep, the element really counts.  */
	++elem_size;

      runp = runp->next;
    }
  /* Add 50% and find the next prime number.  */
  elem_size = next_prime (elem_size + (elem_size >> 1));

  /* Allocate the table.  Each entry consists of two words: the hash
     value and an index in a secondary table which provides the index
     into the weight table and the string itself (so that a match can
     be determined).  */
  elem_table = (uint32_t *) obstack_alloc (&extrapool,
					   elem_size * 2 * sizeof (uint32_t));
  memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));

  /* Now add the elements.  The condition is the same as in the counting
     loop above.  */
  runp = collate->start;
  while (runp != NULL)
    {
      if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
	{
	  /* Compute the hash value of the name.  */
	  uint32_t namelen = strlen (runp->name);
	  uint32_t hash = elem_hash (runp->name, namelen);
	  size_t idx = hash % elem_size;
#ifndef NDEBUG
	  size_t start_idx = idx;
#endif

	  /* A zero hash word marks a free slot.  */
	  if (elem_table[idx * 2] != 0)
	    {
	      /* The spot is already taken.  Try iterating using the value
		 from the secondary hashing function.  */
	      size_t iter = hash % (elem_size - 2) + 1;

	      do
		{
		  idx += iter;
		  if (idx >= elem_size)
		    idx -= elem_size;
		  /* Coming back to the start would mean the table is
		     full.  */
		  assert (idx != start_idx);
		}
	      while (elem_table[idx * 2] != 0);
	    }
	  /* This is the spot where we will insert the value.  The second
	     word is the current size of the object growing in the extra
	     pool, i.e. the offset of the record appended below.  */
 	  elem_table[idx * 2] = hash;
	  elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);

	  /* The string itself including length.  */
	  obstack_1grow (&extrapool, namelen);
	  obstack_grow (&extrapool, runp->name, namelen);

	  /* And the multibyte representation.  */
	  obstack_1grow (&extrapool, runp->nmbs);
	  obstack_grow (&extrapool, runp->mbs, runp->nmbs);

	  /* And align again to 32 bits.  At most three padding bytes are
	     needed, which is exactly what the string literal (two
	     explicit NULs plus the terminator) provides.  */
	  if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
	    obstack_grow (&extrapool, "\0\0",
			  (sizeof (int32_t)
			   - ((1 + namelen + 1 + runp->nmbs)
			      % sizeof (int32_t))));

	  /* Now some 32-bit values: multibyte collation sequence,
	     wide char string (including length), and wide char
	     collation sequence.  */
	  obstack_int32_grow (&extrapool, runp->mbseqorder);

	  obstack_int32_grow (&extrapool, runp->nwcs);
	  obstack_grow (&extrapool, runp->wcs,
			runp->nwcs * sizeof (uint32_t));
	  maybe_swap_uint32_obstack (&extrapool, runp->nwcs);

	  obstack_int32_grow (&extrapool, runp->wcseqorder);
	}

      runp = runp->next;
    }

  /* Prepare to write out this data.  */
  add_locale_uint32 (&file, elem_size);
  add_locale_uint32_array (&file, elem_table, 2 * elem_size);
  add_locale_raw_obstack (&file, &extrapool);
  add_locale_raw_data (&file, collate->mbseqorder, 256);
  add_locale_collseq_table (&file, &collate->wcseqorder);
  add_locale_string (&file, charmap->code_set_name);
  write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);

  /* Release everything which was allocated in the three pools.  */
  obstack_free (&weightpool, NULL);
  obstack_free (&extrapool, NULL);
  obstack_free (&indirectpool, NULL);
}
2511 
2512 
2513 static enum token_t
skip_to(struct linereader * ldfile,struct locale_collate_t * collate,const struct charmap_t * charmap,int to_endif)2514 skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2515 	 const struct charmap_t *charmap, int to_endif)
2516 {
2517   while (1)
2518     {
2519       struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2520       enum token_t nowtok = now->tok;
2521 
2522       if (nowtok == tok_eof || nowtok == tok_end)
2523 	return nowtok;
2524 
2525       if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2526 	{
2527 	  lr_error (ldfile, _("%s: nested conditionals not supported"),
2528 		    "LC_COLLATE");
2529 	  nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2530 	  if (nowtok == tok_eof || nowtok == tok_end)
2531 	    return nowtok;
2532 	}
2533       else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2534 	{
2535 	  lr_ignore_rest (ldfile, 1);
2536 	  return nowtok;
2537 	}
2538       else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2539 	{
2540 	  /* Do not read the rest of the line.  */
2541 	  return nowtok;
2542 	}
2543       else if (nowtok == tok_else)
2544 	{
2545 	  lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2546 	}
2547 
2548       lr_ignore_rest (ldfile, 0);
2549     }
2550 }
2551 
2552 
2553 void
collate_read(struct linereader * ldfile,struct localedef_t * result,const struct charmap_t * charmap,const char * repertoire_name,int ignore_content)2554 collate_read (struct linereader *ldfile, struct localedef_t *result,
2555 	      const struct charmap_t *charmap, const char *repertoire_name,
2556 	      int ignore_content)
2557 {
2558   struct repertoire_t *repertoire = NULL;
2559   struct locale_collate_t *collate;
2560   struct token *now;
2561   struct token *arg = NULL;
2562   enum token_t nowtok;
2563   enum token_t was_ellipsis = tok_none;
2564   struct localedef_t *copy_locale = NULL;
2565   /* Parsing state:
2566      0 - start
2567      1 - between `order-start' and `order-end'
2568      2 - after `order-end'
2569      3 - after `reorder-after', waiting for `reorder-end'
2570      4 - after `reorder-end'
2571      5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572      6 - after `reorder-sections-end'
2573   */
2574   int state = 0;
2575 
2576   /* Get the repertoire we have to use.  */
2577   if (repertoire_name != NULL)
2578     repertoire = repertoire_read (repertoire_name);
2579 
2580   /* The rest of the line containing `LC_COLLATE' must be free.  */
2581   lr_ignore_rest (ldfile, 1);
2582 
2583   while (1)
2584     {
2585       do
2586 	{
2587 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
2588 	  nowtok = now->tok;
2589 	}
2590       while (nowtok == tok_eol);
2591 
2592       if (nowtok != tok_define)
2593 	break;
2594 
2595       if (ignore_content)
2596 	lr_ignore_rest (ldfile, 0);
2597       else
2598 	{
2599 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
2600 	  if (arg->tok != tok_ident)
2601 	    SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2602 	  else
2603 	    {
2604 	      /* Simply add the new symbol.  */
2605 	      struct name_list *newsym = xmalloc (sizeof (*newsym)
2606 						  + arg->val.str.lenmb + 1);
2607 	      memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2608 	      newsym->str[arg->val.str.lenmb] = '\0';
2609 	      newsym->next = defined;
2610 	      defined = newsym;
2611 
2612 	      lr_ignore_rest (ldfile, 1);
2613 	    }
2614 	}
2615     }
2616 
2617   if (nowtok == tok_copy)
2618     {
2619       now = lr_token (ldfile, charmap, result, NULL, verbose);
2620       if (now->tok != tok_string)
2621 	{
2622 	  SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2623 
2624 	skip_category:
2625 	  do
2626 	    now = lr_token (ldfile, charmap, result, NULL, verbose);
2627 	  while (now->tok != tok_eof && now->tok != tok_end);
2628 
2629 	  if (now->tok != tok_eof
2630 	      || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2631 		  now->tok == tok_eof))
2632 	    lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2633 	  else if (now->tok != tok_lc_collate)
2634 	    {
2635 	      lr_error (ldfile, _("\
2636 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637 	      lr_ignore_rest (ldfile, 0);
2638 	    }
2639 	  else
2640 	    lr_ignore_rest (ldfile, 1);
2641 
2642 	  return;
2643 	}
2644 
2645       if (! ignore_content)
2646 	{
2647 	  /* Get the locale definition.  */
2648 	  copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2649 				     repertoire_name, charmap, NULL);
2650 	  if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2651 	    {
2652 	      /* Not yet loaded.  So do it now.  */
2653 	      if (locfile_read (copy_locale, charmap) != 0)
2654 		goto skip_category;
2655 	    }
2656 
2657 	  if (copy_locale->categories[LC_COLLATE].collate == NULL)
2658 	    return;
2659 	}
2660 
2661       lr_ignore_rest (ldfile, 1);
2662 
2663       now = lr_token (ldfile, charmap, result, NULL, verbose);
2664       nowtok = now->tok;
2665     }
2666 
2667   /* Prepare the data structures.  */
2668   collate_startup (ldfile, result, copy_locale, ignore_content);
2669   collate = result->categories[LC_COLLATE].collate;
2670 
2671   while (1)
2672     {
2673       char ucs4buf[10];
2674       char *symstr;
2675       size_t symlen;
2676 
2677       /* Of course we don't proceed beyond the end of file.  */
2678       if (nowtok == tok_eof)
2679 	break;
2680 
2681       /* Ingore empty lines.  */
2682       if (nowtok == tok_eol)
2683 	{
2684 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
2685 	  nowtok = now->tok;
2686 	  continue;
2687 	}
2688 
2689       switch (nowtok)
2690 	{
2691 	case tok_codepoint_collation:
2692 	  collate->codepoint_collation = true;
2693 	  break;
2694 
2695 	case tok_copy:
2696 	  /* Allow copying other locales.  */
2697 	  now = lr_token (ldfile, charmap, result, NULL, verbose);
2698 	  if (now->tok != tok_string)
2699 	    goto err_label;
2700 
2701 	  if (! ignore_content)
2702 	    load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2703 			 charmap, result);
2704 
2705 	  lr_ignore_rest (ldfile, 1);
2706 	  break;
2707 
2708 	case tok_coll_weight_max:
2709 	  /* Ignore the rest of the line if we don't need the input of
2710 	     this line.  */
2711 	  if (ignore_content)
2712 	    {
2713 	      lr_ignore_rest (ldfile, 0);
2714 	      break;
2715 	    }
2716 
2717 	  if (state != 0)
2718 	    goto err_label;
2719 
2720 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
2721 	  if (arg->tok != tok_number)
2722 	    goto err_label;
2723 	  if (collate->col_weight_max != -1)
2724 	    lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2725 		      "LC_COLLATE", "col_weight_max");
2726 	  else
2727 	    collate->col_weight_max = arg->val.num;
2728 	  lr_ignore_rest (ldfile, 1);
2729 	  break;
2730 
2731 	case tok_section_symbol:
2732 	  /* Ignore the rest of the line if we don't need the input of
2733 	     this line.  */
2734 	  if (ignore_content)
2735 	    {
2736 	      lr_ignore_rest (ldfile, 0);
2737 	      break;
2738 	    }
2739 
2740 	  if (state != 0)
2741 	    goto err_label;
2742 
2743 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2744 	  if (arg->tok != tok_bsymbol)
2745 	    goto err_label;
2746 	  else if (!ignore_content)
2747 	    {
2748 	      /* Check whether this section is already known.  */
2749 	      struct section_list *known = collate->sections;
2750 	      while (known != NULL)
2751 		{
2752 		  if (strcmp (known->name, arg->val.str.startmb) == 0)
2753 		    break;
2754 		  known = known->next;
2755 		}
2756 
2757 	      if (known != NULL)
2758 		{
2759 		  lr_error (ldfile,
2760 			    _("%s: duplicate declaration of section `%s'"),
2761 			    "LC_COLLATE", arg->val.str.startmb);
2762 		  free (arg->val.str.startmb);
2763 		}
2764 	      else
2765 		collate->sections = make_seclist_elem (collate,
2766 						       arg->val.str.startmb,
2767 						       collate->sections);
2768 
2769 	      lr_ignore_rest (ldfile, known == NULL);
2770 	    }
2771 	  else
2772 	    {
2773 	      free (arg->val.str.startmb);
2774 	      lr_ignore_rest (ldfile, 0);
2775 	    }
2776 	  break;
2777 
2778 	case tok_collating_element:
2779 	  /* Ignore the rest of the line if we don't need the input of
2780 	     this line.  */
2781 	  if (ignore_content)
2782 	    {
2783 	      lr_ignore_rest (ldfile, 0);
2784 	      break;
2785 	    }
2786 
2787 	  if (state != 0 && state != 2)
2788 	    goto err_label;
2789 
2790 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2791 	  if (arg->tok != tok_bsymbol)
2792 	    goto err_label;
2793 	  else
2794 	    {
2795 	      const char *symbol = arg->val.str.startmb;
2796 	      size_t symbol_len = arg->val.str.lenmb;
2797 
2798 	      /* Next the `from' keyword.  */
2799 	      arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2800 	      if (arg->tok != tok_from)
2801 		{
2802 		  free ((char *) symbol);
2803 		  goto err_label;
2804 		}
2805 
2806 	      ldfile->return_widestr = 1;
2807 	      ldfile->translate_strings = 1;
2808 
2809 	      /* Finally the string with the replacement.  */
2810 	      arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2811 
2812 	      ldfile->return_widestr = 0;
2813 	      ldfile->translate_strings = 0;
2814 
2815 	      if (arg->tok != tok_string)
2816 		goto err_label;
2817 
2818 	      if (!ignore_content && symbol != NULL)
2819 		{
2820 		  /* The name is already defined.  */
2821 		  if (check_duplicate (ldfile, collate, charmap,
2822 				       repertoire, symbol, symbol_len))
2823 		    goto col_elem_free;
2824 
2825 		  if (arg->val.str.startmb != NULL)
2826 		    insert_entry (&collate->elem_table, symbol, symbol_len,
2827 				  new_element (collate,
2828 					       arg->val.str.startmb,
2829 					       arg->val.str.lenmb - 1,
2830 					       arg->val.str.startwc,
2831 					       symbol, symbol_len, 0));
2832 		}
2833 	      else
2834 		{
2835 		col_elem_free:
2836 		  free ((char *) symbol);
2837 		  free (arg->val.str.startmb);
2838 		  free (arg->val.str.startwc);
2839 		}
2840 	      lr_ignore_rest (ldfile, 1);
2841 	    }
2842 	  break;
2843 
2844 	case tok_collating_symbol:
2845 	  /* Ignore the rest of the line if we don't need the input of
2846 	     this line.  */
2847 	  if (ignore_content)
2848 	    {
2849 	      lr_ignore_rest (ldfile, 0);
2850 	      break;
2851 	    }
2852 
2853 	  if (state != 0 && state != 2)
2854 	    goto err_label;
2855 
2856 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2857 	  if (arg->tok != tok_bsymbol)
2858 	    goto err_label;
2859 	  else
2860 	    {
2861 	      char *symbol = arg->val.str.startmb;
2862 	      size_t symbol_len = arg->val.str.lenmb;
2863 	      char *endsymbol = NULL;
2864 	      size_t endsymbol_len = 0;
2865 	      enum token_t ellipsis = tok_none;
2866 
2867 	      arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2868 	      if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2869 		{
2870 		  ellipsis = arg->tok;
2871 
2872 		  arg = lr_token (ldfile, charmap, result, repertoire,
2873 				  verbose);
2874 		  if (arg->tok != tok_bsymbol)
2875 		    {
2876 		      free (symbol);
2877 		      goto err_label;
2878 		    }
2879 
2880 		  endsymbol = arg->val.str.startmb;
2881 		  endsymbol_len = arg->val.str.lenmb;
2882 
2883 		  lr_ignore_rest (ldfile, 1);
2884 		}
2885 	      else if (arg->tok != tok_eol)
2886 		{
2887 		  free (symbol);
2888 		  goto err_label;
2889 		}
2890 
2891 	      if (!ignore_content)
2892 		{
2893 		  if (symbol == NULL
2894 		      || (ellipsis != tok_none && endsymbol == NULL))
2895 		    {
2896 		      lr_error (ldfile, _("\
2897 %s: unknown character in collating symbol name"),
2898 				"LC_COLLATE");
2899 		      goto col_sym_free;
2900 		    }
2901 		  else if (ellipsis == tok_none)
2902 		    {
2903 		      /* A single symbol, no ellipsis.  */
2904 		      if (check_duplicate (ldfile, collate, charmap,
2905 					   repertoire, symbol, symbol_len))
2906 			/* The name is already defined.  */
2907 			goto col_sym_free;
2908 
2909 		      insert_entry (&collate->sym_table, symbol, symbol_len,
2910 				    new_symbol (collate, symbol, symbol_len));
2911 		    }
2912 		  else if (symbol_len != endsymbol_len)
2913 		    {
2914 		    col_sym_inv_range:
2915 		      lr_error (ldfile,
2916 				_("invalid names for character range"));
2917 		      goto col_sym_free;
2918 		    }
2919 		  else
2920 		    {
2921 		      /* Oh my, we have to handle an ellipsis.  First, as
2922 			 usual, determine the common prefix and then
2923 			 convert the rest into a range.  */
2924 		      size_t prefixlen;
2925 		      unsigned long int from;
2926 		      unsigned long int to;
2927 		      char *endp;
2928 
2929 		      for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2930 			if (symbol[prefixlen] != endsymbol[prefixlen])
2931 			  break;
2932 
2933 		      /* Convert the rest into numbers.  */
2934 		      symbol[symbol_len] = '\0';
2935 		      from = strtoul (&symbol[prefixlen], &endp,
2936 				      ellipsis == tok_ellipsis2 ? 16 : 10);
2937 		      if (*endp != '\0')
2938 			goto col_sym_inv_range;
2939 
2940 		      endsymbol[symbol_len] = '\0';
2941 		      to = strtoul (&endsymbol[prefixlen], &endp,
2942 				    ellipsis == tok_ellipsis2 ? 16 : 10);
2943 		      if (*endp != '\0')
2944 			goto col_sym_inv_range;
2945 
2946 		      if (from > to)
2947 			goto col_sym_inv_range;
2948 
2949 		      /* Now loop over all entries.  */
2950 		      while (from <= to)
2951 			{
2952 			  char *symbuf;
2953 
2954 			  symbuf = (char *) obstack_alloc (&collate->mempool,
2955 							   symbol_len + 1);
2956 
2957 			  /* Create the name.  */
2958 			  sprintf (symbuf,
2959 				   ellipsis == tok_ellipsis2
2960 				   ? "%.*s%.*lX" : "%.*s%.*lu",
2961 				   (int) prefixlen, symbol,
2962 				   (int) (symbol_len - prefixlen), from);
2963 
2964 			  if (check_duplicate (ldfile, collate, charmap,
2965 					       repertoire, symbuf, symbol_len))
2966 			    /* The name is already defined.  */
2967 			    goto col_sym_free;
2968 
2969 			  insert_entry (&collate->sym_table, symbuf,
2970 					symbol_len,
2971 					new_symbol (collate, symbuf,
2972 						    symbol_len));
2973 
2974 			  /* Increment the counter.  */
2975 			  ++from;
2976 			}
2977 
2978 		      goto col_sym_free;
2979 		    }
2980 		}
2981 	      else
2982 		{
2983 		col_sym_free:
2984 		  free (symbol);
2985 		  free (endsymbol);
2986 		}
2987 	    }
2988 	  break;
2989 
2990 	case tok_symbol_equivalence:
2991 	  /* Ignore the rest of the line if we don't need the input of
2992 	     this line.  */
2993 	  if (ignore_content)
2994 	    {
2995 	      lr_ignore_rest (ldfile, 0);
2996 	      break;
2997 	    }
2998 
2999 	  if (state != 0)
3000 	    goto err_label;
3001 
3002 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3003 	  if (arg->tok != tok_bsymbol)
3004 	    goto err_label;
3005 	  else
3006 	    {
3007 	      const char *newname = arg->val.str.startmb;
3008 	      size_t newname_len = arg->val.str.lenmb;
3009 	      const char *symname;
3010 	      size_t symname_len;
3011 	      void *symval;	/* Actually struct symbol_t*  */
3012 
3013 	      arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3014 	      if (arg->tok != tok_bsymbol)
3015 		{
3016 		  free ((char *) newname);
3017 		  goto err_label;
3018 		}
3019 
3020 	      symname = arg->val.str.startmb;
3021 	      symname_len = arg->val.str.lenmb;
3022 
3023 	      if (newname == NULL)
3024 		{
3025 		  lr_error (ldfile, _("\
3026 %s: unknown character in equivalent definition name"),
3027 			    "LC_COLLATE");
3028 
3029 		sym_equiv_free:
3030 		  free ((char *) newname);
3031 		  free ((char *) symname);
3032 		  break;
3033 		}
3034 	      if (symname == NULL)
3035 		{
3036 		  lr_error (ldfile, _("\
3037 %s: unknown character in equivalent definition value"),
3038 			    "LC_COLLATE");
3039 		  goto sym_equiv_free;
3040 		}
3041 
3042 	      /* See whether the symbol name is already defined.  */
3043 	      if (find_entry (&collate->sym_table, symname, symname_len,
3044 			      &symval) != 0)
3045 		{
3046 		  lr_error (ldfile, _("\
3047 %s: unknown symbol `%s' in equivalent definition"),
3048 			    "LC_COLLATE", symname);
3049 		  goto sym_equiv_free;
3050 		}
3051 
3052 	      if (insert_entry (&collate->sym_table,
3053 				newname, newname_len, symval) < 0)
3054 		{
3055 		  lr_error (ldfile, _("\
3056 error while adding equivalent collating symbol"));
3057 		  goto sym_equiv_free;
3058 		}
3059 
3060 	      free ((char *) symname);
3061 	    }
3062 	  lr_ignore_rest (ldfile, 1);
3063 	  break;
3064 
3065 	case tok_script:
3066 	  /* Ignore the rest of the line if we don't need the input of
3067 	     this line.  */
3068 	  if (ignore_content)
3069 	    {
3070 	      lr_ignore_rest (ldfile, 0);
3071 	      break;
3072 	    }
3073 
3074 	  /* We get told about the scripts we know.  */
3075 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3076 	  if (arg->tok != tok_bsymbol)
3077 	    goto err_label;
3078 	  else
3079 	    {
3080 	      struct section_list *runp = collate->known_sections;
3081 	      char *name;
3082 
3083 	      while (runp != NULL)
3084 		if (strncmp (runp->name, arg->val.str.startmb,
3085 			     arg->val.str.lenmb) == 0
3086 		    && runp->name[arg->val.str.lenmb] == '\0')
3087 		  break;
3088 		else
3089 		  runp = runp->def_next;
3090 
3091 	      if (runp != NULL)
3092 		{
3093 		  lr_error (ldfile, _("duplicate definition of script `%s'"),
3094 			    runp->name);
3095 		  lr_ignore_rest (ldfile, 0);
3096 		  break;
3097 		}
3098 
3099 	      runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3100 	      name = (char *) xmalloc (arg->val.str.lenmb + 1);
3101 	      memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3102 	      name[arg->val.str.lenmb] = '\0';
3103 	      runp->name = name;
3104 
3105 	      runp->def_next = collate->known_sections;
3106 	      collate->known_sections = runp;
3107 	    }
3108 	  lr_ignore_rest (ldfile, 1);
3109 	  break;
3110 
3111 	case tok_order_start:
3112 	  /* Ignore the rest of the line if we don't need the input of
3113 	     this line.  */
3114 	  if (ignore_content)
3115 	    {
3116 	      lr_ignore_rest (ldfile, 0);
3117 	      break;
3118 	    }
3119 
3120 	  if (state != 0 && state != 1 && state != 2)
3121 	    goto err_label;
3122 	  state = 1;
3123 
3124 	  /* The 14652 draft does not specify whether all `order_start' lines
3125 	     must contain the same number of sort-rules, but 14651 does.  So
3126 	     we require this here as well.  */
3127 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3128 	  if (arg->tok == tok_bsymbol)
3129 	    {
3130 	      /* This better should be a section name.  */
3131 	      struct section_list *sp = collate->known_sections;
3132 	      while (sp != NULL
3133 		     && (sp->name == NULL
3134 			 || strncmp (sp->name, arg->val.str.startmb,
3135 				     arg->val.str.lenmb) != 0
3136 			 || sp->name[arg->val.str.lenmb] != '\0'))
3137 		sp = sp->def_next;
3138 
3139 	      if (sp == NULL)
3140 		{
3141 		  lr_error (ldfile, _("\
3142 %s: unknown section name `%.*s'"),
3143 			    "LC_COLLATE", (int) arg->val.str.lenmb,
3144 			    arg->val.str.startmb);
3145 		  /* We use the error section.  */
3146 		  collate->current_section = &collate->error_section;
3147 
3148 		  if (collate->error_section.first == NULL)
3149 		    {
3150 		      /* Insert &collate->error_section at the end of
3151 			 the collate->sections list.  */
3152 		      if (collate->sections == NULL)
3153 			collate->sections = &collate->error_section;
3154 		      else
3155 			{
3156 			  sp = collate->sections;
3157 			  while (sp->next != NULL)
3158 			    sp = sp->next;
3159 
3160 			  sp->next = &collate->error_section;
3161 			}
3162 		      collate->error_section.next = NULL;
3163 		    }
3164 		}
3165 	      else
3166 		{
3167 		  /* One should not be allowed to open the same
3168 		     section twice.  */
3169 		  if (sp->first != NULL)
3170 		    lr_error (ldfile, _("\
3171 %s: multiple order definitions for section `%s'"),
3172 			      "LC_COLLATE", sp->name);
3173 		  else
3174 		    {
3175 		      /* Insert sp in the collate->sections list,
3176 			 right after collate->current_section.  */
3177 		      if (collate->current_section != NULL)
3178 			{
3179 			  sp->next = collate->current_section->next;
3180 			  collate->current_section->next = sp;
3181 			}
3182 		      else if (collate->sections == NULL)
3183 			/* This is the first section to be defined.  */
3184 			collate->sections = sp;
3185 
3186 		      collate->current_section = sp;
3187 		    }
3188 
3189 		  /* Next should come the end of the line or a semicolon.  */
3190 		  arg = lr_token (ldfile, charmap, result, repertoire,
3191 				  verbose);
3192 		  if (arg->tok == tok_eol)
3193 		    {
3194 		      uint32_t cnt;
3195 
3196 		      /* This means we have exactly one rule: `forward'.  */
3197 		      if (nrules > 1)
3198 			lr_error (ldfile, _("\
3199 %s: invalid number of sorting rules"),
3200 				  "LC_COLLATE");
3201 		      else
3202 			nrules = 1;
3203 		      sp->rules = obstack_alloc (&collate->mempool,
3204 						 (sizeof (enum coll_sort_rule)
3205 						  * nrules));
3206 		      for (cnt = 0; cnt < nrules; ++cnt)
3207 			sp->rules[cnt] = sort_forward;
3208 
3209 		      /* Next line.  */
3210 		      break;
3211 		    }
3212 
3213 		  /* Get the next token.  */
3214 		  arg = lr_token (ldfile, charmap, result, repertoire,
3215 				  verbose);
3216 		}
3217 	    }
3218 	  else
3219 	    {
3220 	      /* There is no section symbol.  Therefore we use the unnamed
3221 		 section.  */
3222 	      collate->current_section = &collate->unnamed_section;
3223 
3224 	      if (collate->unnamed_section_defined)
3225 		lr_error (ldfile, _("\
3226 %s: multiple order definitions for unnamed section"),
3227 			  "LC_COLLATE");
3228 	      else
3229 		{
3230 		  /* Insert &collate->unnamed_section at the beginning of
3231 		     the collate->sections list.  */
3232 		  collate->unnamed_section.next = collate->sections;
3233 		  collate->sections = &collate->unnamed_section;
3234 		  collate->unnamed_section_defined = true;
3235 		}
3236 	    }
3237 
3238 	  /* Now read the direction names.  */
3239 	  read_directions (ldfile, arg, charmap, repertoire, result);
3240 
3241 	  /* From now we need the strings untranslated.  */
3242 	  ldfile->translate_strings = 0;
3243 	  break;
3244 
3245 	case tok_order_end:
3246 	  /* Ignore the rest of the line if we don't need the input of
3247 	     this line.  */
3248 	  if (ignore_content)
3249 	    {
3250 	      lr_ignore_rest (ldfile, 0);
3251 	      break;
3252 	    }
3253 
3254 	  if (state != 1)
3255 	    goto err_label;
3256 
3257 	  /* Handle ellipsis at end of list.  */
3258 	  if (was_ellipsis != tok_none)
3259 	    {
3260 	      handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3261 			       repertoire, result);
3262 	      was_ellipsis = tok_none;
3263 	    }
3264 
3265 	  state = 2;
3266 	  lr_ignore_rest (ldfile, 1);
3267 	  break;
3268 
3269 	case tok_reorder_after:
3270 	  /* Ignore the rest of the line if we don't need the input of
3271 	     this line.  */
3272 	  if (ignore_content)
3273 	    {
3274 	      lr_ignore_rest (ldfile, 0);
3275 	      break;
3276 	    }
3277 
3278 	  if (state == 1)
3279 	    {
3280 	      lr_error (ldfile, _("%s: missing `order_end' keyword"),
3281 			"LC_COLLATE");
3282 	      state = 2;
3283 
3284 	      /* Handle ellipsis at end of list.  */
3285 	      if (was_ellipsis != tok_none)
3286 		{
3287 		  handle_ellipsis (ldfile, arg->val.str.startmb,
3288 				   arg->val.str.lenmb, was_ellipsis, charmap,
3289 				   repertoire, result);
3290 		  was_ellipsis = tok_none;
3291 		}
3292 	    }
3293 	  else if (state == 0 && copy_locale == NULL)
3294 	    goto err_label;
3295 	  else if (state != 0 && state != 2 && state != 3)
3296 	    goto err_label;
3297 	  state = 3;
3298 
3299 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3300 	  if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3301 	    {
3302 	      /* Find this symbol in the sequence table.  */
3303 	      char ucsbuf[10];
3304 	      char *startmb;
3305 	      size_t lenmb;
3306 	      struct element_t *insp;
3307 	      int no_error = 1;
3308 	      void *ptr;
3309 
3310 	      if (arg->tok == tok_bsymbol)
3311 		{
3312 		  startmb = arg->val.str.startmb;
3313 		  lenmb = arg->val.str.lenmb;
3314 		}
3315 	      else
3316 		{
3317 		  sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3318 		  startmb = ucsbuf;
3319 		  lenmb = 9;
3320 		}
3321 
3322 	      if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3323 		/* Yes, the symbol exists.  Simply point the cursor
3324 		   to it.  */
3325 		collate->cursor = (struct element_t *) ptr;
3326 	      else
3327 		{
3328 		  struct symbol_t *symbp;
3329 		  void *ptr;
3330 
3331 		  if (find_entry (&collate->sym_table, startmb, lenmb,
3332 				  &ptr) == 0)
3333 		    {
3334 		      symbp = ptr;
3335 
3336 		      if (symbp->order->last != NULL
3337 			  || symbp->order->next != NULL)
3338 			collate->cursor = symbp->order;
3339 		      else
3340 			{
3341 			  /* This is a collating symbol but its position
3342 			     is not yet defined.  */
3343 			  lr_error (ldfile, _("\
3344 %s: order for collating symbol %.*s not yet defined"),
3345 				    "LC_COLLATE", (int) lenmb, startmb);
3346 			  collate->cursor = NULL;
3347 			  no_error = 0;
3348 			}
3349 		    }
3350 		  else if (find_entry (&collate->elem_table, startmb, lenmb,
3351 				       &ptr) == 0)
3352 		    {
3353 		      insp = (struct element_t *) ptr;
3354 
3355 		      if (insp->last != NULL || insp->next != NULL)
3356 			collate->cursor = insp;
3357 		      else
3358 			{
3359 			  /* This is a collating element but its position
3360 			     is not yet defined.  */
3361 			  lr_error (ldfile, _("\
3362 %s: order for collating element %.*s not yet defined"),
3363 				    "LC_COLLATE", (int) lenmb, startmb);
3364 			  collate->cursor = NULL;
3365 			  no_error = 0;
3366 			}
3367 		    }
3368 		  else
3369 		    {
3370 		      /* This is bad.  The symbol after which we have to
3371 			 insert does not exist.  */
3372 		      lr_error (ldfile, _("\
3373 %s: cannot reorder after %.*s: symbol not known"),
3374 				"LC_COLLATE", (int) lenmb, startmb);
3375 		      collate->cursor = NULL;
3376 		      no_error = 0;
3377 		    }
3378 		}
3379 
3380 	      lr_ignore_rest (ldfile, no_error);
3381 	    }
3382 	  else
3383 	    /* This must not happen.  */
3384 	    goto err_label;
3385 	  break;
3386 
3387 	case tok_reorder_end:
3388 	  /* Ignore the rest of the line if we don't need the input of
3389 	     this line.  */
3390 	  if (ignore_content)
3391 	    break;
3392 
3393 	  if (state != 3)
3394 	    goto err_label;
3395 	  state = 4;
3396 	  lr_ignore_rest (ldfile, 1);
3397 	  break;
3398 
3399 	case tok_reorder_sections_after:
3400 	  /* Ignore the rest of the line if we don't need the input of
3401 	     this line.  */
3402 	  if (ignore_content)
3403 	    {
3404 	      lr_ignore_rest (ldfile, 0);
3405 	      break;
3406 	    }
3407 
3408 	  if (state == 1)
3409 	    {
3410 	      lr_error (ldfile, _("%s: missing `order_end' keyword"),
3411 			"LC_COLLATE");
3412 	      state = 2;
3413 
3414 	      /* Handle ellipsis at end of list.  */
3415 	      if (was_ellipsis != tok_none)
3416 		{
3417 		  handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3418 				   repertoire, result);
3419 		  was_ellipsis = tok_none;
3420 		}
3421 	    }
3422 	  else if (state == 3)
3423 	    {
3424 	      record_error (0, 0, _("\
3425 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3426 	      state = 4;
3427 	    }
3428 	  else if (state != 2 && state != 4)
3429 	    goto err_label;
3430 	  state = 5;
3431 
3432 	  /* Get the name of the sections we are adding after.  */
3433 	  arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3434 	  if (arg->tok == tok_bsymbol)
3435 	    {
3436 	      /* Now find a section with this name.  */
3437 	      struct section_list *runp = collate->sections;
3438 
3439 	      while (runp != NULL)
3440 		{
3441 		  if (runp->name != NULL
3442 		      && strlen (runp->name) == arg->val.str.lenmb
3443 		      && memcmp (runp->name, arg->val.str.startmb,
3444 				 arg->val.str.lenmb) == 0)
3445 		    break;
3446 
3447 		  runp = runp->next;
3448 		}
3449 
3450 	      if (runp != NULL)
3451 		collate->current_section = runp;
3452 	      else
3453 		{
3454 		  /* This is bad.  The section after which we have to
3455 		     reorder does not exist.  Therefore we cannot
3456 		     process the whole rest of this reorder
3457 		     specification.  */
3458 		  lr_error (ldfile, _("%s: section `%.*s' not known"),
3459 			    "LC_COLLATE", (int) arg->val.str.lenmb,
3460 			    arg->val.str.startmb);
3461 
3462 		  do
3463 		    {
3464 		      lr_ignore_rest (ldfile, 0);
3465 
3466 		      now = lr_token (ldfile, charmap, result, NULL, verbose);
3467 		    }
3468 		  while (now->tok == tok_reorder_sections_after
3469 			 || now->tok == tok_reorder_sections_end
3470 			 || now->tok == tok_end);
3471 
3472 		  /* Process the token we just saw.  */
3473 		  nowtok = now->tok;
3474 		  continue;
3475 		}
3476 	    }
3477 	  else
3478 	    /* This must not happen.  */
3479 	    goto err_label;
3480 	  break;
3481 
3482 	case tok_reorder_sections_end:
3483 	  /* Ignore the rest of the line if we don't need the input of
3484 	     this line.  */
3485 	  if (ignore_content)
3486 	    break;
3487 
3488 	  if (state != 5)
3489 	    goto err_label;
3490 	  state = 6;
3491 	  lr_ignore_rest (ldfile, 1);
3492 	  break;
3493 
3494 	case tok_bsymbol:
3495 	case tok_ucs4:
3496 	  /* Ignore the rest of the line if we don't need the input of
3497 	     this line.  */
3498 	  if (ignore_content)
3499 	    {
3500 	      lr_ignore_rest (ldfile, 0);
3501 	      break;
3502 	    }
3503 
3504 	  if (state != 0 && state != 1 && state != 3 && state != 5)
3505 	    goto err_label;
3506 
3507 	  if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3508 	    goto err_label;
3509 
3510 	  if (nowtok == tok_ucs4)
3511 	    {
3512 	      snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3513 	      symstr = ucs4buf;
3514 	      symlen = 9;
3515 	    }
3516 	  else if (arg != NULL)
3517 	    {
3518 	      symstr = arg->val.str.startmb;
3519 	      symlen = arg->val.str.lenmb;
3520 	    }
3521 	  else
3522 	    {
3523 	      lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3524 			(int) ldfile->token.val.str.lenmb,
3525 			ldfile->token.val.str.startmb);
3526 	      break;
3527 	    }
3528 
3529 	  struct element_t *seqp;
3530 	  if (state == 0)
3531 	    {
3532 	      /* We are outside an `order_start' region.  This means
3533 		 we must only accept definitions of values for
3534 		 collation symbols since these are purely abstract
3535 		 values and don't need directions associated.  */
3536 	      void *ptr;
3537 
3538 	      if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3539 		{
3540 		  seqp = ptr;
3541 
3542 		  /* It's already defined.  First check whether this
3543 		     is really a collating symbol.  */
3544 		  if (seqp->is_character)
3545 		    goto err_label;
3546 
3547 		  goto move_entry;
3548 		}
3549 	      else
3550 		{
3551 		  void *result;
3552 
3553 		  if (find_entry (&collate->sym_table, symstr, symlen,
3554 				  &result) != 0)
3555 		    /* No collating symbol, it's an error.  */
3556 		    goto err_label;
3557 
3558 		  /* Maybe this is the first time we define a symbol
3559 		     value and it is before the first actual section.  */
3560 		  if (collate->sections == NULL)
3561 		    collate->sections = collate->current_section =
3562 		      &collate->symbol_section;
3563 		}
3564 
3565 	      if (was_ellipsis != tok_none)
3566 		{
3567 		  handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3568 				   charmap, repertoire, result);
3569 
3570 		  /* Remember that we processed the ellipsis.  */
3571 		  was_ellipsis = tok_none;
3572 
3573 		  /* And don't add the value a second time.  */
3574 		  break;
3575 		}
3576 	    }
3577 	  else if (state == 3)
3578 	    {
3579 	      /* It is possible that we already have this collation sequence.
3580 		 In this case we move the entry.  */
3581 	      void *sym;
3582 	      void *ptr;
3583 
3584 	      /* If the symbol after which we have to insert was not found
3585 		 ignore all entries.  */
3586 	      if (collate->cursor == NULL)
3587 		{
3588 		  lr_ignore_rest (ldfile, 0);
3589 		  break;
3590 		}
3591 
3592 	      if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3593 		{
3594 		  seqp = (struct element_t *) ptr;
3595 		  goto move_entry;
3596 		}
3597 
3598 	      if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3599 		  && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3600 		goto move_entry;
3601 
3602 	      if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3603 		  && (seqp = (struct element_t *) ptr,
3604 		      seqp->last != NULL || seqp->next != NULL
3605 		      || (collate->start != NULL && seqp == collate->start)))
3606 		{
3607 		move_entry:
3608 		  /* Remove the entry from the old position.  */
3609 		  if (seqp->last == NULL)
3610 		    collate->start = seqp->next;
3611 		  else
3612 		    seqp->last->next = seqp->next;
3613 		  if (seqp->next != NULL)
3614 		    seqp->next->last = seqp->last;
3615 
3616 		  /* We also have to check whether this entry is the
3617 		     first or last of a section.  */
3618 		  if (seqp->section->first == seqp)
3619 		    {
3620 		      if (seqp->section->first == seqp->section->last)
3621 			/* This section has no content anymore.  */
3622 			seqp->section->first = seqp->section->last = NULL;
3623 		      else
3624 			seqp->section->first = seqp->next;
3625 		    }
3626 		  else if (seqp->section->last == seqp)
3627 		    seqp->section->last = seqp->last;
3628 
3629 		  /* Now insert it in the new place.  */
3630 		  insert_weights (ldfile, seqp, charmap, repertoire, result,
3631 				  tok_none);
3632 		  break;
3633 		}
3634 
3635 	      /* Otherwise we just add a new entry.  */
3636 	    }
3637 	  else if (state == 5)
3638 	    {
3639 	      /* We are reordering sections.  Find the named section.  */
3640 	      struct section_list *runp = collate->sections;
3641 	      struct section_list *prevp = NULL;
3642 
3643 	      while (runp != NULL)
3644 		{
3645 		  if (runp->name != NULL
3646 		      && strlen (runp->name) == symlen
3647 		      && memcmp (runp->name, symstr, symlen) == 0)
3648 		    break;
3649 
3650 		  prevp = runp;
3651 		  runp = runp->next;
3652 		}
3653 
3654 	      if (runp == NULL)
3655 		{
3656 		  lr_error (ldfile, _("%s: section `%.*s' not known"),
3657 			    "LC_COLLATE", (int) symlen, symstr);
3658 		  lr_ignore_rest (ldfile, 0);
3659 		}
3660 	      else
3661 		{
3662 		  if (runp != collate->current_section)
3663 		    {
3664 		      /* Remove the named section from the old place and
3665 			 insert it in the new one.  */
3666 		      prevp->next = runp->next;
3667 
3668 		      runp->next = collate->current_section->next;
3669 		      collate->current_section->next = runp;
3670 		      collate->current_section = runp;
3671 		    }
3672 
3673 		  /* Process the rest of the line which might change
3674 		     the collation rules.  */
3675 		  arg = lr_token (ldfile, charmap, result, repertoire,
3676 				  verbose);
3677 		  if (arg->tok != tok_eof && arg->tok != tok_eol)
3678 		    read_directions (ldfile, arg, charmap, repertoire,
3679 				     result);
3680 		}
3681 	      break;
3682 	    }
3683 	  else if (was_ellipsis != tok_none)
3684 	    {
3685 	      /* Using the information in the `ellipsis_weight'
3686 		 element and this and the last value we have to handle
3687 		 the ellipsis now.  */
3688 	      assert (state == 1);
3689 
3690 	      handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3691 			       repertoire, result);
3692 
3693 	      /* Remember that we processed the ellipsis.  */
3694 	      was_ellipsis = tok_none;
3695 
3696 	      /* And don't add the value a second time.  */
3697 	      break;
3698 	    }
3699 
3700 	  /* Now insert in the new place.  */
3701 	  insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3702 	  break;
3703 
3704 	case tok_undefined:
3705 	  /* Ignore the rest of the line if we don't need the input of
3706 	     this line.  */
3707 	  if (ignore_content)
3708 	    {
3709 	      lr_ignore_rest (ldfile, 0);
3710 	      break;
3711 	    }
3712 
3713 	  if (state != 1)
3714 	    goto err_label;
3715 
3716 	  if (was_ellipsis != tok_none)
3717 	    {
3718 	      lr_error (ldfile,
3719 			_("%s: cannot have `%s' as end of ellipsis range"),
3720 			"LC_COLLATE", "UNDEFINED");
3721 
3722 	      unlink_element (collate);
3723 	      was_ellipsis = tok_none;
3724 	    }
3725 
3726 	  /* See whether UNDEFINED already appeared somewhere.  */
3727 	  if (collate->undefined.next != NULL
3728 	      || &collate->undefined == collate->cursor)
3729 	    {
3730 	      lr_error (ldfile,
3731 			_("%s: order for `%.*s' already defined at %s:%Zu"),
3732 			"LC_COLLATE", 9, "UNDEFINED",
3733 			collate->undefined.file,
3734 			collate->undefined.line);
3735 	      lr_ignore_rest (ldfile, 0);
3736 	    }
3737 	  else
3738 	    /* Parse the weights.  */
3739 	     insert_weights (ldfile, &collate->undefined, charmap,
3740 			     repertoire, result, tok_none);
3741 	  break;
3742 
3743 	case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3744 	case tok_ellipsis3: /* absolute ellipsis */
3745 	case tok_ellipsis4: /* symbolic decimal ellipsis */
3746 	  /* This is the symbolic (decimal or hexadecimal) or absolute
3747 	     ellipsis.  */
3748 	  if (was_ellipsis != tok_none)
3749 	    goto err_label;
3750 
3751 	  if (state != 0 && state != 1 && state != 3)
3752 	    goto err_label;
3753 
3754 	  was_ellipsis = nowtok;
3755 
3756 	  insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3757 			  repertoire, result, nowtok);
3758 	  break;
3759 
3760 	case tok_end:
3761 	seen_end:
3762 	  /* Next we assume `LC_COLLATE'.  */
3763 	  if (!ignore_content)
3764 	    {
3765 	      if (state == 0
3766 		  && copy_locale == NULL
3767 		  && !collate->codepoint_collation)
3768 		/* We must either see a copy statement or have
3769 		   ordering values, or codepoint_collation.  */
3770 		lr_error (ldfile,
3771 			  _("%s: empty category description not allowed"),
3772 			  "LC_COLLATE");
3773 	      else if (state == 1)
3774 		{
3775 		  lr_error (ldfile, _("%s: missing `order_end' keyword"),
3776 			    "LC_COLLATE");
3777 
3778 		  /* Handle ellipsis at end of list.  */
3779 		  if (was_ellipsis != tok_none)
3780 		    {
3781 		      handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3782 				       repertoire, result);
3783 		      was_ellipsis = tok_none;
3784 		    }
3785 		}
3786 	      else if (state == 3)
3787 		record_error (0, 0, _("\
3788 %s: missing `reorder-end' keyword"), "LC_COLLATE");
3789 	      else if (state == 5)
3790 		record_error (0, 0, _("\
3791 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3792 	    }
3793 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
3794 	  if (arg->tok == tok_eof)
3795 	    break;
3796 	  if (arg->tok == tok_eol)
3797 	    lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3798 	  else if (arg->tok != tok_lc_collate)
3799 	    lr_error (ldfile, _("\
3800 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3801 	  lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3802 	  return;
3803 
3804 	case tok_define:
3805 	  if (ignore_content)
3806 	    {
3807 	      lr_ignore_rest (ldfile, 0);
3808 	      break;
3809 	    }
3810 
3811 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
3812 	  if (arg->tok != tok_ident)
3813 	    goto err_label;
3814 
3815 	  /* Simply add the new symbol.  */
3816 	  struct name_list *newsym = xmalloc (sizeof (*newsym)
3817 					      + arg->val.str.lenmb + 1);
3818 	  memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3819 	  newsym->str[arg->val.str.lenmb] = '\0';
3820 	  newsym->next = defined;
3821 	  defined = newsym;
3822 
3823 	  lr_ignore_rest (ldfile, 1);
3824 	  break;
3825 
3826 	case tok_undef:
3827 	  if (ignore_content)
3828 	    {
3829 	      lr_ignore_rest (ldfile, 0);
3830 	      break;
3831 	    }
3832 
3833 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
3834 	  if (arg->tok != tok_ident)
3835 	    goto err_label;
3836 
3837 	  /* Remove _all_ occurrences of the symbol from the list.  */
3838 	  struct name_list *prevdef = NULL;
3839 	  struct name_list *curdef = defined;
3840 	  while (curdef != NULL)
3841 	    if (strncmp (arg->val.str.startmb, curdef->str,
3842 			 arg->val.str.lenmb) == 0
3843 		&& curdef->str[arg->val.str.lenmb] == '\0')
3844 	      {
3845 		if (prevdef == NULL)
3846 		  defined = curdef->next;
3847 		else
3848 		  prevdef->next = curdef->next;
3849 
3850 		struct name_list *olddef = curdef;
3851 		curdef = curdef->next;
3852 
3853 		free (olddef);
3854 	      }
3855 	    else
3856 	      {
3857 		prevdef = curdef;
3858 		curdef = curdef->next;
3859 	      }
3860 
3861 	  lr_ignore_rest (ldfile, 1);
3862 	  break;
3863 
3864 	case tok_ifdef:
3865 	case tok_ifndef:
3866 	  if (ignore_content)
3867 	    {
3868 	      lr_ignore_rest (ldfile, 0);
3869 	      break;
3870 	    }
3871 
3872 	found_ifdef:
3873 	  arg = lr_token (ldfile, charmap, result, NULL, verbose);
3874 	  if (arg->tok != tok_ident)
3875 	    goto err_label;
3876 	  lr_ignore_rest (ldfile, 1);
3877 
3878 	  if (collate->else_action == else_none)
3879 	    {
3880 	      curdef = defined;
3881 	      while (curdef != NULL)
3882 		if (strncmp (arg->val.str.startmb, curdef->str,
3883 			     arg->val.str.lenmb) == 0
3884 		    && curdef->str[arg->val.str.lenmb] == '\0')
3885 		  break;
3886 		else
3887 		  curdef = curdef->next;
3888 
3889 	      if ((nowtok == tok_ifdef && curdef != NULL)
3890 		  || (nowtok == tok_ifndef && curdef == NULL))
3891 		{
3892 		  /* We have to use the if-branch.  */
3893 		  collate->else_action = else_ignore;
3894 		}
3895 	      else
3896 		{
3897 		  /* We have to use the else-branch, if there is one.  */
3898 		  nowtok = skip_to (ldfile, collate, charmap, 0);
3899 		  if (nowtok == tok_else)
3900 		    collate->else_action = else_seen;
3901 		  else if (nowtok == tok_elifdef)
3902 		    {
3903 		      nowtok = tok_ifdef;
3904 		      goto found_ifdef;
3905 		    }
3906 		  else if (nowtok == tok_elifndef)
3907 		    {
3908 		      nowtok = tok_ifndef;
3909 		      goto found_ifdef;
3910 		    }
3911 		  else if (nowtok == tok_eof)
3912 		    goto seen_eof;
3913 		  else if (nowtok == tok_end)
3914 		    goto seen_end;
3915 		}
3916 	    }
3917 	  else
3918 	    {
3919 	      /* XXX Should it really become necessary to support nested
3920 		 preprocessor handling we will push the state here.  */
3921 	      lr_error (ldfile, _("%s: nested conditionals not supported"),
3922 			"LC_COLLATE");
3923 	      nowtok = skip_to (ldfile, collate, charmap, 1);
3924 	      if (nowtok == tok_eof)
3925 		goto seen_eof;
3926 	      else if (nowtok == tok_end)
3927 		goto seen_end;
3928 	    }
3929 	  break;
3930 
3931 	case tok_elifdef:
3932 	case tok_elifndef:
3933 	case tok_else:
3934 	  if (ignore_content)
3935 	    {
3936 	      lr_ignore_rest (ldfile, 0);
3937 	      break;
3938 	    }
3939 
3940 	  lr_ignore_rest (ldfile, 1);
3941 
3942 	  if (collate->else_action == else_ignore)
3943 	    {
3944 	      /* Ignore everything until the endif.  */
3945 	      nowtok = skip_to (ldfile, collate, charmap, 1);
3946 	      if (nowtok == tok_eof)
3947 		goto seen_eof;
3948 	      else if (nowtok == tok_end)
3949 		goto seen_end;
3950 	    }
3951 	  else
3952 	    {
3953 	      assert (collate->else_action == else_none);
3954 	      lr_error (ldfile, _("\
3955 %s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3956 			nowtok == tok_else ? "else"
3957 			: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3958 	    }
3959 	  break;
3960 
3961 	case tok_endif:
3962 	  if (ignore_content)
3963 	    {
3964 	      lr_ignore_rest (ldfile, 0);
3965 	      break;
3966 	    }
3967 
3968 	  lr_ignore_rest (ldfile, 1);
3969 
3970 	  if (collate->else_action != else_ignore
3971 	      && collate->else_action != else_seen)
3972 	    lr_error (ldfile, _("\
3973 %s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3974 
3975 	  /* XXX If we support nested preprocessor directives we pop
3976 	     the state here.  */
3977 	  collate->else_action = else_none;
3978 	  break;
3979 
3980 	default:
3981 	err_label:
3982 	  SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3983 	}
3984 
3985       /* Prepare for the next round.  */
3986       now = lr_token (ldfile, charmap, result, NULL, verbose);
3987       nowtok = now->tok;
3988     }
3989 
3990  seen_eof:
3991   /* When we come here we reached the end of the file.  */
3992   lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3993 }
3994