1 /* Copyright (C) 1995-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published
6    by the Free Software Foundation; version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 
21 #include <argp.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <libintl.h>
25 #include <locale.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <error.h>
32 #include <sys/mman.h>
33 #include <sys/stat.h>
34 #include <ctype.h>
35 
36 #include "localedef.h"
37 #include "charmap.h"
38 #include "locfile.h"
39 
/* Uncomment the following line in the production version.  */
41 /* #define NDEBUG 1 */
42 #include <assert.h>
43 
44 
/* List of copied locales.  Reset to NULL at the start of main.  */
struct copy_def_list_t *copy_list;

/* If nonzero, conform strictly to POSIX.  Set by main when
   POSIXLY_CORRECT is present in the environment and by --posix.  */
int posix_conformance;

/* If nonzero, write the output even if errors were recorded
   (-c/--force).  */
static int force_output;

/* Prefix for output files (--prefix).  NULL when the option was not
   given; construct_output_path then uses the empty string.  */
const char *output_prefix;

/* Name of the character map file (-f/--charmap).  */
static const char *charmap_file;

/* Name of the locale definition file (-i/--inputfile).  When NULL,
   main reads "/dev/stdin" instead.  */
static const char *input_file;

/* Name of the repertoire map file (-u/--repertoire-map).  */
const char *repertoire_global;

/* Name of the locale.alias file (-A/--alias-file).  */
const char *alias_file;

/* Head of the singly linked list (through the `next' member) of all
   locales.  main points it at its local `global' entry and
   add_to_readlist appends further entries.  */
static struct localedef_t *locales;

/* If true don't add locale data to archive.  Set by --no-archive and
   forced to true by construct_output_path when the output argument
   contains a '/'.  */
bool no_archive;

/* If true add named locales to archive (--add-to-archive).  */
static bool add_to_archive;

/* If true delete named locales from archive (--delete-from-archive).  */
static bool delete_from_archive;

/* If true replace archive content when adding (--replace).  */
static bool replace_archive;

/* If true list archive content (--list-archive).  */
static bool list_archive;

/* If true create hard links to other locales (default).  Cleared by
   --no-hard-links.  */
bool hard_links = true;

/* Maximum number of retries when opening the locale archive.  Not
   referenced in this file; presumably consumed by the archive code.  */
int max_locarchive_open_retry = 10;
92 
93 
/* Name and version of program.  The hook makes argp call print_version
   (defined further down) to emit the version text.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;

/* Keys of the options that have no short option character.  The
   values are chosen outside the range of printable characters so they
   cannot clash with a short option; parse_opt switches on them.  The
   value 303 is not used in this file.  */
#define OPT_POSIX 301
#define OPT_QUIET 302
#define OPT_PREFIX 304
#define OPT_NO_ARCHIVE 305
#define OPT_ADD_TO_ARCHIVE 306
#define OPT_REPLACE 307
#define OPT_DELETE_FROM_ARCHIVE 308
#define OPT_LIST_ARCHIVE 309
#define OPT_LITTLE_ENDIAN 400
#define OPT_BIG_ENDIAN 401
#define OPT_NO_WARN 402
#define OPT_WARN 403
#define OPT_NO_HARD_LINKS 404
111 
/* Definitions of arguments for argp functions.

   Each initializer lists the long option name, the key passed to
   parse_opt, the name of the option's argument (NULL if it takes
   none), the flags and the help text.  Entries with a NULL name and a
   zero key only carry a heading for the options that follow; the
   all-zero entry terminates the table.  */
static const struct argp_option options[] =
{
  /* Options naming the input files.  */
  { NULL, 0, NULL, 0, N_("Input Files:") },
  { "charmap", 'f', N_("FILE"), 0,
    N_("Symbolic character names defined in FILE") },
  { "inputfile", 'i', N_("FILE"), 0,
    N_("Source definitions are found in FILE") },
  { "repertoire-map", 'u', N_("FILE"), 0,
    N_("FILE contains mapping from symbolic names to UCS4 values") },

  /* Options controlling what is written and how verbosely.  */
  { NULL, 0, NULL, 0, N_("Output control:") },
  { "force", 'c', NULL, 0,
    N_("Create output even if warning messages were issued") },
  { "no-hard-links", OPT_NO_HARD_LINKS, NULL, 0,
    N_("Do not create hard links between installed locales") },
  { "prefix", OPT_PREFIX, N_("PATH"), 0, N_("Optional output file prefix") },
  { "posix", OPT_POSIX, NULL, 0, N_("Strictly conform to POSIX") },
  { "quiet", OPT_QUIET, NULL, 0,
    N_("Suppress warnings and information messages") },
  { "verbose", 'v', NULL, 0, N_("Print more messages") },
  { "no-warnings", OPT_NO_WARN, N_("<warnings>"), 0,
    N_("Comma-separated list of warnings to disable; "
       "supported warnings are: ascii, intcurrsym") },
  { "warnings", OPT_WARN, N_("<warnings>"), 0,
    N_("Comma-separated list of warnings to enable; "
       "supported warnings are: ascii, intcurrsym") },

  /* Options operating on the locale archive.  */
  { NULL, 0, NULL, 0, N_("Archive control:") },
  { "no-archive", OPT_NO_ARCHIVE, NULL, 0,
    N_("Don't add new data to archive") },
  { "add-to-archive", OPT_ADD_TO_ARCHIVE, NULL, 0,
    N_("Add locales named by parameters to archive") },
  { "replace", OPT_REPLACE, NULL, 0, N_("Replace existing archive content") },
  { "delete-from-archive", OPT_DELETE_FROM_ARCHIVE, NULL, 0,
    N_("Remove locales named by parameters from archive") },
  { "list-archive", OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") },
  { "alias-file", 'A', N_("FILE"), 0,
    N_("locale.alias file to consult when making archive")},
  { "little-endian", OPT_LITTLE_ENDIAN, NULL, 0,
    N_("Generate little-endian output") },
  { "big-endian", OPT_BIG_ENDIAN, NULL, 0,
    N_("Generate big-endian output") },
  { NULL, 0, NULL, 0, NULL }
};
157 
/* Short description of program.  */
static const char doc[] = N_("Compile locale specification");

/* Strings for arguments in help texts.  Each line describes one of
   the alternative ways to invoke the program: compiling the locale
   NAME, adding or deleting archive entries, and listing the
   archive.  */
static const char args_doc[] = N_("\
NAME\n\
[--add-to-archive|--delete-from-archive] FILE...\n\
--list-archive [FILE]");

/* Prototype for option handler.  */
static error_t parse_opt (int key, char *arg, struct argp_state *state);

/* Function to print some extra text in the help message.  */
static char *more_help (int key, const char *text, void *input);

/* Data structure to communicate with argp functions: the option
   table, the option handler, the two help strings above, no child
   parsers, and the help filter.  */
static struct argp argp =
{
  options, parse_opt, args_doc, doc, NULL, more_help
};


/* Prototypes for local functions.  */
static void error_print (void);
static char *construct_output_path (char *path);
static char *normalize_codeset (const char *codeset, size_t name_len);
184 
185 
186 int
main(int argc,char * argv[])187 main (int argc, char *argv[])
188 {
189   char *output_path;
190   int cannot_write_why;
191   struct charmap_t *charmap;
192   struct localedef_t global;
193   int remaining;
194 
195   /* Set initial values for global variables.  */
196   copy_list = NULL;
197   posix_conformance = getenv ("POSIXLY_CORRECT") != NULL;
198   error_print_progname = error_print;
199 
200   /* Set locale.  Do not set LC_ALL because the other categories must
201      not be affected (according to POSIX.2).  */
202   setlocale (LC_MESSAGES, "");
203   setlocale (LC_CTYPE, "");
204 
205   /* Initialize the message catalog.  */
206   textdomain (_libc_intl_domainname);
207 
208   /* Parse and process arguments.  */
209   argp_err_exit_status = 4;
210   argp_parse (&argp, argc, argv, 0, &remaining, NULL);
211 
212   /* Handle a few special cases.  */
213   if (list_archive)
214     show_archive_content (remaining > 1 ? argv[remaining] : NULL, verbose);
215   if (add_to_archive)
216     return add_locales_to_archive (argc - remaining, &argv[remaining],
217 				   replace_archive);
218   if (delete_from_archive)
219     return delete_locales_from_archive (argc - remaining, &argv[remaining]);
220 
221   /* POSIX.2 requires to be verbose about missing characters in the
222      character map.  */
223   verbose |= posix_conformance;
224 
225   if (argc - remaining != 1)
226     {
227       /* We need exactly one non-option parameter.  */
228       argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
229 		 program_invocation_short_name);
230       exit (4);
231     }
232 
233   /* The parameter describes the output path of the constructed files.
234      If the described files cannot be written return a NULL pointer.
235      We don't free output_path because we will exit.  */
236   output_path  = construct_output_path (argv[remaining]);
237   if (output_path == NULL && ! no_archive)
238     error (4, errno, _("cannot create directory for output files"));
239   cannot_write_why = errno;
240 
241   /* Now that the parameters are processed we have to reset the local
242      ctype locale.  (P1003.2 4.35.5.2)  */
243   setlocale (LC_CTYPE, "POSIX");
244 
245   /* Look whether the system really allows locale definitions.  POSIX
246      defines error code 3 for this situation so I think it must be
247      a fatal error (see P1003.2 4.35.8).  */
248   if (sysconf (_SC_2_LOCALEDEF) < 0)
249     record_error (3, 0, _("\
250 FATAL: system does not define `_POSIX2_LOCALEDEF'"));
251 
252   /* Process charmap file.  */
253   charmap = charmap_read (charmap_file, verbose, 1, be_quiet, 1);
254 
255   /* Add the first entry in the locale list.  */
256   memset (&global, '\0', sizeof (struct localedef_t));
257   global.name = input_file ?: "/dev/stdin";
258   global.needed = ALL_LOCALES;
259   locales = &global;
260 
261   /* Now read the locale file.  */
262   if (locfile_read (&global, charmap) != 0)
263     record_error (4, errno, _("\
264 cannot open locale definition file `%s'"), input_file);
265 
266   /* Perhaps we saw some `copy' instructions.  */
267   while (1)
268     {
269       struct localedef_t *runp = locales;
270 
271       while (runp != NULL && (runp->needed & runp->avail) == runp->needed)
272 	runp = runp->next;
273 
274       if (runp == NULL)
275 	/* Everything read.  */
276 	break;
277 
278       if (locfile_read (runp, charmap) != 0)
279 	record_error (4, errno, _("\
280 cannot open locale definition file `%s'"), runp->name);
281     }
282 
283   /* Check the categories we processed in source form.  */
284   check_all_categories (locales, charmap);
285 
286   /* What we do next depends on the number of errors and warnings we
287      have generated in processing the input files.
288 
289      * No errors: Write the output file.
290 
291      * Some warnings: Write the output file and exit with status 1 to
292      indicate there may be problems using the output file e.g. missing
293      data that makes it difficult to use
294 
295      * Errors: We don't write the output file and we exit with status 4
296      to indicate no output files were written.
297 
298      The use of -c|--force writes the output file even if errors were
299      seen.  */
300   if (recorded_error_count == 0 || force_output != 0)
301     {
302       if (cannot_write_why != 0)
303 	record_error (4, cannot_write_why, _("\
304 cannot write output files to `%s'"), output_path ? : argv[remaining]);
305       else
306 	write_all_categories (locales, charmap, argv[remaining], output_path);
307     }
308   else
309     record_error (4, 0, _("\
310 no output file produced because errors were issued"));
311 
312   /* This exit status is prescribed by POSIX.2 4.35.7.  */
313   exit (recorded_warning_count != 0);
314 }
315 
316 /* Search warnings for matching warnings and if found enable those
317    warnings if ENABLED is true, otherwise disable the warnings.  */
318 static void
set_warnings(char * warnings,bool enabled)319 set_warnings (char *warnings, bool enabled)
320 {
321   char *tok = warnings;
322   char *copy = (char *) malloc (strlen (warnings) + 1);
323   char *save = copy;
324 
325   /* As we make a copy of the warnings list we remove all spaces from
326      the warnings list to make the processing a more robust.  We don't
327      support spaces in a warning name.  */
328   do
329     {
330       while (isspace (*tok) != 0)
331         tok++;
332     }
333   while ((*save++ = *tok++) != '\0');
334 
335   warnings = copy;
336 
337   /* Tokenize the input list of warnings to set, compare them to
338      known warnings, and set the warning.  We purposely ignore unknown
339      warnings, and are thus forward compatible, users can attempt to
340      disable whaterver new warnings they know about, but we will only
341      disable those *we* known about.  */
342   while ((tok = strtok_r (warnings, ",", &save)) != NULL)
343     {
344       warnings = NULL;
345       if (strcmp (tok, "ascii") == 0)
346 	warn_ascii = enabled;
347       else if (strcmp (tok, "intcurrsym") == 0)
348 	warn_int_curr_symbol = enabled;
349     }
350 
351   free (copy);
352 }
353 
354 /* Handle program arguments.  */
355 static error_t
parse_opt(int key,char * arg,struct argp_state * state)356 parse_opt (int key, char *arg, struct argp_state *state)
357 {
358   switch (key)
359     {
360     case OPT_QUIET:
361       be_quiet = 1;
362       break;
363     case OPT_POSIX:
364       posix_conformance = 1;
365       break;
366     case OPT_PREFIX:
367       output_prefix = arg;
368       break;
369     case OPT_NO_ARCHIVE:
370       no_archive = true;
371       break;
372     case OPT_ADD_TO_ARCHIVE:
373       add_to_archive = true;
374       break;
375     case OPT_REPLACE:
376       replace_archive = true;
377       break;
378     case OPT_DELETE_FROM_ARCHIVE:
379       delete_from_archive = true;
380       break;
381     case OPT_LIST_ARCHIVE:
382       list_archive = true;
383       break;
384     case OPT_LITTLE_ENDIAN:
385       set_big_endian (false);
386       break;
387     case OPT_BIG_ENDIAN:
388       set_big_endian (true);
389       break;
390     case OPT_NO_WARN:
391       /* Disable the warnings.  */
392       set_warnings (arg, false);
393       break;
394     case OPT_WARN:
395       /* Enable the warnings.  */
396       set_warnings (arg, true);
397       break;
398     case OPT_NO_HARD_LINKS:
399       /* Do not hard link to other locales.  */
400       hard_links = false;
401       break;
402     case 'c':
403       force_output = 1;
404       break;
405     case 'f':
406       charmap_file = arg;
407       break;
408     case 'A':
409       alias_file = arg;
410       break;
411     case 'i':
412       input_file = arg;
413       break;
414     case 'u':
415       repertoire_global = arg;
416       break;
417     case 'v':
418       verbose = 1;
419       break;
420     default:
421       return ARGP_ERR_UNKNOWN;
422     }
423   return 0;
424 }
425 
426 
427 static char *
more_help(int key,const char * text,void * input)428 more_help (int key, const char *text, void *input)
429 {
430   char *cp;
431   char *tp;
432 
433   switch (key)
434     {
435     case ARGP_KEY_HELP_EXTRA:
436       /* We print some extra information.  */
437       tp = xasprintf (gettext ("\
438 For bug reporting instructions, please see:\n\
439 %s.\n"), REPORT_BUGS_TO);
440       cp = xasprintf (gettext ("\
441 System's directory for character maps : %s\n\
442 		       repertoire maps: %s\n\
443 		       locale path    : %s\n\
444 %s"),
445 		    CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, tp);
446       free (tp);
447       return cp;
448     default:
449       break;
450     }
451   return (char *) text;
452 }
453 
454 /* Print the version information.  */
455 static void
print_version(FILE * stream,struct argp_state * state)456 print_version (FILE *stream, struct argp_state *state)
457 {
458   fprintf (stream, "localedef %s%s\n", PKGVERSION, VERSION);
459   fprintf (stream, gettext ("\
460 Copyright (C) %s Free Software Foundation, Inc.\n\
461 This is free software; see the source for copying conditions.  There is NO\n\
462 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
463 "), "2022");
464   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
465 }
466 
467 
/* The address of this function will be assigned to the hook in the error
   functions: main stores it in error_print_progname.  The body is
   deliberately empty.  Presumably the error functions call the hook in
   place of printing the program name, so diagnostics carry no
   program-name prefix -- confirm against the error(3)
   documentation.  */
static void
error_print (void)
{
}
474 
475 
476 /* The parameter to localedef describes the output path.  If it does contain a
477    '/' character it is a relative path.  Otherwise it names the locale this
478    definition is for.   The returned path must be freed by the caller. */
479 static char *
construct_output_path(char * path)480 construct_output_path (char *path)
481 {
482   char *result;
483 
484   if (strchr (path, '/') == NULL)
485     {
486       /* This is a system path.  First examine whether the locale name
487 	 contains a reference to the codeset.  This should be
488 	 normalized.  */
489       char *startp;
490       char *endp = NULL;
491       char *normal = NULL;
492 
493       startp = path;
494       /* Either we have a '@' which starts a CEN name or '.' which starts the
495 	 codeset specification.  The CEN name starts with '@' and may also have
496 	 a codeset specification, but we do not normalize the string after '@'.
497 	 If we only find the codeset specification then we normalize only the codeset
498 	 specification (but not anything after a subsequent '@').  */
499       while (*startp != '\0' && *startp != '@' && *startp != '.')
500 	++startp;
501       if (*startp == '.')
502 	{
503 	  /* We found a codeset specification.  Now find the end.  */
504 	  endp = ++startp;
505 
506 	  /* Stop at the first '@', and don't normalize anything past that.  */
507 	  while (*endp != '\0' && *endp != '@')
508 	    ++endp;
509 
510 	  if (endp > startp)
511 	    normal = normalize_codeset (startp, endp - startp);
512 	}
513 
514       if (normal == NULL)
515 	result = xasprintf ("%s%s/%s/", output_prefix ?: "",
516 			    COMPLOCALEDIR, path);
517       else
518 	result = xasprintf ("%s%s/%.*s%s%s/",
519 			    output_prefix ?: "", COMPLOCALEDIR,
520 			    (int) (startp - path), path, normal, endp ?: "");
521       /* Free the allocated normalized codeset name.  */
522       free (normal);
523     }
524   else
525     {
526       /* This is a user path.  */
527       result = xasprintf ("%s/", path);
528 
529       /* If the user specified an output path we cannot add the output
530 	 to the archive.  */
531       no_archive = true;
532     }
533 
534   errno = 0;
535 
536   if (no_archive && euidaccess (result, W_OK) == -1)
537     {
538       /* Perhaps the directory does not exist now.  Try to create it.  */
539       if (errno == ENOENT)
540 	{
541 	  errno = 0;
542 	  if (mkdir (result, 0777) < 0)
543 	    {
544 	      record_verbose (stderr,
545 			      _("cannot create output path \'%s\': %s"),
546 			      result, strerror (errno));
547 	      free (result);
548 	      return NULL;
549 	    }
550 	}
551       else
552 	record_verbose (stderr,
553 			_("no write permission to output path \'%s\': %s"),
554 			result, strerror (errno));
555     }
556 
557   return result;
558 }
559 
560 
/* Normalize codeset name.  There is no standard for the codeset names.
   Normalization allows the user to use any of the common names e.g. UTF-8,
   utf-8, utf8, UTF8 etc.

   We normalize using the following rules:
   - Remove all non-alpha-numeric characters
   - Lowercase all characters.
   - If there are no letters assume it's an ISO standard and prefix
     with 'iso'.  This includes input without any alpha-numeric
     character at all, for which the result is just "iso".

   CODESET need not be null-terminated; exactly NAME_LEN bytes of it
   are examined.  We return the normalized string which needs to be
   freed by free.  */
static char *
normalize_codeset (const char *codeset, size_t name_len)
{
  int len = 0;
  int only_digit = 1;
  char *retval;
  char *wp;
  size_t cnt;

  /* Compute the number of characters that will be kept.  The test is
     the same as in the copy loop below so that the count and the
     number of bytes written always agree.  The <ctype.h> functions
     require a value representable as unsigned char, hence the
     conversion.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalpha (ch))
        {
          ++len;
          only_digit = 0;
        }
      else if (isdigit (ch))
        ++len;
    }

  /* If there were only digits we assume it's an ISO standard and we will
     prefix with 'iso' so include space for that.  The LEN bytes taken
     from CODESET here merely reserve the space; they are overwritten
     with the converted name below.  */
  wp = retval = xasprintf ("%s%.*s", only_digit ? "iso" : "", len, codeset);

  /* Skip "iso".  */
  if (only_digit)
    wp += 3;

  /* Copy the letters in lower case and the digits unchanged; drop
     everything else.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalpha (ch))
        *wp++ = tolower (ch);
      else if (isdigit (ch))
        *wp++ = ch;
    }

  /* Return allocated and converted name for caller to free.  */
  return retval;
}
609 
610 
611 struct localedef_t *
add_to_readlist(int category,const char * name,const char * repertoire_name,int generate,struct localedef_t * copy_locale)612 add_to_readlist (int category, const char *name, const char *repertoire_name,
613 		 int generate, struct localedef_t *copy_locale)
614 {
615   struct localedef_t *runp = locales;
616 
617   while (runp != NULL && strcmp (name, runp->name) != 0)
618     runp = runp->next;
619 
620   if (runp == NULL)
621     {
622       /* Add a new entry at the end.  */
623       struct localedef_t *newp;
624 
625       assert (generate == 1);
626 
627       newp = xcalloc (1, sizeof (struct localedef_t));
628       newp->name = name;
629       newp->repertoire_name = repertoire_name;
630 
631       if (locales == NULL)
632 	runp = locales = newp;
633       else
634 	{
635 	  runp = locales;
636 	  while (runp->next != NULL)
637 	    runp = runp->next;
638 	  runp = runp->next = newp;
639 	}
640     }
641 
642   if (generate
643       && (runp->needed & (1 << category)) != 0
644       && (runp->avail & (1 << category)) == 0)
645     record_error (5, 0, _("\
646 circular dependencies between locale definitions"));
647 
648   if (copy_locale != NULL)
649     {
650       if (runp->categories[category].generic != NULL)
651 	record_error (5, 0, _("\
652 cannot add already read locale `%s' a second time"), name);
653       else
654 	runp->categories[category].generic =
655 	  copy_locale->categories[category].generic;
656     }
657 
658   runp->needed |= 1 << category;
659 
660   return runp;
661 }
662 
663 
664 struct localedef_t *
find_locale(int category,const char * name,const char * repertoire_name,const struct charmap_t * charmap)665 find_locale (int category, const char *name, const char *repertoire_name,
666 	     const struct charmap_t *charmap)
667 {
668   struct localedef_t *result;
669 
670   /* Find the locale, but do not generate it since this would be a bug.  */
671   result = add_to_readlist (category, name, repertoire_name, 0, NULL);
672 
673   assert (result != NULL);
674 
675   if ((result->avail & (1 << category)) == 0
676       && locfile_read (result, charmap) != 0)
677     record_error (4, errno, _("\
678 cannot open locale definition file `%s'"), result->name);
679 
680   return result;
681 }
682 
683 
684 struct localedef_t *
load_locale(int category,const char * name,const char * repertoire_name,const struct charmap_t * charmap,struct localedef_t * copy_locale)685 load_locale (int category, const char *name, const char *repertoire_name,
686 	     const struct charmap_t *charmap, struct localedef_t *copy_locale)
687 {
688   struct localedef_t *result;
689 
690   /* Generate the locale if it does not exist.  */
691   result = add_to_readlist (category, name, repertoire_name, 1, copy_locale);
692 
693   assert (result != NULL);
694 
695   if ((result->avail & (1 << category)) == 0
696       && locfile_read (result, charmap) != 0)
697     record_error (4, errno, _("\
698 cannot open locale definition file `%s'"), result->name);
699 
700   return result;
701 }
702