1 /* Declarations for internal libc locale interfaces
2    Copyright (C) 1995-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #ifndef _LOCALEINFO_H
20 #define _LOCALEINFO_H 1
21 
22 #include <stddef.h>
23 #include <langinfo.h>
24 #include <limits.h>
25 #include <locale.h>
26 #include <time.h>
27 #include <stdint.h>
28 #include <sys/types.h>
29 
30 #include <intl/loadinfo.h>	/* For loaded_l10nfile definition.  */
31 
/* Magic number at the beginning of a locale data file for CATEGORY.
   LC_COLLATE and LC_CTYPE each have their own base constant; every
   other category shares a third one.  The base is XORed with the
   category number, so the result depends on the category.

   CATEGORY is parenthesized everywhere it is used so that an argument
   containing low-precedence operators (for example a conditional
   expression) is compared as a whole.  CATEGORY appears several times
   in the expansion, so it must not have side effects.  */
#define	LIMAGIC(category) \
  ((category) == LC_COLLATE						\
   ? ((unsigned int) (0x20051014 ^ (category)))				\
   : (category) == LC_CTYPE						\
   ? ((unsigned int) (0x20090720 ^ (category)))				\
   : ((unsigned int) (0x20031115 ^ (category))))
39 
/* Special weight constant for the collation data.  */
#define IGNORE_CHAR	2
42 
/* We use a special value for the usage counter in `__locale_data' to
   signal that this data must never be removed anymore.  UNDELETABLE
   (UINT_MAX) is that value; MAX_USAGE_COUNT is defined as one less,
   presumably as the ceiling for an ordinary count.  */
#define MAX_USAGE_COUNT (UINT_MAX - 1)
#define UNDELETABLE	UINT_MAX
47 
/* Structure describing locale data in core for a category.  The
   structure ends in a flexible array of `nstrings' item values.  */
struct __locale_data
{
  const char *name;		/* Storage is described by `alloc'.  */
  const char *filedata;		/* Region mapping the file data.  */
  off_t filesize;		/* Size of the file (and the region).  */
  enum				/* Flavor of storage used for name and
				   filedata.  */
  {
    ld_malloced,		/* Both are malloc'd.  */
    ld_mapped,			/* name is malloc'd, filedata mmap'd.  */
    ld_archive			/* Both point into mmap'd archive regions.  */
  } alloc;

  /* This provides a slot for category-specific code to cache data
     computed about this locale.  Type of the data pointed to:

     LC_CTYPE   struct lc_ctype_data (_nl_intern_locale_data)
     LC_TIME    struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)

     This data is deallocated at the start of _nl_unload_locale.  */
  void *private;

  unsigned int usage_count;	/* Counter for users.  See MAX_USAGE_COUNT
				   and UNDELETABLE.  */

  int use_translit;		/* Nonzero if the mb*towc*() and wc*tomb()
				   functions should use transliteration.  */

  unsigned int nstrings;	/* Number of strings below.  */
  union locale_data_value
  {
    const uint32_t *wstr;	/* Item viewed as a wide string.  */
    const char *string;		/* Item viewed as a byte string.  */
    unsigned int word;		/* Note endian issues vs 64-bit pointers.  */
  }
  values __flexarr;	/* Items, usually pointers into `filedata'.  */
};
84 
/* Alignment applied to 32-bit integers stored in locale files.  It
   covers values that are explicitly int32_t or uint32_t as well as
   wchar_t values, and it is used even when the host would accept a
   smaller alignment for such integers.

   LOCFILE_ALIGN_MASK     low bits that must be clear in an aligned value.
   LOCFILE_ALIGN_UP (x)   X rounded up to the next multiple of the alignment.
   LOCFILE_ALIGNED_P (x)  nonzero if X is already a multiple of it.  */
#define LOCFILE_ALIGN		sizeof (int32_t)
#define LOCFILE_ALIGN_MASK	(LOCFILE_ALIGN - 1)
#define LOCFILE_ALIGN_UP(x)	\
  (((x) + LOCFILE_ALIGN_MASK) & ~LOCFILE_ALIGN_MASK)
#define LOCFILE_ALIGNED_P(x)	(0 == ((x) & LOCFILE_ALIGN_MASK))
94 
/* We know three kinds of collation sorting rules: forward, backward
   and position.  The enumerators are numbered implicitly from zero, so
   forward is 1, backward is 2 and position is 4; the combined names
   are the matching sums (5 and 6), and sort_mask (7) has all three
   low bits set.  The values 0 and 3 are named as illegal.  */
enum coll_sort_rule
{
  illegal_0__,			/* 0 */
  sort_forward,			/* 1 */
  sort_backward,		/* 2 */
  illegal_3__,			/* 3 */
  sort_position,		/* 4 */
  sort_forward_position,	/* 5 */
  sort_backward_position,	/* 6 */
  sort_mask			/* 7 */
};
107 
/* We can map the types of the entries into a few categories.  The
   enumerators are numbered implicitly, starting with none = 0.  The
   names beginning with `w' are presumably the wide-character
   counterparts of the plain string kinds.  */
enum value_type
{
  none,
  string,
  stringarray,
  byte,
  bytearray,
  word,
  stringlist,
  wordarray,
  wstring,
  wstringarray,
  wstringlist
};
123 
124 
/* Definitions for `era' information from LC_TIME.
   ERA_NAME_FORMAT_MEMBERS is the number of name/format members; the
   four ERA_M_* and ERA_W_* values are their indices 0..3.  The M/W
   prefixes presumably distinguish the multibyte and wide variants
   (compare the era_name/era_wname members of struct era_entry).  */
#define ERA_NAME_FORMAT_MEMBERS 4
#define ERA_M_NAME   0
#define ERA_M_FORMAT 1
#define ERA_W_NAME   2
#define ERA_W_FORMAT 3
131 
132 
/* Structure to access `era' information from LC_TIME.  */
struct era_entry
{
  uint32_t direction;		/* Contains '+' or '-'.  */
  int32_t offset;
  int32_t start_date[3];	/* Three components; presumably year,
				   month, day -- confirm against the
				   code that fills this in.  */
  int32_t stop_date[3];		/* Same layout as start_date.  */
  const char *era_name;
  const char *era_format;
  const wchar_t *era_wname;	/* Wide-character form of era_name.  */
  const wchar_t *era_wformat;	/* Wide-character form of era_format.  */
  int absolute_direction;
  /* absolute direction:
     +1 indicates that year number is higher in the future. (like A.D.)
     -1 indicates that year number is higher in the past. (like B.C.)  */
};
149 
/* Structure caching computed data about information from LC_TIME.
   The `private' member of `struct __locale_data' points to this for
   LC_TIME data.  */
struct lc_time_data
{
  struct era_entry *eras;	/* Array of era entries.  */
  size_t num_eras;		/* Presumably the number of elements in
				   `eras'.  */
  int era_initialized;		/* Flag for the era fields above.  */

  const char **alt_digits;
  const wchar_t **walt_digits;	/* Wide-character counterpart.  */
  int alt_digits_initialized;	/* Flag for alt_digits.  */
  int walt_digits_initialized;	/* Flag for walt_digits.  */
};
163 
/* Ancillary data for LC_CTYPE.  Co-allocated after struct
   __locale_data by _nl_intern_locale_data.

   NOTE(review): this structure uses `bool', but this header does not
   include <stdbool.h> itself -- confirm that it is always provided by
   an earlier include.  */
struct lc_ctype_data
{
  /* See get_gconv_fcts and __wcsmbs_load_conv.  */
  const struct gconv_fcts *fcts;

  /* If false, outdigit just maps to the ASCII digits.  */
  bool outdigit_translation_needed;

  /* Cached multi-byte string lengths.  This could be added to the
     locale data itself if the format is changed (which impacts
     existing statically linked binaries).  */

  /* For the outdigit decimal digits (copied from LC_CTYPE).  One
     element per decimal digit.  */
  unsigned char outdigit_bytes[10];

  /* If all outdigit_bytes elements are equal, this is that value,
     otherwise it is 0.  */
  unsigned char outdigit_bytes_all_equal;
};
185 
/* LC_CTYPE specific:
   Hardwired indices for standard wide character translation mappings.
   toupper is index 0 and tolower is index 1.  */
enum
{
  __TOW_toupper = 0,
  __TOW_tolower = 1
};
193 
194 
/* LC_CTYPE specific:
   Test a single-byte character against a wide character class.
   _ISCTYPE (c, desc) = iswctype (btowc (c), desc).
   c must be an `unsigned char'.  desc must be a nonzero wctype_t.

   The lookup treats the eight 32-bit words immediately preceding DESC
   as a 256-bit table: the word is selected by C / 32 and the bit
   within it by C % 32.  The result is 0 or 1.  C is evaluated twice.  */
#define _ISCTYPE(c, desc) \
  (1 & (*((((const uint32_t *) (desc)) - 8) + ((c) >> 5)) >> ((c) & 0x1f)))
201 
/* Category name handling variables.  Concatenate all the strings in a
   single object to minimize relocations.  Individual strings can be
   accessed using _nl_category_names_get.

   CATNAMEMF (__LINE__) builds a member name of the form strNNN from
   the current line number; the extra CATNAMEMF1 level is needed so
   that __LINE__ is expanded before it is pasted.  */
#define CATNAMEMF(line) CATNAMEMF1 (line)
#define CATNAMEMF1(line) str##line
extern const struct catnamestr_t
{
/* Each DEFINE_CATEGORY use in categories.def becomes one char array
   member, sized to hold the category name including its terminating
   null byte.  */
#define DEFINE_CATEGORY(category, category_name, items, a) \
  char CATNAMEMF (__LINE__)[sizeof (category_name)];
#include "categories.def"
#undef DEFINE_CATEGORY
} _nl_category_names attribute_hidden;
/* Per-category tables with __LC_LAST elements each.  The idxs table
   holds the byte offset of a category's name inside
   _nl_category_names (see _nl_category_names_get); the sizes table
   presumably holds the matching name lengths.  */
extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden;
extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden;
216 
217 /* Return the name of the category INDEX, which must be nonnegative
218    and less than _LC_LAST.  */
219 static inline const char *
_nl_category_names_get(int index)220 _nl_category_names_get (int index)
221 {
222   return (const char *) &_nl_category_names + _nl_category_name_idxs[index];
223 }
224 
/* Names of the standard locales.  Only declared here; the arrays are
   defined elsewhere.  */
extern const char _nl_C_name[] attribute_hidden;
extern const char _nl_POSIX_name[] attribute_hidden;

/* The standard codeset.  Only declared here as well.  */
extern const char _nl_C_codeset[] attribute_hidden;
231 
/* This is the internal locale_t object that holds the global locale
   controlled by calls to setlocale.  A thread's TSD locale pointer
   points to this when `uselocale (LC_GLOBAL_LOCALE)' is in effect.
   Its __locales[] array has one `struct __locale_data' pointer per
   category.  */
extern struct __locale_struct _nl_global_locale attribute_hidden;
236 
/* This fetches the thread-local locale_t pointer, either one set with
   uselocale or &_nl_global_locale.  <libc-tsd.h> supplies the
   __libc_tsd_get and __libc_tsd_define macros; the macro above may be
   defined before the include because it is only expanded where it is
   used.  The __libc_tsd_define line declares the LOCALE slot as
   extern.  */
#define _NL_CURRENT_LOCALE	(__libc_tsd_get (locale_t, LOCALE))
#include <libc-tsd.h>
__libc_tsd_define (extern, locale_t, LOCALE)
242 
243 
/* For static linking it is desirable to avoid always linking in the code
   and data for every category when we can tell at link time that they are
   unused.  We can manage this playing some tricks with weak references.
   But with thread-local locale settings, it becomes quite ungainly unless
   we can use __thread variables.  So only in that case do we attempt this.
   As written here, NL_CURRENT_INDIRECT is defined whenever SHARED is
   not defined.  */
#ifndef SHARED
# include <tls.h>
# define NL_CURRENT_INDIRECT	1
#endif
253 
254 #ifdef NL_CURRENT_INDIRECT
255 
/* For each category declare the thread-local variable for the current
   locale data.  This has an extra indirection so it points at the
   __locales[CATEGORY] element in either _nl_global_locale or the current
   locale object set by uselocale, which points at the actual data.  The
   reason for having these variables is so that references to particular
   categories will link in the lc-CATEGORY.c module to define this symbol,
   and we arrange that linking that module is what brings in all the code
   associated with this category.

   The declarations are generated by including categories.def with
   DEFINE_CATEGORY temporarily defined to emit one extern declaration
   per category; only the CATEGORY argument is used here.  */
#define DEFINE_CATEGORY(category, category_name, items, a) \
extern __thread struct __locale_data *const *_nl_current_##category \
  attribute_hidden attribute_tls_model_ie;
#include "categories.def"
#undef	DEFINE_CATEGORY
269 
/* Return a pointer to the current `struct __locale_data' for CATEGORY.
   CATEGORY must be a literal category name, because it is pasted onto
   `_nl_current_' to form the variable name.  */
#define _NL_CURRENT_DATA(category)	(*_nl_current_##category)

/* Extract the current CATEGORY locale's string for ITEM.  */
#define _NL_CURRENT(category, item) \
  ((*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  */
#define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM.  */
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].word)
284 
/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  The
   variable is initialized to point at the matching __locales[] element
   of _nl_global_locale.  The symbol _nl_current_CATEGORY_used is set to
   a value unequal to zero to mark this category as used.  On S390 the
   used relocation to load the symbol address can only handle even
   addresses, hence the value 2.

   _NL_CURRENT_DEFINE_ABS produces the assembler text that assigns the
   value: a `.set' directive if HAVE_ASM_SET_DIRECTIVE is defined,
   otherwise a `sym = val' assignment.  It is defined after its use
   below, which is fine because it is only expanded when
   _NL_CURRENT_DEFINE itself is used.  */
#define _NL_CURRENT_DEFINE(category) \
  __thread struct __locale_data *const *_nl_current_##category \
    attribute_hidden = &_nl_global_locale.__locales[category]; \
  asm (".globl " __SYMBOL_PREFIX "_nl_current_" #category "_used\n" \
       _NL_CURRENT_DEFINE_ABS (_nl_current_##category##_used, 2));
#ifdef HAVE_ASM_SET_DIRECTIVE
# define _NL_CURRENT_DEFINE_ABS(sym, val) ".set " #sym ", " #val
#else
# define _NL_CURRENT_DEFINE_ABS(sym, val) #sym " = " #val
#endif
299 
300 #else
301 
/* All categories are always loaded in the shared library, so there is no
   point in having lots of separate symbols for linking.  The accessors
   below therefore index the __locales[] array of the current locale
   object directly.  */

/* Return a pointer to the current `struct __locale_data' for CATEGORY.  */
# define _NL_CURRENT_DATA(category) \
  (_NL_CURRENT_LOCALE->__locales[category])

/* Extract the current CATEGORY locale's string for ITEM.  */
# define _NL_CURRENT(category, item) \
  (_NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  */
# define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM.  */
# define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].word)
320 
/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  In
   this configuration the macro body is only a comment, so it expands
   to nothing.  */
# define _NL_CURRENT_DEFINE(category) \
  /* No per-category variable here. */
324 
325 #endif
326 
327 /* Extract CATEGORY locale's string for ITEM.  */
328 static inline const char *
329 _nl_lookup (locale_t l, int category, int item)
330 {
331   return l->__locales[category]->values[_NL_ITEM_INDEX (item)].string;
332 }
333 
334 /* Extract CATEGORY locale's wide string for ITEM.  */
335 static inline const wchar_t *
_nl_lookup_wstr(locale_t l,int category,int item)336 _nl_lookup_wstr (locale_t l, int category, int item)
337 {
338   return (wchar_t *) l->__locales[category]
339     ->values[_NL_ITEM_INDEX (item)].wstr;
340 }
341 
342 /* Extract the CATEGORY locale's word for ITEM.  */
343 static inline uint32_t
_nl_lookup_word(locale_t l,int category,int item)344 _nl_lookup_word (locale_t l, int category, int item)
345 {
346   return l->__locales[category]->values[_NL_ITEM_INDEX (item)].word;
347 }
348 
/* Default search path used when the LOCPATH environment variable is
   not set.  Only declared here.  */
extern const char _nl_default_locale_path[] attribute_hidden;
351 
/* Load the locale data for CATEGORY from the file specified by *NAME.
   If *NAME is "", use environment variables as specified by POSIX, and
   fill in *NAME with the actual name used.  If LOCALE_PATH is not null,
   those directories are searched for the locale files.  If it's null,
   the locale archive is checked first and then _nl_default_locale_path
   is searched for locale files.  LOCALE_PATH_LEN is presumably the
   length of LOCALE_PATH.  */
extern struct __locale_data *_nl_find_locale (const char *locale_path,
					      size_t locale_path_len,
					      int category, const char **name)
     attribute_hidden;
362 
/* Try to load the file described by FILE as data for CATEGORY.  */
extern void _nl_load_locale (struct loaded_l10nfile *file, int category)
     attribute_hidden;

/* Free all resources held by the CATEGORY data LOCALE.  */
extern void _nl_unload_locale (int category, struct __locale_data *locale)
  attribute_hidden;

/* Free the locale and give back all memory if the usage count is one.
   NOTE(review): the first parameter is named `locale', but it is
   presumably a category number as in _nl_unload_locale -- confirm
   against the definition.  */
extern void _nl_remove_locale (int locale, struct __locale_data *data)
     attribute_hidden;
374 
/* Find the locale *NAMEP in the locale archive, and return the
   internalized data structure for its CATEGORY data.  If this locale has
   already been loaded from the archive, just return the existing data
   structure.  If successful, set *NAMEP to point directly into the mapped
   archive string table; that way, the next call can short-circuit strcmp.  */
extern struct __locale_data *_nl_load_locale_from_archive (int category,
							   const char **namep)
     attribute_hidden;
383 
/* Subroutine of setlocale's __libc_subfreeres hook.  Takes no
   arguments and returns nothing.  */
extern void _nl_archive_subfreeres (void) attribute_hidden;

/* Subroutine of gconv-db's __libc_subfreeres hook.  Takes no
   arguments and returns nothing.  */
extern void _nl_locale_subfreeres (void) attribute_hidden;
389 
/* Validate the contents of a locale file and set up the in-core
   data structure to point into the data.  DATA is the file contents
   for CATEGORY and DATASIZE its size.  This leaves the `alloc'
   and `name' fields uninitialized, for the caller to fill in.
   If any bogons are detected in the data, this will refuse to
   intern it, and return a null pointer instead.  */
extern struct __locale_data *_nl_intern_locale_data (int category,
						     const void *data,
						     size_t datasize)
     attribute_hidden;
399 
400 
/* Return the `era' entry which corresponds to TP, using the LC_TIME
   data LC_TIME.  Used in strftime.  */
extern struct era_entry *_nl_get_era_entry (const struct tm *tp,
					    struct __locale_data *lc_time)
     attribute_hidden;

/* Return the CNT'th `era' entry of the LC_TIME data LC_TIME.  Used in
   strptime.  */
extern struct era_entry *_nl_select_era_entry (int cnt,
					       struct __locale_data *lc_time)
	  attribute_hidden;
410 
/* Return the `alt_digit' string which corresponds to NUMBER, using the
   LC_TIME data LC_TIME.  Used in strftime.  */
extern const char *_nl_get_alt_digit (unsigned int number,
				      struct __locale_data *lc_time)
	  attribute_hidden;

/* Similar, but now for wide characters.  */
extern const wchar_t *_nl_get_walt_digit (unsigned int number,
					  struct __locale_data *lc_time)
     attribute_hidden;

/* Parse the string at *STRP as an alternative digit and return its
   numeric value.  STRP is passed by reference; presumably it is
   advanced past the parsed text -- confirm against the definition.  */
extern int _nl_parse_alt_digit (const char **strp,
				struct __locale_data *lc_time)
     attribute_hidden;
425 
/* Postload processing for LC_CTYPE.  Unlike its neighbours, this
   declaration carries no attribute_hidden.  */
extern void _nl_postload_ctype (void);

/* Deallocate category-specific data (the `private' member of struct
   __locale_data).  Used in _nl_unload_locale.  */
extern void _nl_cleanup_ctype (struct __locale_data *) attribute_hidden;
extern void _nl_cleanup_time (struct __locale_data *) attribute_hidden;
432 
433 
434 #endif	/* localeinfo.h */
435