1 /* Declarations for internal libc locale interfaces
2 Copyright (C) 1995-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #ifndef _LOCALEINFO_H
20 #define _LOCALEINFO_H 1
21
22 #include <stddef.h>
23 #include <langinfo.h>
24 #include <limits.h>
25 #include <locale.h>
26 #include <time.h>
27 #include <stdint.h>
28 #include <sys/types.h>
29
30 #include <intl/loadinfo.h> /* For loaded_l10nfile definition. */
31
/* Magic number at the beginning of a locale data file for CATEGORY.
   LC_COLLATE and LC_CTYPE each have their own base constant; all
   other categories share a third one.  The selected base is XORed
   with the category number.

   CATEGORY is parenthesized at every use so that an expression
   argument is handled correctly; note that it is still evaluated
   more than once.  */
#define LIMAGIC(category) \
  ((category) == LC_COLLATE \
   ? ((unsigned int) (0x20051014 ^ (category))) \
   : (category) == LC_CTYPE \
   ? ((unsigned int) (0x20090720 ^ (category))) \
   : ((unsigned int) (0x20031115 ^ (category))))
39
/* Special weight constant for the collation data.  */
41 #define IGNORE_CHAR 2
42
/* We use a special value for the usage counter in `__locale_data' to
   signal that this data must never be removed anymore.  UNDELETABLE
   (UINT_MAX) is evidently that marker value; MAX_USAGE_COUNT is one
   less than it, presumably the largest value an ordinary counter may
   hold -- confirm against the code that updates `usage_count'.  */
#define MAX_USAGE_COUNT (UINT_MAX - 1)
#define UNDELETABLE UINT_MAX
47
/* Structure describing locale data in core for a category.  The
   fixed members are followed by the trailing `values' array, whose
   element count is recorded in `nstrings'.  */
struct __locale_data
{
  const char *name;		/* Storage depends on `alloc', see below.  */
  const char *filedata;		/* Region mapping the file data.  */
  off_t filesize;		/* Size of the file (and the region).  */
  enum				/* Flavor of storage used for those.  */
  {
    ld_malloced,		/* Both are malloc'd.  */
    ld_mapped,			/* name is malloc'd, filedata mmap'd.  */
    ld_archive			/* Both point into mmap'd archive regions.  */
  } alloc;

  /* This provides a slot for category-specific code to cache data
     computed about this locale.  Type of the data pointed to:

     LC_CTYPE	struct lc_ctype_data (_nl_intern_locale_data)
     LC_TIME	struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)

     This data is deallocated at the start of _nl_unload_locale.  */
  void *private;

  unsigned int usage_count;	/* Counter for users.  See MAX_USAGE_COUNT
				   and UNDELETABLE.  */

  int use_translit;		/* Nonzero if the mb*towc*() and wc*tomb()
				   functions should use transliteration.  */

  unsigned int nstrings;	/* Number of strings below.  */
  union locale_data_value
  {
    const uint32_t *wstr;
    const char *string;
    unsigned int word;		/* Note endian issues vs 64-bit pointers.  */
  }
  values __flexarr;	/* Items, usually pointers into `filedata'.  */
};
84
/* This alignment is used for 32-bit integers in locale files, both
   those that are explicitly int32_t or uint32_t and those that are
   wchar_t, regardless of the (possibly smaller) alignment required
   for such integers on a particular host.  The expansion is fully
   parenthesized so it can be used safely inside any expression.  */
#define LOCFILE_ALIGN		(sizeof (int32_t))
/* Mask selecting the low bits that must be zero in an aligned value.  */
#define LOCFILE_ALIGN_MASK	(LOCFILE_ALIGN - 1)
/* Round X up to the next multiple of LOCFILE_ALIGN (X itself if it is
   already a multiple).  */
#define LOCFILE_ALIGN_UP(x)	(((x) + LOCFILE_ALIGN - 1) \
				 & ~LOCFILE_ALIGN_MASK)
/* Nonzero if X is a multiple of LOCFILE_ALIGN.  */
#define LOCFILE_ALIGNED_P(x)	(((x) & LOCFILE_ALIGN_MASK) == 0)
94
/* We know three kinds of collation sorting rules: forward, backward
   and position.  By declaration order the enumerators have the values
   forward = 1, backward = 2 and position = 4, so they behave like bit
   flags: sort_forward_position (5) and sort_backward_position (6) are
   the matching combinations and sort_mask (7) covers all three bits.
   The values 0 and 3 are named as illegal.  */
enum coll_sort_rule
{
  illegal_0__,
  sort_forward,
  sort_backward,
  illegal_3__,
  sort_position,
  sort_forward_position,
  sort_backward_position,
  sort_mask
};
107
/* We can map the types of the entries into a few categories.  The
   enumerators are numbered consecutively from `none' = 0; the
   w-prefixed kinds are the wide counterparts of string, stringarray
   and stringlist.  */
enum value_type
{
  none,
  string,
  stringarray,
  byte,
  bytearray,
  word,
  stringlist,
  wordarray,
  wstring,
  wstringarray,
  wstringlist
};
123
124
/* Definitions for `era' information from LC_TIME.
   ERA_NAME_FORMAT_MEMBERS is the number of ERA_M_* / ERA_W_* indices
   defined below.  M and W presumably stand for the multibyte and wide
   variants (compare era_name/era_format and era_wname/era_wformat in
   `struct era_entry') -- confirm against the users of these indices.  */
#define ERA_NAME_FORMAT_MEMBERS 4
#define ERA_M_NAME   0
#define ERA_M_FORMAT 1
#define ERA_W_NAME   2
#define ERA_W_FORMAT 3
131
132
/* Structure to access `era' information from LC_TIME.  The name and
   the format are each available as a `char' string and as a
   `wchar_t' string.  */
struct era_entry
{
  uint32_t direction;		/* Contains '+' or '-'.  */
  int32_t offset;
  int32_t start_date[3];	/* Three components each; presumably
				   year, month, day -- confirm.  */
  int32_t stop_date[3];
  const char *era_name;
  const char *era_format;
  const wchar_t *era_wname;
  const wchar_t *era_wformat;
  int absolute_direction;
  /* absolute direction:
     +1 indicates that year number is higher in the future. (like A.D.)
     -1 indicates that year number is higher in the past. (like B.C.)  */
};
149
/* Structure caching computed data about information from LC_TIME.
   For LC_TIME the `private' member of `struct __locale_data' (a plain
   `void *') points to this.  The *_initialized members are flags
   kept next to the data they describe; presumably they record that
   the corresponding cache has been filled in -- confirm in the
   LC_TIME initialization code.  */
struct lc_time_data
{
  struct era_entry *eras;
  size_t num_eras;
  int era_initialized;

  const char **alt_digits;
  const wchar_t **walt_digits;
  int alt_digits_initialized;
  int walt_digits_initialized;
};
163
/* Ancillary data for LC_CTYPE.  Co-allocated after struct
   __locale_data by _nl_intern_locale_data.
   NOTE(review): `bool' is used below, but <stdbool.h> is not among
   the headers this file includes directly; presumably an earlier
   include provides it -- confirm.  */
struct lc_ctype_data
{
  /* See get_gconv_fcts and __wcsmbs_load_conv.  */
  const struct gconv_fcts *fcts;

  /* If false, outdigit just maps to the ASCII digits.  */
  bool outdigit_translation_needed;

  /* Cached multi-byte string lengths.  This could be added to the
     locale data itself if the format is changed (which impacts
     existing statically linked binaries).  */

  /* For the outdigit decimal digits (copied from LC_CTYPE).  One
     element per decimal digit.  */
  unsigned char outdigit_bytes[10];

  /* If all outdigit_bytes elements are equal, this is that value,
     otherwise it is 0.  */
  unsigned char outdigit_bytes_all_equal;
};
185
/* LC_CTYPE specific:
   Hardwired indices for standard wide character translation mappings:
   index 0 is the toupper mapping, index 1 the tolower mapping.  */
enum
{
  __TOW_toupper = 0,
  __TOW_tolower = 1
};
193
194
/* LC_CTYPE specific:
   Access a wide character class with a single character index.
   _ISCTYPE (c, desc) = iswctype (btowc (c), desc).
   c must be an `unsigned char'.  desc must be a nonzero wctype_t.

   DESC is treated as a pointer to 32-bit words.  The macro steps back
   eight words from it, selects word C >> 5 and tests bit C & 0x1f;
   for C in 0..255 this reads a 256-bit table stored immediately
   before the address DESC designates.  The result is 0 or 1.  C is
   evaluated twice.  */
#define _ISCTYPE(c, desc) \
  (((((const uint32_t *) (desc)) - 8)[(c) >> 5] >> ((c) & 0x1f)) & 1)
201
/* Category name handling variables.  Concatenate all the strings in a
   single object to minimize relocations.  Individual strings can be
   accessed using _nl_category_names.

   `struct catnamestr_t' gets one `char' array member for every
   DEFINE_CATEGORY entry in categories.def.  Each member is sized with
   sizeof (category_name), i.e. including the terminating null byte,
   and is named str<N>, where <N> is the value of __LINE__ at the
   entry.  CATNAMEMF needs the extra CATNAMEMF1 level so that __LINE__
   is expanded before it is pasted.  */
#define CATNAMEMF(line) CATNAMEMF1 (line)
#define CATNAMEMF1(line) str##line
extern const struct catnamestr_t
{
#define DEFINE_CATEGORY(category, category_name, items, a) \
  char CATNAMEMF (__LINE__)[sizeof (category_name)];
#include "categories.def"
#undef DEFINE_CATEGORY
} _nl_category_names attribute_hidden;
/* Per-category tables indexed by category number.  The first holds
   the byte offset of each name inside _nl_category_names (see
   _nl_category_names_get); the second presumably holds the name
   lengths -- confirm at the definition.  */
extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden;
extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden;
216
217 /* Return the name of the category INDEX, which must be nonnegative
218 and less than _LC_LAST. */
219 static inline const char *
_nl_category_names_get(int index)220 _nl_category_names_get (int index)
221 {
222 return (const char *) &_nl_category_names + _nl_category_name_idxs[index];
223 }
224
225 /* Name of the standard locales. */
226 extern const char _nl_C_name[] attribute_hidden;
227 extern const char _nl_POSIX_name[] attribute_hidden;
228
229 /* The standard codeset. */
230 extern const char _nl_C_codeset[] attribute_hidden;
231
232 /* This is the internal locale_t object that holds the global locale
233 controlled by calls to setlocale. A thread's TSD locale pointer
234 points to this when `uselocale (LC_GLOBAL_LOCALE)' is in effect. */
235 extern struct __locale_struct _nl_global_locale attribute_hidden;
236
237 /* This fetches the thread-local locale_t pointer, either one set with
238 uselocale or &_nl_global_locale. */
239 #define _NL_CURRENT_LOCALE (__libc_tsd_get (locale_t, LOCALE))
240 #include <libc-tsd.h>
__libc_tsd_define(extern,locale_t,LOCALE)241 __libc_tsd_define (extern, locale_t, LOCALE)
242
243
/* For static linking it is desirable to avoid always linking in the code
245 and data for every category when we can tell at link time that they are
246 unused. We can manage this playing some tricks with weak references.
247 But with thread-local locale settings, it becomes quite ungainly unless
248 we can use __thread variables. So only in that case do we attempt this. */
249 #ifndef SHARED
250 # include <tls.h>
251 # define NL_CURRENT_INDIRECT 1
252 #endif
253
254 #ifdef NL_CURRENT_INDIRECT
255
/* For each category declare the thread-local variable for the current
   locale data.  This has an extra indirection so it points at the
   __locales[CATEGORY] element in either _nl_global_locale or the current
   locale object set by uselocale, which points at the actual data.  The
   reason for having these variables is so that references to particular
   categories will link in the lc-CATEGORY.c module to define this symbol,
   and we arrange that linking that module is what brings in all the code
   associated with this category.  */
#define DEFINE_CATEGORY(category, category_name, items, a) \
extern __thread struct __locale_data *const *_nl_current_##category \
  attribute_hidden attribute_tls_model_ie;
#include "categories.def"
#undef DEFINE_CATEGORY

/* Return a pointer to the current `struct __locale_data' for CATEGORY.
   CATEGORY is pasted into an identifier, so it must be the bare
   category name token.  */
#define _NL_CURRENT_DATA(category) (*_nl_current_##category)

/* Extract the current CATEGORY locale's string for ITEM.  */
#define _NL_CURRENT(category, item) \
  ((*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  The
   stored `const uint32_t *' is cast to `wchar_t *'.  */
#define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM.  */
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].word)

/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  The symbol
   _nl_current_CATEGORY_used is set to a value unequal to zero to mark this
   category as used.  On S390 the used relocation to load the symbol address
   can only handle even addresses, which is presumably why the value
   chosen is 2.  _NL_CURRENT_DEFINE_ABS is defined after this macro;
   that is fine because it is only expanded where _NL_CURRENT_DEFINE
   is used.  */
#define _NL_CURRENT_DEFINE(category) \
  __thread struct __locale_data *const *_nl_current_##category \
    attribute_hidden = &_nl_global_locale.__locales[category]; \
  asm (".globl " __SYMBOL_PREFIX "_nl_current_" #category "_used\n" \
       _NL_CURRENT_DEFINE_ABS (_nl_current_##category##_used, 2));
/* Build the assembler text that gives SYM the absolute value VAL,
   using `.set' when the assembler supports it and `SYM = VAL'
   otherwise.  */
#ifdef HAVE_ASM_SET_DIRECTIVE
# define _NL_CURRENT_DEFINE_ABS(sym, val) ".set " #sym ", " #val
#else
# define _NL_CURRENT_DEFINE_ABS(sym, val) #sym " = " #val
#endif
299
300 #else
301
/* All categories are always loaded in the shared library, so there is no
   point in having lots of separate symbols for linking.  Everything is
   reached through the current locale object instead.  */

/* Return a pointer to the current `struct __locale_data' for CATEGORY.  */
# define _NL_CURRENT_DATA(category) \
  (_NL_CURRENT_LOCALE->__locales[category])

/* Extract the current CATEGORY locale's string for ITEM.  */
# define _NL_CURRENT(category, item) \
  (_NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  The
   stored `const uint32_t *' is cast to `wchar_t *'.  */
# define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM.  */
# define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].word)

/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  In
   this configuration it expands to nothing.  */
# define _NL_CURRENT_DEFINE(category) \
  /* No per-category variable here. */
324
325 #endif
326
327 /* Extract CATEGORY locale's string for ITEM. */
328 static inline const char *
329 _nl_lookup (locale_t l, int category, int item)
330 {
331 return l->__locales[category]->values[_NL_ITEM_INDEX (item)].string;
332 }
333
334 /* Extract CATEGORY locale's wide string for ITEM. */
335 static inline const wchar_t *
_nl_lookup_wstr(locale_t l,int category,int item)336 _nl_lookup_wstr (locale_t l, int category, int item)
337 {
338 return (wchar_t *) l->__locales[category]
339 ->values[_NL_ITEM_INDEX (item)].wstr;
340 }
341
342 /* Extract the CATEGORY locale's word for ITEM. */
343 static inline uint32_t
_nl_lookup_word(locale_t l,int category,int item)344 _nl_lookup_word (locale_t l, int category, int item)
345 {
346 return l->__locales[category]->values[_NL_ITEM_INDEX (item)].word;
347 }
348
349 /* Default search path if no LOCPATH environment variable. */
350 extern const char _nl_default_locale_path[] attribute_hidden;
351
352 /* Load the locale data for CATEGORY from the file specified by *NAME.
353 If *NAME is "", use environment variables as specified by POSIX, and
354 fill in *NAME with the actual name used. If LOCALE_PATH is not null,
355 those directories are searched for the locale files. If it's null,
356 the locale archive is checked first and then _nl_default_locale_path
357 is searched for locale files. */
358 extern struct __locale_data *_nl_find_locale (const char *locale_path,
359 size_t locale_path_len,
360 int category, const char **name)
361 attribute_hidden;
362
363 /* Try to load the file described by FILE. */
364 extern void _nl_load_locale (struct loaded_l10nfile *file, int category)
365 attribute_hidden;
366
/* Free all resources.  */
368 extern void _nl_unload_locale (int category, struct __locale_data *locale)
369 attribute_hidden;
370
371 /* Free the locale and give back all memory if the usage count is one. */
372 extern void _nl_remove_locale (int locale, struct __locale_data *data)
373 attribute_hidden;
374
375 /* Find the locale *NAMEP in the locale archive, and return the
376 internalized data structure for its CATEGORY data. If this locale has
377 already been loaded from the archive, just returns the existing data
378 structure. If successful, sets *NAMEP to point directly into the mapped
379 archive string table; that way, the next call can short-circuit strcmp. */
380 extern struct __locale_data *_nl_load_locale_from_archive (int category,
381 const char **namep)
382 attribute_hidden;
383
384 /* Subroutine of setlocale's __libc_subfreeres hook. */
385 extern void _nl_archive_subfreeres (void) attribute_hidden;
386
387 /* Subroutine of gconv-db's __libc_subfreeres hook. */
388 extern void _nl_locale_subfreeres (void) attribute_hidden;
389
390 /* Validate the contents of a locale file and set up the in-core
391 data structure to point into the data. This leaves the `alloc'
392 and `name' fields uninitialized, for the caller to fill in.
393 If any bogons are detected in the data, this will refuse to
394 intern it, and return a null pointer instead. */
395 extern struct __locale_data *_nl_intern_locale_data (int category,
396 const void *data,
397 size_t datasize)
398 attribute_hidden;
399
400
401 /* Return `era' entry which corresponds to TP. Used in strftime. */
402 extern struct era_entry *_nl_get_era_entry (const struct tm *tp,
403 struct __locale_data *lc_time)
404 attribute_hidden;
405
/* Return the CNT'th `era' entry.  Used in strptime.  */
407 extern struct era_entry *_nl_select_era_entry (int cnt,
408 struct __locale_data *lc_time)
409 attribute_hidden;
410
411 /* Return `alt_digit' which corresponds to NUMBER. Used in strftime. */
412 extern const char *_nl_get_alt_digit (unsigned int number,
413 struct __locale_data *lc_time)
414 attribute_hidden;
415
416 /* Similar, but now for wide characters. */
417 extern const wchar_t *_nl_get_walt_digit (unsigned int number,
418 struct __locale_data *lc_time)
419 attribute_hidden;
420
421 /* Parse string as alternative digit and return numeric value. */
422 extern int _nl_parse_alt_digit (const char **strp,
423 struct __locale_data *lc_time)
424 attribute_hidden;
425
426 /* Postload processing. */
427 extern void _nl_postload_ctype (void);
428
429 /* Deallocate category-specific data. Used in _nl_unload_locale. */
430 extern void _nl_cleanup_ctype (struct __locale_data *) attribute_hidden;
431 extern void _nl_cleanup_time (struct __locale_data *) attribute_hidden;
432
433
434 #endif /* localeinfo.h */
435