1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <langinfo.h>
6 #include <libintl.h>
7 #include <stddef.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <sys/mman.h>
11 #include <sys/stat.h>
12 
13 #include "def.h"
14 #include "dirent-util.h"
15 #include "env-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "hashmap.h"
19 #include "locale-util.h"
20 #include "path-util.h"
21 #include "set.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "strv.h"
25 #include "utf8.h"
26 
normalize_locale(const char * name)27 static char *normalize_locale(const char *name) {
28         const char *e;
29 
30         /* Locale names are weird: glibc has some magic rules when looking for the charset name on disk: it
31          * lowercases everything, and removes most special chars. This means the official .UTF-8 suffix
32          * becomes .utf8 when looking things up on disk. When enumerating locales, let's do the reverse
33          * operation, and go back to ".UTF-8" which appears to be the more commonly accepted name. We only do
34          * that for UTF-8 however, since it's kinda the only charset that matters. */
35 
36         e = endswith(name, ".utf8");
37         if (e) {
38                 _cleanup_free_ char *prefix = NULL;
39 
40                 prefix = strndup(name, e - name);
41                 if (!prefix)
42                         return NULL;
43 
44                 return strjoin(prefix, ".UTF-8");
45         }
46 
47         e = strstr(name, ".utf8@");
48         if (e) {
49                 _cleanup_free_ char *prefix = NULL;
50 
51                 prefix = strndup(name, e - name);
52                 if (!prefix)
53                         return NULL;
54 
55                 return strjoin(prefix, ".UTF-8@", e + 6);
56         }
57 
58         return strdup(name);
59 }
60 
/* Enumerates the locale names stored in glibc's locale archive and adds their normalized form to the
 * specified set.
 *
 * Returns 0 on success, and also when the archive file does not exist. Returns -EBADMSG if the file
 * is not a regular file or its contents fail validation, -EFBIG if it is too large to map, -ENOMEM
 * on allocation failure, or another negative errno-style error from the underlying calls. */
static int add_locales_from_archive(Set *locales) {
        /* Stolen from glibc... */

        struct locarhead {
                uint32_t magic;
                /* Serial number.  */
                uint32_t serial;
                /* Name hash table.  */
                uint32_t namehash_offset;
                uint32_t namehash_used;
                uint32_t namehash_size;
                /* String table.  */
                uint32_t string_offset;
                uint32_t string_used;
                uint32_t string_size;
                /* Table with locale records.  */
                uint32_t locrectab_offset;
                uint32_t locrectab_used;
                uint32_t locrectab_size;
                /* MD5 sum hash table.  */
                uint32_t sumhash_offset;
                uint32_t sumhash_used;
                uint32_t sumhash_size;
        };

        struct namehashent {
                /* Hash value of the name.  */
                uint32_t hashval;
                /* Offset of the name in the string table.  */
                uint32_t name_offset;
                /* Offset of the locale record.  */
                uint32_t locrec_offset;
        };

        const struct locarhead *h;
        const struct namehashent *e;
        const void *p = MAP_FAILED;
        _cleanup_close_ int fd = -1;
        size_t sz = 0;
        struct stat st;
        int r;

        fd = open("/usr/lib/locale/locale-archive", O_RDONLY|O_NOCTTY|O_CLOEXEC);
        if (fd < 0)
                return errno == ENOENT ? 0 : -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        if (!S_ISREG(st.st_mode))
                return -EBADMSG;

        if (st.st_size < (off_t) sizeof(struct locarhead))
                return -EBADMSG;

        if (file_offset_beyond_memory_size(st.st_size))
                return -EFBIG;

        /* Remember the mapping length: munmap() needs the very same size again, and rejects a
         * length of zero with EINVAL, which would leave the mapping behind. */
        sz = (size_t) st.st_size;

        p = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                return -errno;

        /* Validate the header. The sums are calculated in 64 bit so that large 32 bit values cannot
         * wrap around and sneak past the check. The name hash table is the only table we walk
         * below, and we index it by entry, hence verify that all of its entries fit into the file. */
        h = (const struct locarhead *) p;
        if (h->magic != 0xde020109 ||
            (uint64_t) h->namehash_offset + (uint64_t) h->namehash_size * sizeof(struct namehashent) > sz ||
            (uint64_t) h->string_offset + h->string_size > sz ||
            (uint64_t) h->locrectab_offset + h->locrectab_size > sz ||
            (uint64_t) h->sumhash_offset + h->sumhash_size > sz) {
                r = -EBADMSG;
                goto finish;
        }

        e = (const struct namehashent*) ((const uint8_t*) p + h->namehash_offset);
        for (size_t i = 0; i < h->namehash_size; i++) {
                const char *name;
                char *z;

                /* Entries without a locale record are unused hash table slots */
                if (e[i].locrec_offset == 0)
                        continue;

                /* The name must start inside the mapping and be NUL terminated inside of it too,
                 * otherwise the string functions below would read past the end of the file. */
                if (e[i].name_offset >= sz) {
                        r = -EBADMSG;
                        goto finish;
                }

                name = (const char*) p + e[i].name_offset;
                if (!memchr(name, 0, sz - e[i].name_offset)) {
                        r = -EBADMSG;
                        goto finish;
                }

                if (!utf8_is_valid(name))
                        continue;

                z = normalize_locale(name);
                if (!z) {
                        r = -ENOMEM;
                        goto finish;
                }

                /* Tolerate -EEXIST the same way add_locales_from_libdir() does. */
                r = set_consume(locales, z);
                if (r < 0 && r != -EEXIST)
                        goto finish;
        }

        r = 0;

 finish:
        if (p != MAP_FAILED)
                munmap((void*) p, sz);

        return r;
}
162 
/* Adds the normalized name of every subdirectory of /usr/lib/locale to the specified set. Returns 0
 * on success (and also if the directory does not exist), a negative errno-style error otherwise. */
static int add_locales_from_libdir (Set *locales) {
        _cleanup_closedir_ DIR *libdir = NULL;

        libdir = opendir("/usr/lib/locale");
        if (!libdir) {
                if (errno == ENOENT)
                        return 0;

                return -errno;
        }

        FOREACH_DIRENT(de, libdir, return -errno) {
                char *normalized;
                int k;

                /* Only entries reported as directories are considered */
                if (de->d_type != DT_DIR)
                        continue;

                normalized = normalize_locale(de->d_name);
                if (!normalized)
                        return -ENOMEM;

                k = set_consume(locales, normalized);
                if (k == -EEXIST)
                        continue;
                if (k < 0)
                        return k;
        }

        return 0;
}
188 
get_locales(char *** ret)189 int get_locales(char ***ret) {
190         _cleanup_set_free_ Set *locales = NULL;
191         _cleanup_strv_free_ char **l = NULL;
192         int r;
193 
194         locales = set_new(&string_hash_ops);
195         if (!locales)
196                 return -ENOMEM;
197 
198         r = add_locales_from_archive(locales);
199         if (r < 0 && r != -ENOENT)
200                 return r;
201 
202         r = add_locales_from_libdir(locales);
203         if (r < 0)
204                 return r;
205 
206         l = set_get_strv(locales);
207         if (!l)
208                 return -ENOMEM;
209 
210         r = getenv_bool("SYSTEMD_LIST_NON_UTF8_LOCALES");
211         if (r == -ENXIO || r == 0) {
212                 char **a, **b;
213 
214                 /* Filter out non-UTF-8 locales, because it's 2019, by default */
215                 for (a = b = l; *a; a++) {
216 
217                         if (endswith(*a, "UTF-8") ||
218                             strstr(*a, ".UTF-8@"))
219                                 *(b++) = *a;
220                         else
221                                 free(*a);
222                 }
223 
224                 *b = NULL;
225 
226         } else if (r < 0)
227                 log_debug_errno(r, "Failed to parse $SYSTEMD_LIST_NON_UTF8_LOCALES as boolean");
228 
229         strv_sort(l);
230 
231         *ret = TAKE_PTR(l);
232 
233         return 0;
234 }
235 
/* Checks whether the specified string is acceptable as a locale name: it must be non-empty, shorter
 * than 128 bytes, and pass the UTF-8, filename and string safety checks. */
bool locale_is_valid(const char *name) {
        return !isempty(name) &&
                strlen(name) < 128 &&
                utf8_is_valid(name) &&
                filename_is_valid(name) &&
                string_is_safe(name);
}
255 
locale_is_installed(const char * name)256 int locale_is_installed(const char *name) {
257         if (!locale_is_valid(name))
258                 return false;
259 
260         if (STR_IN_SET(name, "C", "POSIX")) /* These ones are always OK */
261                 return true;
262 
263         _cleanup_(freelocalep) locale_t loc =
264                 newlocale(LC_ALL_MASK, name, 0);
265         if (loc == (locale_t) 0)
266                 return errno == ENOMEM ? -ENOMEM : false;
267 
268         return true;
269 }
270 
init_gettext(void)271 void init_gettext(void) {
272         setlocale(LC_ALL, "");
273         textdomain(GETTEXT_PACKAGE);
274 }
275 
/* Does the actual probing for is_locale_utf8(). Whenever something cannot be determined the answer
 * is 'true', since UTF-8 is supported pretty much everywhere these days. */
static bool detect_locale_utf8(void) {
        const char *codeset, *ctype;

        if (!setlocale(LC_ALL, ""))
                return true;

        codeset = nl_langinfo(CODESET);
        if (!codeset || streq(codeset, "UTF-8"))
                return true;

        /* If LC_CTYPE is "C" it is effectively unset, and we assume UTF-8 works fine then. */
        ctype = setlocale(LC_CTYPE, NULL);
        if (!ctype)
                return true;

        /* ...but only if "C" was not requested explicitly through the environment. */
        return STR_IN_SET(ctype, "C", "POSIX") &&
                !getenv("LC_ALL") &&
                !getenv("LC_CTYPE") &&
                !getenv("LANG");
}

/* Returns whether the current locale is considered UTF-8 capable. The answer is determined on the
 * first call and cached in a static variable for all later calls. */
bool is_locale_utf8(void) {
        static int cached_answer = -1;

        if (cached_answer < 0)
                cached_answer = detect_locale_utf8();

        return (bool) cached_answer;
}
321 
locale_variables_free(char * l[_VARIABLE_LC_MAX])322 void locale_variables_free(char *l[_VARIABLE_LC_MAX]) {
323         if (!l)
324                 return;
325 
326         for (LocaleVariable i = 0; i < _VARIABLE_LC_MAX; i++)
327                 l[i] = mfree(l[i]);
328 }
329 
/* Maps each LocaleVariable index to the name of the corresponding locale variable, i.e. LANG,
 * LANGUAGE and the individual LC_* categories. The table has _VARIABLE_LC_MAX slots.
 *
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP() presumably generates the string <-> LocaleVariable
 * conversion helpers for this table; confirm against string-table.h. */
static const char * const locale_variable_table[_VARIABLE_LC_MAX] = {
        [VARIABLE_LANG] = "LANG",
        [VARIABLE_LANGUAGE] = "LANGUAGE",
        [VARIABLE_LC_CTYPE] = "LC_CTYPE",
        [VARIABLE_LC_NUMERIC] = "LC_NUMERIC",
        [VARIABLE_LC_TIME] = "LC_TIME",
        [VARIABLE_LC_COLLATE] = "LC_COLLATE",
        [VARIABLE_LC_MONETARY] = "LC_MONETARY",
        [VARIABLE_LC_MESSAGES] = "LC_MESSAGES",
        [VARIABLE_LC_PAPER] = "LC_PAPER",
        [VARIABLE_LC_NAME] = "LC_NAME",
        [VARIABLE_LC_ADDRESS] = "LC_ADDRESS",
        [VARIABLE_LC_TELEPHONE] = "LC_TELEPHONE",
        [VARIABLE_LC_MEASUREMENT] = "LC_MEASUREMENT",
        [VARIABLE_LC_IDENTIFICATION] = "LC_IDENTIFICATION"
};

DEFINE_STRING_TABLE_LOOKUP(locale_variable, LocaleVariable);
348