1 /* Copyright (C) 2000-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18 /* We always want assert to be fully defined. */
19 #undef NDEBUG
20 #include <assert.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26
27
/* Forward declaration: check_ascii is defined at the end of this file
   but is called earlier, from do_test.  */
static int check_ascii (const char *locname);
29
/* UTF-8 single byte feeding test for mbrtowc().

   First feeds the three-byte sequence E2 89 A0 (U+2260) one byte per
   call and checks that nothing is stored until the last byte arrives,
   then decodes E0 A0 80 (U+0800) split as 1 + 2 bytes and in a single
   call.  The caller must already have selected a UTF-8 LC_CTYPE locale
   (utf8_test does this).  Every check is an assert, so the function
   either aborts or returns 0.  */
static int
utf8_test_1 (void)
{
  wchar_t wc;
  mbstate_t s;

  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (wc == 42);            /* no value has been stored into wc yet */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);   /* 3rd byte processed */
  assert (wc == 0x2260);        /* E2 89 A0 = U+2260 (not equal) decoded correctly */
  assert (mbrtowc (&wc, "", 1, &s) == 0);       /* test final byte processing */
  assert (wc == 0);             /* test final byte decoding */

  /* The following test is by Al Viro <aviro@redhat.com>.  */
  const char str[] = "\xe0\xa0\x80";

  /* Split input: the lead byte alone is incomplete, the remaining two
     bytes complete the character.  */
  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, str, 1, &s) == (size_t) -2);
  assert (mbrtowc (&wc, str + 1, 2, &s) == 2);
  assert (wc == 0x800);

  /* Same character converted in one call.  */
  wc = 42;                      /* arbitrary number */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, str, 3, &s) == 3);
  assert (wc == 0x800);

  return 0;
}
63
/* Test for NUL byte processing via empty string.

   A NUL byte passed to mbrtowc must be accepted (return 0, state back
   to initial) when no character is pending, and rejected with
   (size_t) -1 when it arrives in the middle of a multibyte sequence.
   The caller must already have selected a UTF-8 LC_CTYPE locale.
   Every check is an assert, so the function either aborts or
   returns 0.  */
static int
utf8_test_2 (void)
{
  wchar_t wc;                   /* output sink only; never inspected here */
  mbstate_t s;

  /* NUL in the initial state.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (NULL, "", 1, &s) == 0);      /* valid terminator */
  assert (mbsinit (&s));

  /* NUL after one byte of a three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1);    /* invalid terminator */

  /* NUL after two bytes of a three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1);    /* invalid terminator */

  /* NUL after a complete three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);   /* 3rd byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == 0);      /* valid terminator */
  assert (mbsinit (&s));

  return 0;
}
97
/* Test for NUL byte processing via NULL string.

   Calling mbrtowc with a null input pointer must succeed (return 0,
   state back to initial) when no character is pending, and fail with
   (size_t) -1 when the state holds an incomplete multibyte sequence.
   The caller must already have selected a UTF-8 LC_CTYPE locale.
   Every check is an assert, so the function either aborts or
   returns 0.  */
static int
utf8_test_3 (void)
{
  wchar_t wc;                   /* output sink only; never inspected here */
  mbstate_t s;

  /* Null input in the initial state.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (NULL, NULL, 0, &s) == 0);    /* valid terminator */
  assert (mbsinit (&s));

  /* Null input after one byte of a three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1);  /* invalid terminator */

  /* Null input after two bytes of a three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1);  /* invalid terminator */

  /* Null input after a complete three-byte sequence.  */
  memset (&s, 0, sizeof (s));   /* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);   /* 3rd byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == 0);    /* valid terminator */
  assert (mbsinit (&s));

  return 0;
}
131
/* Switch LC_CTYPE to de_DE.UTF-8 and run the three UTF-8 mbrtowc
   sub-tests in order.  If the locale cannot be selected, a message is
   written to stderr and the process exits with status 1.  Otherwise
   the bitwise OR of the sub-test return values is returned.  */
static int
utf8_test (void)
{
  static const char utf8_locale[] = "de_DE.UTF-8";

  if (setlocale (LC_CTYPE, utf8_locale) == NULL)
    {
      fprintf (stderr, "locale '%s' not available!\n", utf8_locale);
      exit (1);
    }

  /* Separate statements keep the sub-tests in a fixed order, and the
     bitwise OR means all of them run whatever the earlier ones
     returned.  */
  int status = utf8_test_1 ();
  status |= utf8_test_2 ();
  status |= utf8_test_3 ();

  return status;
}
150
151
/* Make NAME the current locale for all categories.  Returns nonzero on
   success.  On failure prints a diagnostic and returns 0, leaving the
   previously selected locale in effect.  */
static int
select_locale (const char *name)
{
  if (setlocale (LC_ALL, name) != NULL)
    return 1;

  printf ("cannot set locale \"%s\"\n", name);
  return 0;
}

/* Test entry point.  Runs check_ascii in the C, de_DE.UTF-8 and
   ja_JP.EUC-JP locales and the UTF-8 specific tests in de_DE.UTF-8.
   Returns 0 on success and nonzero if any check failed or a locale
   could not be selected.  */
static int
do_test (void)
{
  int result = 0;

  /* Check mapping of ASCII range for some character sets which have
     ASCII as a subset.  For those the wide char generated must have
     the same value.

     Each locale switch is verified: when setlocale fails the previous
     locale stays active, so running check_ascii anyway would re-test
     that locale and report a pass for one that was never exercised.  */
  if (select_locale ("C"))
    result |= check_ascii (setlocale (LC_ALL, NULL));
  else
    result = 1;

  if (select_locale ("de_DE.UTF-8"))
    {
      result |= check_ascii (setlocale (LC_ALL, NULL));
      result |= utf8_test ();
    }
  else
    result = 1;

  if (select_locale ("ja_JP.EUC-JP"))
    result |= check_ascii (setlocale (LC_ALL, NULL));
  else
    result = 1;

  return result;
}
172
173
/* Verify that mbrtowc maps every byte in the ASCII range (0..127) to
   the wide character of the same value in the current locale.

   LOCNAME is only used to label the output.  For each byte C, a
   buffer of MB_CUR_MAX bytes beginning with C is converted from the
   initial shift state.  The call must return 0 for C == 0 and 1 for
   every other C, and must store C in the result.  One line is printed
   per failing byte, followed by the total error count.

   Returns 1 if at least one byte failed, 0 otherwise.  */
static int
check_ascii (const char *locname)
{
  /* MB_CUR_MAX depends on the current locale and need not be a
     constant expression.  The locale is not changed in this function,
     so evaluate it once.  */
  const size_t mb_max = MB_CUR_MAX;
  int res = 0;

  printf ("Testing locale \"%s\":\n", locname);

  for (int c = 0; c <= 127; ++c)
    {
      char buf[mb_max];
      /* Sentinel with all bits set; it differs from every value in
         0..127, so a missing store is detected below.  */
      wchar_t wc = (wchar_t) -1;
      mbstate_t s;
      size_t n;

      /* C is followed by successive byte values so the converter sees
         trailing data after the ASCII byte.  */
      for (size_t i = 0; i < mb_max; ++i)
        buf[i] = (char) (c + (int) i);

      memset (&s, '\0', sizeof (s));

      n = mbrtowc (&wc, buf, mb_max, &s);
      if (n == (size_t) -1)
        {
          printf ("%s: '\\x%x': encoding error\n", locname, c);
          ++res;
        }
      else if (n == (size_t) -2)
        {
          printf ("%s: '\\x%x': incomplete character\n", locname, c);
          ++res;
        }
      else if (n == 0 && c != 0)
        {
          printf ("%s: '\\x%x': 0 returned\n", locname, c);
          ++res;
        }
      else if (n != 0 && c == 0)
        {
          printf ("%s: '\\x%x': not 0 returned\n", locname, c);
          ++res;
        }
      else if (c != 0 && n != 1)
        {
          printf ("%s: '\\x%x': not 1 returned\n", locname, c);
          ++res;
        }
      else if (wc != (wchar_t) c)
        {
          printf ("%s: '\\x%x': wc != L'\\x%x'\n", locname, c, c);
          ++res;
        }
    }

  /* Literal format string; the plural suffix is passed as an argument
     so the output is unchanged ("1 error" / "N errors").  */
  printf ("%d error%s\n", res, res == 1 ? "" : "s");

  return res != 0;
}
231
/* Register do_test as the test body.  The included skeleton is
   presumably the shared driver that supplies main and invokes
   TEST_FUNCTION -- confirm against ../test-skeleton.c.  */
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
234