1 /* Copyright (C) 2000-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, see
16    <https://www.gnu.org/licenses/>.  */
17 
18 /* We always want assert to be fully defined.  */
19 #undef NDEBUG
20 #include <assert.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <wchar.h>
26 
27 
28 static int check_ascii (const char *locname);
29 
/* UTF-8 single byte feeding test for mbrtowc().

   Expects a UTF-8 locale to be in effect and checks that
   - a three-byte character fed one byte at a time yields (size_t) -2
     for each incomplete prefix, and that the wide character is stored
     only once the final byte arrives;
   - a following NUL byte is converted to L'\0' with return value 0;
   - the rest of a character can be supplied in a single call after its
     first byte, and a complete character can be converted in one call.

   Returns 0; a failed expectation aborts through assert().  */
static int
utf8_test_1 (void)
{
  wchar_t wc;
  mbstate_t s;

  wc = 42;			/* arbitrary number */
  memset (&s, 0, sizeof (s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (wc == 42);		/* no value has been stored into &wc yet */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);	/* 3rd byte processed */
  assert (wc == 0x2260);	/* E2 89 A0 = U+2260 (not equal) decoded correctly */
  assert (mbrtowc (&wc, "", 1, &s) == 0);	/* test final byte processing */
  assert (wc == 0);		/* test final byte decoding */

  /* The following test is by Al Viro <aviro@redhat.com>.  */
  const char str[] = "\xe0\xa0\x80";

  /* First byte alone, then the two remaining bytes in one call.  */
  wc = 42;			/* arbitrary number */
  memset (&s, 0, sizeof (s));	/* get s into initial state */
  assert (mbrtowc (&wc, str, 1, &s) == (size_t) -2);
  assert (mbrtowc (&wc, str + 1, 2, &s) == 2);
  assert (wc == 0x800);

  /* The whole three-byte sequence in one call.  */
  wc = 42;			/* arbitrary number */
  memset (&s, 0, sizeof (s));	/* get s into initial state */
  assert (mbrtowc (&wc, str, 3, &s) == 3);
  assert (wc == 0x800);

  return 0;
}
63 
/* Check mbrtowc() handling of a NUL byte supplied as the empty string.

   The NUL is fed after zero, one, two and all three bytes of the UTF-8
   sequence E2 89 A0.  On a character boundary it must be accepted
   (return value 0, conversion state initial); in the middle of a
   character it must be rejected with (size_t) -1.

   Returns 0; a failed expectation aborts through assert().  */
static int
utf8_test_2 (void)
{
  wchar_t out;
  mbstate_t state;
  size_t rc;

  /* Nothing pending: the NUL is a valid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (NULL, "", 1, &state);
  assert (rc == 0);
  assert (mbsinit (&state));

  /* One byte pending: the NUL is an invalid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (NULL, "", 1, &state);
  assert (rc == (size_t) -1);

  /* Two bytes pending: the NUL is still an invalid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\x89", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (NULL, "", 1, &state);
  assert (rc == (size_t) -1);

  /* Character complete: the NUL is a valid terminator again.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\x89", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\xA0", 1, &state);
  assert (rc == 1);
  rc = mbrtowc (NULL, "", 1, &state);
  assert (rc == 0);
  assert (mbsinit (&state));

  return 0;
}
97 
/* Check mbrtowc() handling of a NULL input string.

   mbrtowc (NULL, NULL, 0, &state) is issued after zero, one, two and
   all three bytes of the UTF-8 sequence E2 89 A0.  On a character
   boundary the call must return 0 and leave the state initial; in the
   middle of a character it must fail with (size_t) -1.

   Returns 0; a failed expectation aborts through assert().  */
static int
utf8_test_3 (void)
{
  wchar_t out;
  mbstate_t state;
  size_t rc;

  /* Nothing pending: valid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (NULL, NULL, 0, &state);
  assert (rc == 0);
  assert (mbsinit (&state));

  /* One byte pending: invalid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (NULL, NULL, 0, &state);
  assert (rc == (size_t) -1);

  /* Two bytes pending: invalid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\x89", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (NULL, NULL, 0, &state);
  assert (rc == (size_t) -1);

  /* Character complete: valid terminator.  */
  out = 42;
  memset (&state, 0, sizeof (state));
  rc = mbrtowc (&out, "\xE2", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\x89", 1, &state);
  assert (rc == (size_t) -2);
  rc = mbrtowc (&out, "\xA0", 1, &state);
  assert (rc == 1);
  rc = mbrtowc (NULL, NULL, 0, &state);
  assert (rc == 0);
  assert (mbsinit (&state));

  return 0;
}
131 
132 static int
utf8_test(void)133 utf8_test (void)
134 {
135   const char *locale = "de_DE.UTF-8";
136   int error = 0;
137 
138   if (!setlocale (LC_CTYPE, locale))
139     {
140       fprintf (stderr, "locale '%s' not available!\n", locale);
141       exit (1);
142     }
143 
144   error |= utf8_test_1 ();
145   error |= utf8_test_2 ();
146   error |= utf8_test_3 ();
147 
148   return error;
149 }
150 
151 
/* Switch LC_ALL to the locale NAME and run check_ascii on it.  Returns
   nonzero if the locale cannot be selected or if check_ascii reports a
   failure.  Without the setlocale check, an unavailable locale would
   leave the previous one active and silently test that one again.  */
static int
check_ascii_in_locale (const char *name)
{
  if (setlocale (LC_ALL, name) == NULL)
    {
      printf ("cannot set locale \"%s\"\n", name);
      return 1;
    }

  /* Pass the name as reported by the C library, as a label for the
     output of check_ascii.  */
  return check_ascii (setlocale (LC_ALL, NULL));
}

/* Test driver.  Checks the mapping of the ASCII range in the C,
   de_DE.UTF-8 and ja_JP.EUC-JP locales and runs the UTF-8 specific
   tests after the UTF-8 locale has been selected.  Returns 0 if
   everything passed and nonzero otherwise.  */
static int
do_test (void)
{
  int result = 0;

  /* Check mapping of ASCII range for some character sets which have
     ASCII as a subset.  For those the wide char generated must have
     the same value.  */
  result |= check_ascii_in_locale ("C");

  result |= check_ascii_in_locale ("de_DE.UTF-8");
  result |= utf8_test ();

  result |= check_ascii_in_locale ("ja_JP.EUC-JP");

  return result;
}
172 
173 
/* Verify that every byte in the range 0..127 converts, in the locale
   currently in effect, to the wide character with the same value.
   LOCNAME is used only to label the output.  One line is printed per
   failing byte, followed by the total number of errors.  Returns 1 if
   at least one byte failed and 0 otherwise.  */
static int
check_ascii (const char *locname)
{
  const size_t width = MB_CUR_MAX;
  int failures = 0;

  printf ("Testing locale \"%s\":\n", locname);

  for (int ch = 0; ch <= 127; ++ch)
    {
      char input[width];
      wchar_t wide = 0xffffffff;
      mbstate_t state;
      size_t len;

      /* The byte under test comes first, followed by successive byte
         values to fill the buffer.  */
      for (size_t k = 0; k < width; ++k)
        input[k] = ch + k;

      memset (&state, '\0', sizeof (state));
      len = mbrtowc (&wide, input, width, &state);

      if (len == (size_t) -1)
        printf ("%s: '\\x%x': encoding error\n", locname, ch);
      else if (len == (size_t) -2)
        printf ("%s: '\\x%x': incomplete character\n", locname, ch);
      else if (ch != 0 && len == 0)
        printf ("%s: '\\x%x': 0 returned\n", locname, ch);
      else if (ch == 0 && len != 0)
        printf ("%s: '\\x%x': not 0 returned\n", locname, ch);
      else if (ch != 0 && len != 1)
        printf ("%s: '\\x%x': not 1 returned\n", locname, ch);
      else if (wide != (wchar_t) ch)
        printf ("%s: '\\x%x': wc != L'\\x%x'\n", locname, ch, ch);
      else
        continue;		/* converted as expected */

      /* Reached only when one of the diagnostics above was printed.  */
      ++failures;
    }

  if (failures == 1)
    printf ("%d error\n", failures);
  else
    printf ("%d errors\n", failures);

  return failures != 0;
}
231 
232 #define TEST_FUNCTION do_test ()
233 #include "../test-skeleton.c"
234