1 /* German regular expression tests.
2    Copyright (C) 2002-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <sys/types.h>
20 #include <mcheck.h>
21 #include <regex.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <locale.h>
25 
/* Table of test cases, every one of which is expected to match.
   PATTERN is compiled with FLAGS and then executed against STRING.
   The first NMATCH entries of RM hold the expected match offsets; an
   entry of { -1, -1 } ends the list of offsets that get compared.  */
struct
{
  const char *pattern;
  const char *string;
  int flags;
  int nmatch;
  regmatch_t rm[5];
} tests[] = {
  /* UTF-8 encodings used by the case-insensitive tests:
     U+00C4  \xc3\x84  LATIN CAPITAL LETTER A WITH DIAERESIS
     U+00D6  \xc3\x96  LATIN CAPITAL LETTER O WITH DIAERESIS
     U+00E4  \xc3\xa4  LATIN SMALL LETTER A WITH DIAERESIS
     U+00F6  \xc3\xb6  LATIN SMALL LETTER O WITH DIAERESIS  */
  {
    .pattern = "\xc3\x84\xc3\x96*\xc3\xb6$",
    .string = "aB\xc3\xa4\xc3\xb6\xc3\xb6\xc3\x96",
    .flags = REG_ICASE,
    .nmatch = 2,
    .rm = { { .rm_so = 2, .rm_eo = 10 }, { .rm_so = -1, .rm_eo = -1 } }
  },
  {
    .pattern = "[\xc3\x84x]\xc3\x96*\xc3\xb6$",
    .string = "aB\xc3\x84\xc3\xb6\xc3\xb6\xc3\x96",
    .flags = REG_ICASE,
    .nmatch = 2,
    .rm = { { .rm_so = 2, .rm_eo = 10 }, { .rm_so = -1, .rm_eo = -1 } }
  },
  {
    .pattern = "[\xc3\x84x]\xc3\x96*\xc3\xb6$",
    .string = "aB\xc3\xa4\xc3\xb6\xc3\xb6\xc3\x96",
    .flags = REG_ICASE,
    .nmatch = 2,
    .rm = { { .rm_so = 2, .rm_eo = 10 }, { .rm_so = -1, .rm_eo = -1 } }
  },
  {
    .pattern = "[^x]\xc3\x96*\xc3\xb6$",
    .string = "aB\xc3\xa4\xc3\xb6\xc3\xb6\xc3\x96",
    .flags = REG_ICASE,
    .nmatch = 2,
    .rm = { { .rm_so = 2, .rm_eo = 10 }, { .rm_so = -1, .rm_eo = -1 } }
  },

  /* Tests for bug 9697:
     U+00DF  \xc3\x9f  LATIN SMALL LETTER SHARP S
     U+02DA  \xcb\x9a  RING ABOVE
     U+02E2  \xcb\xa2  MODIFIER LETTER SMALL S  */
  {
    .pattern = "[a-z]|[^a-z]",
    .string = "\xcb\xa2",
    .flags = REG_EXTENDED,
    .nmatch = 2,
    .rm = { { .rm_so = 0, .rm_eo = 2 }, { .rm_so = -1, .rm_eo = -1 } }
  },
  {
    .pattern = "[a-z]",
    .string = "\xc3\x9f",
    .flags = REG_EXTENDED,
    .nmatch = 2,
    .rm = { { .rm_so = 0, .rm_eo = 2 }, { .rm_so = -1, .rm_eo = -1 } }
  },
  {
    .pattern = "[^a-z]",
    .string = "\xcb\x9a",
    .flags = REG_EXTENDED,
    .nmatch = 2,
    .rm = { { .rm_so = 0, .rm_eo = 2 }, { .rm_so = -1, .rm_eo = -1 } }
  },
};
58 
59 
60 static int
do_test(void)61 do_test (void)
62 {
63   regex_t re;
64   regmatch_t rm[5];
65   size_t i;
66   int n, ret = 0;
67 
68   setlocale (LC_ALL, "de_DE.UTF-8");
69   for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
70     {
71       n = regcomp (&re, tests[i].pattern, tests[i].flags);
72       if (n != 0)
73 	{
74 	  char buf[500];
75 	  regerror (n, &re, buf, sizeof (buf));
76 	  printf ("regcomp %zd failed: %s\n", i, buf);
77 	  ret = 1;
78 	  continue;
79 	}
80 
81       if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0))
82 	{
83 	  printf ("regexec %zd failed\n", i);
84 	  ret = 1;
85 	  regfree (&re);
86 	  continue;
87 	}
88 
89       for (n = 0; n < tests[i].nmatch; ++n)
90 	if (rm[n].rm_so != tests[i].rm[n].rm_so
91               || rm[n].rm_eo != tests[i].rm[n].rm_eo)
92 	  {
93 	    if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
94 	      break;
95 	    printf ("regexec match failure rm[%d] %d..%d\n",
96 		    n, rm[n].rm_so, rm[n].rm_eo);
97 	    ret = 1;
98 	    break;
99 	  }
100 
101       regfree (&re);
102     }
103 
104   return ret;
105 }
106 
107 #define TEST_FUNCTION do_test ()
108 #include "../test-skeleton.c"
109