1 /* Test compilation of truncated regular expressions.
2    Copyright (C) 2018-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
/* This test constructs various patterns in an attempt to trigger
   over-reads in the regular expression compiler, such as bug
   23578.  */
22 
23 #include <array_length.h>
24 #include <errno.h>
25 #include <locale.h>
26 #include <regex.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <support/check.h>
31 #include <support/next_to_fault.h>
32 #include <support/support.h>
33 #include <support/test-driver.h>
34 #include <wchar.h>
35 
/* Locales under which the whole test is repeated.  The element size
   of 17 is exactly what the longest name, "de_DE.ISO-8859-1" (16
   characters), needs together with its terminating null byte.  */
static const char locales[][17] = {
  "C",
  "C.UTF-8",
  "en_US.UTF-8",
  "de_DE.ISO-8859-1",
};
44 
45 /* Syntax options.  Will be combined with other flags.  */
46 static const reg_syntax_t syntaxes[] =
47   {
48     RE_SYNTAX_EMACS,
49     RE_SYNTAX_AWK,
50     RE_SYNTAX_GNU_AWK,
51     RE_SYNTAX_POSIX_AWK,
52     RE_SYNTAX_GREP,
53     RE_SYNTAX_EGREP,
54     RE_SYNTAX_POSIX_EGREP,
55     RE_SYNTAX_POSIX_BASIC,
56     RE_SYNTAX_POSIX_EXTENDED,
57     RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
58   };
59 
/* Suffixes appended directly after the initial character of each
   pattern.  Apart from the empty string, every entry ends in a '[',
   a '(' or a backslash, so the pattern stops right after it.  */
static const char trailing_strings[][4] = {
  "",
  "[",
  "\\",
  "[\\",
  "(",
  "(\\",
  "\\(",
};
71 
72 static int
do_test(void)73 do_test (void)
74 {
75   /* Staging buffer for the constructed regular expression.  */
76   char buffer[16];
77 
78   /* Allocation used to detect over-reading by the regular expression
79      compiler.  */
80   struct support_next_to_fault ntf
81     = support_next_to_fault_allocate (sizeof (buffer));
82 
83   /* Arbitrary Unicode codepoint at which we stop generating
84      characters.  We do not probe the whole range because that would
85      take too long due to combinatorical exploision as the result of
86      combination with other flags.  */
87   static const wchar_t last_character = 0xfff;
88 
89   for (size_t locale_idx = 0; locale_idx < array_length (locales);
90        ++ locale_idx)
91     {
92       if (setlocale (LC_ALL, locales[locale_idx]) == NULL)
93         {
94           support_record_failure ();
95           printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);
96           continue;
97         }
98       if (test_verbose > 0)
99         printf ("info: testing locale \"%s\"\n", locales[locale_idx]);
100 
101       for (wchar_t wc = 0; wc <= last_character; ++wc)
102         {
103           char *after_wc;
104           if (wc == 0)
105             {
106               /* wcrtomb treats L'\0' in a special way.  */
107               *buffer = '\0';
108               after_wc = &buffer[1];
109             }
110           else
111             {
112               mbstate_t ps = { };
113               size_t ret = wcrtomb (buffer, wc, &ps);
114               if (ret == (size_t) -1)
115                 {
116                   /* EILSEQ means that the target character set
117                      cannot encode the character.  */
118                   if (errno != EILSEQ)
119                     {
120                       support_record_failure ();
121                       printf ("error: wcrtomb (0x%x) failed: %m\n",
122                               (unsigned) wc);
123                     }
124                   continue;
125                 }
126               TEST_VERIFY_EXIT (ret != 0);
127               after_wc = &buffer[ret];
128             }
129 
130           for (size_t trailing_idx = 0;
131                trailing_idx < array_length (trailing_strings);
132                ++trailing_idx)
133             {
134               char *after_trailing
135                 = stpcpy (after_wc, trailing_strings[trailing_idx]);
136 
137               for (int do_nul = 0; do_nul < 2; ++do_nul)
138                 {
139                   char *after_nul;
140                   if (do_nul)
141                     {
142                       *after_trailing = '\0';
143                       after_nul = &after_trailing[1];
144                     }
145                   else
146                     after_nul = after_trailing;
147 
148                   size_t length = after_nul - buffer;
149 
150                   /* Make sure that the faulting region starts
151                      after the used portion of the buffer.  */
152                   char *ntf_start = ntf.buffer + sizeof (buffer) - length;
153                   memcpy (ntf_start, buffer, length);
154 
155                   for (const reg_syntax_t *psyntax = syntaxes;
156                        psyntax < array_end (syntaxes); ++psyntax)
157                     for (int do_icase = 0; do_icase < 2; ++do_icase)
158                       {
159                         re_syntax_options = *psyntax;
160                         if (do_icase)
161                           re_syntax_options |= RE_ICASE;
162 
163                         regex_t reg;
164                         memset (&reg, 0, sizeof (reg));
165                         const char *msg = re_compile_pattern
166                           (ntf_start, length, &reg);
167                         if (msg != NULL)
168                           {
169                             if (test_verbose > 0)
170                               {
171                                 char *quoted = support_quote_blob
172                                   (buffer, length);
173                                 printf ("info: compilation failed for pattern"
174                                         " \"%s\", syntax 0x%lx: %s\n",
175                                         quoted, re_syntax_options, msg);
176                                 free (quoted);
177                               }
178                           }
179                         else
180                           regfree (&reg);
181                       }
182                 }
183             }
184         }
185     }
186 
187   support_next_to_fault_free (&ntf);
188 
189   return 0;
190 }
191 
192 #include <support/test-driver.c>
193