1 /* Copyright (C) 1991-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, see
16    <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef _LIBC
19 # include <stdint.h>
20 #endif
21 
/* Resume state that FCT fills in through its ENDS argument when it stops
   at a '*' in the pattern.  The caller copies these values back into its
   own pattern pointer, string pointer and period flag and continues
   matching from there.  */
22 struct STRUCT
23 {
24   const CHAR *pattern;	/* Points at the '*' that stopped the nested match; callers set it to NULL beforehand to detect that nothing was recorded.  */
25   const CHAR *string;	/* String position that was current when the '*' was reached.  */
26   bool no_leading_period;	/* Value of the no_leading_period flag at that point.  */
27 };
28 
29 /* Match STRING against the file name pattern PATTERN, returning zero if
30    it matches, nonzero if not.  */
/* FCT is the core matcher.  STRING_END is compared against the current
   string position to detect the end of the input.  While
   NO_LEADING_PERIOD is true, a '.' at the current string position is not
   matched by '?', '*' or a bracket expression.  If ENDS is non-null and
   a plain '*' is reached, FCT stores the position of that '*', the
   current string position and the period flag in *ENDS and returns 0 so
   that the caller can resume from there.  */
31 static int FCT (const CHAR *pattern, const CHAR *string,
32                 const CHAR *string_end, bool no_leading_period, int flags,
33                 struct STRUCT *ends);
/* EXT is called by FCT when FNM_EXTMATCH is set and the pattern character
   OPT is directly followed by '('.  A result of -1 makes FCT fall back to
   its ordinary handling of OPT; any other result is returned as the
   outcome of the match.  NOTE(review): the definition is not visible
   here -- confirm the details against it.  */
34 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
35                 const CHAR *string_end, bool no_leading_period, int flags);
/* END takes a pointer into the pattern.  FCT treats a result that differs
   from the argument as the end of an extended pattern that can be skipped
   over.  NOTE(review): the definition is not visible here -- confirm.  */
36 static const CHAR *END (const CHAR *patternp);
37 
38 static int
FCT(const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags,struct STRUCT * ends)39 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
40      bool no_leading_period, int flags, struct STRUCT *ends)
41 {
42   const CHAR *p = pattern, *n = string;
43   UCHAR c;
44 #ifdef _LIBC
45 # if WIDE_CHAR_VERSION
46   const char *collseq = (const char *)
47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
48 # else
49   const UCHAR *collseq = (const UCHAR *)
50     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
51 # endif
52 #endif
53 
54   while ((c = *p++) != L_('\0'))
55     {
56       bool new_no_leading_period = false;
57       c = FOLD (c);
58 
59       switch (c)
60         {
61         case L_('?'):
62           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
63             {
64               int res = EXT (c, p, n, string_end, no_leading_period, flags);
65               if (res != -1)
66                 return res;
67             }
68 
69           if (n == string_end)
70             return FNM_NOMATCH;
71           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
72             return FNM_NOMATCH;
73           else if (*n == L_('.') && no_leading_period)
74             return FNM_NOMATCH;
75           break;
76 
77         case L_('\\'):
78           if (!(flags & FNM_NOESCAPE))
79             {
80               c = *p++;
81               if (c == L_('\0'))
82                 /* Trailing \ loses.  */
83                 return FNM_NOMATCH;
84               c = FOLD (c);
85             }
86           if (n == string_end || FOLD ((UCHAR) *n) != c)
87             return FNM_NOMATCH;
88           break;
89 
90         case L_('*'):
91           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
92             {
93               int res = EXT (c, p, n, string_end, no_leading_period, flags);
94               if (res != -1)
95                 return res;
96             }
97           else if (ends != NULL)
98             {
99               ends->pattern = p - 1;
100               ends->string = n;
101               ends->no_leading_period = no_leading_period;
102               return 0;
103             }
104 
105           if (n != string_end && *n == L_('.') && no_leading_period)
106             return FNM_NOMATCH;
107 
108           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
109             {
110               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
111                 {
112                   const CHAR *endp = END (p);
113                   if (endp != p)
114                     {
115                       /* This is a pattern.  Skip over it.  */
116                       p = endp;
117                       continue;
118                     }
119                 }
120 
121               if (c == L_('?'))
122                 {
123                   /* A ? needs to match one character.  */
124                   if (n == string_end)
125                     /* There isn't another character; no match.  */
126                     return FNM_NOMATCH;
127                   else if (*n == L_('/')
128                            && __glibc_unlikely (flags & FNM_FILE_NAME))
129                     /* A slash does not match a wildcard under
130                        FNM_FILE_NAME.  */
131                     return FNM_NOMATCH;
132                   else
133                     /* One character of the string is consumed in matching
134                        this ? wildcard, so *??? won't match if there are
135                        less than three characters.  */
136                     ++n;
137                 }
138             }
139 
140           if (c == L_('\0'))
141             /* The wildcard(s) is/are the last element of the pattern.
142                If the name is a file name and contains another slash
143                this means it cannot match, unless the FNM_LEADING_DIR
144                flag is set.  */
145             {
146               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
147 
148               if (flags & FNM_FILE_NAME)
149                 {
150                   if (flags & FNM_LEADING_DIR)
151                     result = 0;
152                   else
153                     {
154                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
155                         result = 0;
156                     }
157                 }
158 
159               return result;
160             }
161           else
162             {
163               const CHAR *endp;
164               struct STRUCT end;
165 
166               end.pattern = NULL;
167               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
168                              string_end - n);
169               if (endp == NULL)
170                 endp = string_end;
171 
172               if (c == L_('[')
173                   || (__glibc_unlikely (flags & FNM_EXTMATCH)
174                       && (c == L_('@') || c == L_('+') || c == L_('!'))
175                       && *p == L_('(')))
176                 {
177                   int flags2 = ((flags & FNM_FILE_NAME)
178                                 ? flags : (flags & ~FNM_PERIOD));
179 
180                   for (--p; n < endp; ++n, no_leading_period = false)
181                     if (FCT (p, n, string_end, no_leading_period, flags2,
182                              &end) == 0)
183                       goto found;
184                 }
185               else if (c == L_('/') && (flags & FNM_FILE_NAME))
186                 {
187                   while (n < string_end && *n != L_('/'))
188                     ++n;
189                   if (n < string_end && *n == L_('/')
190                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
191                                NULL) == 0))
192                     return 0;
193                 }
194               else
195                 {
196                   int flags2 = ((flags & FNM_FILE_NAME)
197                                 ? flags : (flags & ~FNM_PERIOD));
198 
199                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
200                     c = *p;
201                   c = FOLD (c);
202                   for (--p; n < endp; ++n, no_leading_period = false)
203                     if (FOLD ((UCHAR) *n) == c
204                         && (FCT (p, n, string_end, no_leading_period, flags2,
205                                  &end) == 0))
206                       {
207                       found:
208                         if (end.pattern == NULL)
209                           return 0;
210                         break;
211                       }
212                   if (end.pattern != NULL)
213                     {
214                       p = end.pattern;
215                       n = end.string;
216                       no_leading_period = end.no_leading_period;
217                       continue;
218                     }
219                 }
220             }
221 
222           /* If we come here no match is possible with the wildcard.  */
223           return FNM_NOMATCH;
224 
225         case L_('['):
226           {
227             /* NOT is nonzero if the sense of the character class is inverted.  */
228             const CHAR *p_init = p;
229             const CHAR *n_init = n;
230             bool not;
231             CHAR cold;
232             UCHAR fn;
233 
234             if (posixly_correct == 0)
235               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
236 
237             if (n == string_end)
238               return FNM_NOMATCH;
239 
240             if (*n == L_('.') && no_leading_period)
241               return FNM_NOMATCH;
242 
243             if (*n == L_('/') && (flags & FNM_FILE_NAME))
244               /* '/' cannot be matched.  */
245               return FNM_NOMATCH;
246 
247             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
248             if (not)
249               ++p;
250 
251             fn = FOLD ((UCHAR) *n);
252 
253             c = *p++;
254             for (;;)
255               {
256                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
257                   {
258                     if (*p == L_('\0'))
259                       return FNM_NOMATCH;
260                     c = FOLD ((UCHAR) *p);
261                     ++p;
262 
263                     goto normal_bracket;
264                   }
265                 else if (c == L_('[') && *p == L_(':'))
266                   {
267                     /* Leave room for the null.  */
268                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
269                     size_t c1 = 0;
270                     wctype_t wt;
271                     const CHAR *startp = p;
272 
273                     for (;;)
274                       {
275                         if (c1 == CHAR_CLASS_MAX_LENGTH)
276                           /* The name is too long and therefore the pattern
277                              is ill-formed.  */
278                           return FNM_NOMATCH;
279 
280                         c = *++p;
281                         if (c == L_(':') && p[1] == L_(']'))
282                           {
283                             p += 2;
284                             break;
285                           }
286                         if (c < L_('a') || c >= L_('z'))
287                           {
288                             /* This cannot possibly be a character class name.
289                                Match it as a normal range.  */
290                             p = startp;
291                             c = L_('[');
292                             goto normal_bracket;
293                           }
294                         str[c1++] = c;
295                       }
296                     str[c1] = L_('\0');
297 
298                     wt = IS_CHAR_CLASS (str);
299                     if (wt == 0)
300                       /* Invalid character class name.  */
301                       return FNM_NOMATCH;
302 
303 #if defined _LIBC && ! WIDE_CHAR_VERSION
304                     /* The following code is glibc-specific, but it does
305                        a good job of speeding up the matching since we
306                        can avoid the btowc() call.  */
307                     if (_ISCTYPE ((UCHAR) *n, wt))
308                       goto matched;
309 #else
310                     if (iswctype (BTOWC ((UCHAR) *n), wt))
311                       goto matched;
312 #endif
313                     c = *p++;
314                   }
315 #ifdef _LIBC
316                 else if (c == L_('[') && *p == L_('='))
317                   {
318                     /* It's important that STR be a scalar variable rather
319                        than a one-element array, because GCC (at least 4.9.2
320                        -O2 on x86-64) can be confused by the array and
321                        diagnose a "used uninitialized" in a dead branch in the
322                        findidx function.  */
323                     UCHAR str;
324                     uint32_t nrules =
325                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
326                     const CHAR *startp = p;
327 
328                     c = *++p;
329                     if (c == L_('\0'))
330                       {
331                         p = startp;
332                         c = L_('[');
333                         goto normal_bracket;
334                       }
335                     str = c;
336 
337                     c = *++p;
338                     if (c != L_('=') || p[1] != L_(']'))
339                       {
340                         p = startp;
341                         c = L_('[');
342                         goto normal_bracket;
343                       }
344                     p += 2;
345 
346                     if (nrules == 0)
347                       {
348                         if ((UCHAR) *n == str)
349                           goto matched;
350                       }
351                     else
352                       {
353                         const int32_t *table;
354 # if WIDE_CHAR_VERSION
355                         const int32_t *weights;
356                         const wint_t *extra;
357 # else
358                         const unsigned char *weights;
359                         const unsigned char *extra;
360 # endif
361                         const int32_t *indirect;
362                         int32_t idx;
363                         const UCHAR *cp = (const UCHAR *) &str;
364 
365 # if WIDE_CHAR_VERSION
366                         table = (const int32_t *)
367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
368                         weights = (const int32_t *)
369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
370                         extra = (const wint_t *)
371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
372                         indirect = (const int32_t *)
373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
374 # else
375                         table = (const int32_t *)
376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
377                         weights = (const unsigned char *)
378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
379                         extra = (const unsigned char *)
380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
381                         indirect = (const int32_t *)
382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
383 # endif
384 
385                         idx = FINDIDX (table, indirect, extra, &cp, 1);
386                         if (idx != 0)
387                           {
388                             /* We found a table entry.  Now see whether the
389                                character we are currently at has the same
390                                equivalence class value.  */
391                             int len = weights[idx & 0xffffff];
392                             int32_t idx2;
393                             const UCHAR *np = (const UCHAR *) n;
394 
395                             idx2 = FINDIDX (table, indirect, extra,
396                                             &np, string_end - n);
397                             if (idx2 != 0
398                                 && (idx >> 24) == (idx2 >> 24)
399                                 && len == weights[idx2 & 0xffffff])
400                               {
401                                 int cnt = 0;
402 
403                                 idx &= 0xffffff;
404                                 idx2 &= 0xffffff;
405 
406                                 while (cnt < len
407                                        && (weights[idx + 1 + cnt]
408                                            == weights[idx2 + 1 + cnt]))
409                                   ++cnt;
410 
411                                 if (cnt == len)
412                                   goto matched;
413                               }
414                           }
415                       }
416 
417                     c = *p++;
418                   }
419 #endif
420                 else if (c == L_('\0'))
421                   {
422                     /* [ unterminated, treat as normal character.  */
423                     p = p_init;
424                     n = n_init;
425                     c = L_('[');
426                     goto normal_match;
427                   }
428                 else
429                   {
430                     bool is_range = false;
431 
432 #ifdef _LIBC
433                     bool is_seqval = false;
434 
435                     if (c == L_('[') && *p == L_('.'))
436                       {
437                         uint32_t nrules =
438                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
439                         const CHAR *startp = p;
440                         size_t c1 = 0;
441 
442                         while (1)
443                           {
444                             c = *++p;
445                             if (c == L_('.') && p[1] == L_(']'))
446                               {
447                                 p += 2;
448                                 break;
449                               }
450                             if (c == '\0')
451                               return FNM_NOMATCH;
452                             ++c1;
453                           }
454 
455                         /* We have to handle the symbols differently in
456                            ranges since then the collation sequence is
457                            important.  */
458                         is_range = *p == L_('-') && p[1] != L_('\0');
459 
460                         if (nrules == 0)
461                           {
462                             /* There are no names defined in the collation
463                                data.  Therefore we only accept the trivial
464                                names consisting of the character itself.  */
465                             if (c1 != 1)
466                               return FNM_NOMATCH;
467 
468                             if (!is_range && *n == startp[1])
469                               goto matched;
470 
471                             cold = startp[1];
472                             c = *p++;
473                           }
474                         else
475                           {
476                             int32_t table_size;
477                             const int32_t *symb_table;
478                             const unsigned char *extra;
479                             int32_t idx;
480                             int32_t elem;
481 # if WIDE_CHAR_VERSION
482                             CHAR *wextra;
483 # endif
484 
485                             table_size =
486                               _NL_CURRENT_WORD (LC_COLLATE,
487                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
488                             symb_table = (const int32_t *)
489                               _NL_CURRENT (LC_COLLATE,
490                                            _NL_COLLATE_SYMB_TABLEMB);
491                             extra = (const unsigned char *)
492                               _NL_CURRENT (LC_COLLATE,
493                                            _NL_COLLATE_SYMB_EXTRAMB);
494 
495                             for (elem = 0; elem < table_size; elem++)
496                               if (symb_table[2 * elem] != 0)
497                                 {
498                                   idx = symb_table[2 * elem + 1];
499                                   /* Skip the name of collating element.  */
500                                   idx += 1 + extra[idx];
501 # if WIDE_CHAR_VERSION
502                                   /* Skip the byte sequence of the
503                                      collating element.  */
504                                   idx += 1 + extra[idx];
505                                   /* Adjust for the alignment.  */
506                                   idx = (idx + 3) & ~3;
507 
508                                   wextra = (CHAR *) &extra[idx + 4];
509 
510                                   if (/* Compare the length of the sequence.  */
511                                       c1 == wextra[0]
512                                       /* Compare the wide char sequence.  */
513                                       && (__wmemcmp (startp + 1, &wextra[1],
514                                                      c1)
515                                           == 0))
516                                     /* Yep, this is the entry.  */
517                                     break;
518 # else
519                                   if (/* Compare the length of the sequence.  */
520                                       c1 == extra[idx]
521                                       /* Compare the byte sequence.  */
522                                       && memcmp (startp + 1,
523                                                  &extra[idx + 1], c1) == 0)
524                                     /* Yep, this is the entry.  */
525                                     break;
526 # endif
527                                 }
528 
529                             if (elem < table_size)
530                               {
531                                 /* Compare the byte sequence but only if
532                                    this is not part of a range.  */
533                                 if (! is_range
534 
535 # if WIDE_CHAR_VERSION
536                                     && __wmemcmp (n, &wextra[1], c1) == 0
537 # else
538                                     && memcmp (n, &extra[idx + 1], c1) == 0
539 # endif
540                                     )
541                                   {
542                                     n += c1 - 1;
543                                     goto matched;
544                                   }
545 
546                                 /* Get the collation sequence value.  */
547                                 is_seqval = true;
548 # if WIDE_CHAR_VERSION
549                                 cold = wextra[1 + wextra[0]];
550 # else
551                                 idx += 1 + extra[idx];
552                                 /* Adjust for the alignment.  */
553                                 idx = (idx + 3) & ~3;
554                                 cold = *((int32_t *) &extra[idx]);
555 # endif
556 
557                                 c = *p++;
558                               }
559                             else if (c1 == 1)
560                               {
561                                 /* No valid character.  Match it as a
562                                    single byte.  */
563                                 if (!is_range && *n == startp[1])
564                                   goto matched;
565 
566                                 cold = startp[1];
567                                 c = *p++;
568                               }
569                             else
570                               return FNM_NOMATCH;
571                           }
572                       }
573                     else
574 #endif
575                       {
576                         c = FOLD (c);
577                       normal_bracket:
578 
579                         /* We have to handle the symbols differently in
580                            ranges since then the collation sequence is
581                            important.  */
582                         is_range = (*p == L_('-') && p[1] != L_('\0')
583                                     && p[1] != L_(']'));
584 
585                         if (!is_range && c == fn)
586                           goto matched;
587 
588 #if _LIBC
589                         /* This is needed if we goto normal_bracket; from
590                            outside of is_seqval's scope.  */
591                         is_seqval = false;
592 #endif
593                         cold = c;
594                         c = *p++;
595                       }
596 
597                     if (c == L_('-') && *p != L_(']'))
598                       {
599 #if _LIBC
600                         /* We have to find the collation sequence
601                            value for C.  Collation sequence is nothing
602                            we can regularly access.  The sequence
603                            value is defined by the order in which the
604                            definitions of the collation values for the
605                            various characters appear in the source
606                            file.  A strange concept, nowhere
607                            documented.  */
608                         uint32_t fcollseq;
609                         uint32_t lcollseq;
610                         UCHAR cend = *p++;
611 
612 # if WIDE_CHAR_VERSION
613                         /* Search in the 'names' array for the characters.  */
614                         fcollseq = __collseq_table_lookup (collseq, fn);
615                         if (fcollseq == ~((uint32_t) 0))
616                           /* XXX We don't know anything about the character
617                              we are supposed to match.  This means we are
618                              failing.  */
619                           goto range_not_matched;
620 
621                         if (is_seqval)
622                           lcollseq = cold;
623                         else
624                           lcollseq = __collseq_table_lookup (collseq, cold);
625 # else
626                         fcollseq = collseq[fn];
627                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
628 # endif
629 
630                         is_seqval = false;
631                         if (cend == L_('[') && *p == L_('.'))
632                           {
633                             uint32_t nrules =
634                               _NL_CURRENT_WORD (LC_COLLATE,
635                                                 _NL_COLLATE_NRULES);
636                             const CHAR *startp = p;
637                             size_t c1 = 0;
638 
639                             while (1)
640                               {
641                                 c = *++p;
642                                 if (c == L_('.') && p[1] == L_(']'))
643                                   {
644                                     p += 2;
645                                     break;
646                                   }
647                                 if (c == '\0')
648                                   return FNM_NOMATCH;
649                                 ++c1;
650                               }
651 
652                             if (nrules == 0)
653                               {
654                                 /* There are no names defined in the
655                                    collation data.  Therefore we only
656                                    accept the trivial names consisting
657                                    of the character itself.  */
658                                 if (c1 != 1)
659                                   return FNM_NOMATCH;
660 
661                                 cend = startp[1];
662                               }
663                             else
664                               {
665                                 int32_t table_size;
666                                 const int32_t *symb_table;
667                                 const unsigned char *extra;
668                                 int32_t idx;
669                                 int32_t elem;
670 # if WIDE_CHAR_VERSION
671                                 CHAR *wextra;
672 # endif
673 
674                                 table_size =
675                                   _NL_CURRENT_WORD (LC_COLLATE,
676                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
677                                 symb_table = (const int32_t *)
678                                   _NL_CURRENT (LC_COLLATE,
679                                                _NL_COLLATE_SYMB_TABLEMB);
680                                 extra = (const unsigned char *)
681                                   _NL_CURRENT (LC_COLLATE,
682                                                _NL_COLLATE_SYMB_EXTRAMB);
683 
684                                 for (elem = 0; elem < table_size; elem++)
685                                   if (symb_table[2 * elem] != 0)
686                                     {
687                                       idx = symb_table[2 * elem + 1];
688                                       /* Skip the name of collating
689                                          element.  */
690                                       idx += 1 + extra[idx];
691 # if WIDE_CHAR_VERSION
692                                       /* Skip the byte sequence of the
693                                          collating element.  */
694                                       idx += 1 + extra[idx];
695                                       /* Adjust for the alignment.  */
696                                       idx = (idx + 3) & ~3;
697 
698                                       wextra = (CHAR *) &extra[idx + 4];
699 
700                                       if (/* Compare the length of the
701                                              sequence.  */
702                                           c1 == wextra[0]
703                                           /* Compare the wide char sequence.  */
704                                           && (__wmemcmp (startp + 1,
705                                                          &wextra[1], c1)
706                                               == 0))
707                                         /* Yep, this is the entry.  */
708                                         break;
709 # else
710                                       if (/* Compare the length of the
711                                              sequence.  */
712                                           c1 == extra[idx]
713                                           /* Compare the byte sequence.  */
714                                           && memcmp (startp + 1,
715                                                      &extra[idx + 1], c1) == 0)
716                                         /* Yep, this is the entry.  */
717                                         break;
718 # endif
719                                     }
720 
721                                 if (elem < table_size)
722                                   {
723                                     /* Get the collation sequence value.  */
724                                     is_seqval = true;
725 # if WIDE_CHAR_VERSION
726                                     cend = wextra[1 + wextra[0]];
727 # else
728                                     idx += 1 + extra[idx];
729                                     /* Adjust for the alignment.  */
730                                     idx = (idx + 3) & ~3;
731                                     cend = *((int32_t *) &extra[idx]);
732 # endif
733                                   }
734                                 else if (c1 == 1)
735                                   {
736                                     cend = startp[1];
737                                     c = *p++;
738                                   }
739                                 else
740                                   return FNM_NOMATCH;
741                               }
742                           }
743                         else
744                           {
745                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
746                               cend = *p++;
747                             if (cend == L_('\0'))
748                               return FNM_NOMATCH;
749                             cend = FOLD (cend);
750                           }
751 
752                         /* XXX It is not entirely clear to me how to handle
753                            characters which are not mentioned in the
754                            collation specification.  */
755                         if (
756 # if WIDE_CHAR_VERSION
757                             lcollseq == 0xffffffff ||
758 # endif
759                             lcollseq <= fcollseq)
760                           {
761                             /* We have to look at the upper bound.  */
762                             uint32_t hcollseq;
763 
764                             if (is_seqval)
765                               hcollseq = cend;
766                             else
767                               {
768 # if WIDE_CHAR_VERSION
769                                 hcollseq =
770                                   __collseq_table_lookup (collseq, cend);
771                                 if (hcollseq == ~((uint32_t) 0))
772                                   {
773                                     /* Hum, no information about the upper
774                                        bound.  The matching succeeds if the
775                                        lower bound is matched exactly.  */
776                                     if (lcollseq != fcollseq)
777                                       goto range_not_matched;
778 
779                                     goto matched;
780                                   }
781 # else
782                                 hcollseq = collseq[cend];
783 # endif
784                               }
785 
786                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
787                               goto matched;
788                           }
789 # if WIDE_CHAR_VERSION
790                       range_not_matched:
791 # endif
792 #else
793                         /* We use a boring value comparison of the character
794                            values.  This is better than comparing using
795                            'strcoll' since the latter would have surprising
796                            and sometimes fatal consequences.  */
797                         UCHAR cend = *p++;
798 
799                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
800                           cend = *p++;
801                         if (cend == L_('\0'))
802                           return FNM_NOMATCH;
803 
804                         /* It is a range.  */
805                         if ((UCHAR) cold <= fn && fn <= cend)
806                           goto matched;
807 #endif
808 
809                         c = *p++;
810                       }
811                   }
812 
813                 if (c == L_(']'))
814                   break;
815               }
816 
817             if (!not)
818               return FNM_NOMATCH;
819             break;
820 
821           matched:
822             /* Skip the rest of the [...] that already matched.  */
823             while ((c = *p++) != L_(']'))
824               {
825                 if (c == L_('\0'))
826 		  {
827 		    /* [ unterminated, treat as normal character.  */
828 		    p = p_init;
829 		    n = n_init;
830 		    c = L_('[');
831 		    goto normal_match;
832 		  }
833 
834                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
835                   {
836                     if (*p == L_('\0'))
837                       return FNM_NOMATCH;
838                     /* XXX 1003.2d11 is unclear if this is right.  */
839                     ++p;
840                   }
841                 else if (c == L_('[') && *p == L_(':'))
842                   {
843                     int c1 = 0;
844                     const CHAR *startp = p;
845 
846                     while (1)
847                       {
848                         c = *++p;
849                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
850                           return FNM_NOMATCH;
851 
852                         if (*p == L_(':') && p[1] == L_(']'))
853                           break;
854 
855                         if (c < L_('a') || c >= L_('z'))
856                           {
857                             p = startp - 2;
858                             break;
859                           }
860                       }
861                     p += 2;
862                   }
863                 else if (c == L_('[') && *p == L_('='))
864                   {
865                     c = *++p;
866                     if (c == L_('\0'))
867                       return FNM_NOMATCH;
868                     c = *++p;
869                     if (c != L_('=') || p[1] != L_(']'))
870                       return FNM_NOMATCH;
871                     p += 2;
872                   }
873                 else if (c == L_('[') && *p == L_('.'))
874                   {
875                     while (1)
876                       {
877                         c = *++p;
878                         if (c == L_('\0'))
879                           return FNM_NOMATCH;
880 
881                         if (c == L_('.') && p[1] == L_(']'))
882                           break;
883                       }
884                     p += 2;
885                   }
886               }
887             if (not)
888               return FNM_NOMATCH;
889           }
890           break;
891 
892         case L_('+'):
893         case L_('@'):
894         case L_('!'):
895           if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
896             {
897               int res = EXT (c, p, n, string_end, no_leading_period, flags);
898               if (res != -1)
899                 return res;
900             }
901           goto normal_match;
902 
903         case L_('/'):
904           if (NO_LEADING_PERIOD (flags))
905             {
906               if (n == string_end || c != (UCHAR) *n)
907                 return FNM_NOMATCH;
908 
909               new_no_leading_period = true;
910               break;
911             }
912           FALLTHROUGH;
913         default:
914         normal_match:
915           if (n == string_end || c != FOLD ((UCHAR) *n))
916             return FNM_NOMATCH;
917         }
918 
919       no_leading_period = new_no_leading_period;
920       ++n;
921     }
922 
923   if (n == string_end)
924     return 0;
925 
926   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
927     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
928     return 0;
929 
930   return FNM_NOMATCH;
931 }
932 
933 
934 static const CHAR *
END(const CHAR * pattern)935 END (const CHAR *pattern)
936 {
937   const CHAR *p = pattern;
938 
939   while (1)
940     if (*++p == L_('\0'))
941       /* This is an invalid pattern.  */
942       return pattern;
943     else if (*p == L_('['))
944       {
945         /* Handle brackets special.  */
946         if (posixly_correct == 0)
947           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
948 
949         /* Skip the not sign.  We have to recognize it because of a possibly
950            following ']'.  */
951         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
952           ++p;
953         /* A leading ']' is recognized as such.  */
954         if (*p == L_(']'))
955           ++p;
956         /* Skip over all characters of the list.  */
957         while (*p != L_(']'))
958           if (*p++ == L_('\0'))
959             /* This is no valid pattern.  */
960             return pattern;
961       }
962     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
963               || *p == L_('!')) && p[1] == L_('('))
964       {
965         p = END (p + 1);
966         if (*p == L_('\0'))
967           /* This is an invalid pattern.  */
968           return pattern;
969       }
970     else if (*p == L_(')'))
971       break;
972 
973   return p + 1;
974 }
975 
976 #if WIDE_CHAR_VERSION
977 # define PATTERN_PREFIX pattern_list
978 #else
979 # define PATTERN_PREFIX wpattern_list
980 #endif
981 
982 #define PASTE(a,b)                 PASTE1(a,b)
983 #define PASTE1(a,b)                a##b
984 
985 #define DYNARRAY_STRUCT            PATTERN_PREFIX
986 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
987 #define DYNARRAY_ELEMENT           CHAR *
988 #define DYNARRAY_PREFIX            PASTE(PATTERN_PREFIX,_)
989 #define DYNARRAY_INITIAL_SIZE      8
990 #include <malloc/dynarray-skeleton.c>
991 
992 static int
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)993 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
994      bool no_leading_period, int flags)
995 {
996   const CHAR *startp;
997   ptrdiff_t level;
998   struct PATTERN_PREFIX list;
999   size_t pattern_len = STRLEN (pattern);
1000   size_t pattern_i = 0;
1001   const CHAR *p;
1002   const CHAR *rs;
1003   int retval = 0;
1004 
1005   PASTE (PATTERN_PREFIX, _init) (&list);
1006 
1007   /* Parse the pattern.  Store the individual parts in the list.  */
1008   level = 0;
1009   for (startp = p = pattern + 1; level >= 0; ++p)
1010     if (*p == L_('\0'))
1011       {
1012         /* This is an invalid pattern.  */
1013         retval = -1;
1014         goto out;
1015       }
1016     else if (*p == L_('['))
1017       {
1018         /* Handle brackets special.  */
1019         if (posixly_correct == 0)
1020           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1021 
1022         /* Skip the not sign.  We have to recognize it because of a possibly
1023            following ']'.  */
1024         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1025           ++p;
1026         /* A leading ']' is recognized as such.  */
1027         if (*p == L_(']'))
1028           ++p;
1029         /* Skip over all characters of the list.  */
1030         while (*p != L_(']'))
1031           if (*p++ == L_('\0'))
1032             {
1033               /* This is no valid pattern.  */
1034               retval = -1;
1035               goto out;
1036             }
1037       }
1038     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1039               || *p == L_('!')) && p[1] == L_('('))
1040       /* Remember the nesting level.  */
1041       ++level;
1042     else if (*p == L_(')') || *p == L_('|'))
1043       {
1044         if (level == 0)
1045           {
1046             size_t slen = opt == L_('?') || opt == L_('@')
1047 			  ? pattern_len : p - startp + 1;
1048             CHAR *newp = malloc (slen * sizeof (CHAR));
1049             if (newp != NULL)
1050               {
1051                 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1052                 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1053               }
1054             if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1055               {
1056                 retval = -2;
1057                 goto out;
1058               }
1059 
1060             if (*p == L_('|'))
1061               startp = p + 1;
1062           }
1063         if (*p == L_(')'))
1064 	  level--;
1065       }
1066   assert (p[-1] == L_(')'));
1067 
1068   switch (opt)
1069     {
1070     case L_('*'):
1071       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1072         goto success;
1073       FALLTHROUGH;
1074     case L_('+'):
1075       for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1076         {
1077           for (rs = string; rs <= string_end; ++rs)
1078             /* First match the prefix with the current pattern with the
1079                current pattern.  */
1080             if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1081                      rs, no_leading_period,
1082                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1083                      NULL) == 0
1084                 /* This was successful.  Now match the rest with the rest
1085                    of the pattern.  */
1086                 && (FCT (p, rs, string_end,
1087                          rs == string
1088                          ? no_leading_period
1089                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1090                          flags & FNM_FILE_NAME
1091                          ? flags : flags & ~FNM_PERIOD, NULL) == 0
1092                     /* This didn't work.  Try the whole pattern.  */
1093                     || (rs != string
1094                         && FCT (pattern - 1, rs, string_end,
1095                                 rs == string
1096                                 ? no_leading_period
1097                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1098                                 flags & FNM_FILE_NAME
1099                                 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1100               /* It worked.  Signal success.  */
1101               goto success;
1102         }
1103 
1104       /* None of the patterns lead to a match.  */
1105       retval = FNM_NOMATCH;
1106       break;
1107 
1108     case L_('?'):
1109       if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1110         goto success;
1111       FALLTHROUGH;
1112     case L_('@'):
1113       for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1114         {
1115           /* I cannot believe it but `strcat' is actually acceptable
1116              here.  Match the entire string with the prefix from the
1117              pattern list and the rest of the pattern following the
1118              pattern list.  */
1119           if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1120                    string, string_end, no_leading_period,
1121                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1122                    NULL) == 0)
1123             /* It worked.  Signal success.  */
1124             goto success;
1125         }
1126 
1127       /* None of the patterns lead to a match.  */
1128       retval = FNM_NOMATCH;
1129       break;
1130 
1131     case L_('!'):
1132       for (rs = string; rs <= string_end; ++rs)
1133         {
1134 	  size_t runp_i;
1135 
1136           for (runp_i = pattern_i;
1137                runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1138                runp_i++)
1139             {
1140               if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1141                        no_leading_period,
1142                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1143                        NULL) == 0)
1144               break;
1145             }
1146 
1147           /* If none of the patterns matched see whether the rest does.  */
1148           if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1149               && (FCT (p, rs, string_end,
1150                        rs == string
1151                        ? no_leading_period
1152                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1153                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1154                        NULL) == 0))
1155             /* This is successful.  */
1156             goto success;
1157         }
1158 
1159       /* None of the patterns together with the rest of the pattern
1160          lead to a match.  */
1161       retval = FNM_NOMATCH;
1162       break;
1163 
1164     default:
1165       assert (! "Invalid extended matching operator");
1166       retval = -1;
1167       break;
1168     }
1169 
1170  success:
1171  out:
1172   PASTE (PATTERN_PREFIX, _free) (&list);
1173 
1174   return retval;
1175 }
1176 
/* Drop the helper macros defined above and the configuration macros this
   file relies on -- presumably so that it can be included again with
   different settings (TODO confirm against the including file).  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1

#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1197