1 /* Copyright (C) 1991-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18 #ifdef _LIBC
19 # include <stdint.h>
20 #endif
21
/* Resume point recorded by FCT through its ENDS argument when it reaches
   a plain '*' in the pattern: the caller continues matching from here
   instead of letting the callee recurse further.  */
struct STRUCT
{
  const CHAR *pattern;		/* Position of the '*' in the pattern.  */
  const CHAR *string;		/* Position reached in the string.  */
  bool no_leading_period;	/* Leading-period state at that point.  */
};
28
/* Match STRING against the file name pattern PATTERN, returning zero if
   it matches, nonzero if not.  STRING_END points one past the last
   character of STRING.  If ENDS is not NULL, matching stops with
   success at the first plain '*' and the position is stored in *ENDS.  */
static int FCT (const CHAR *pattern, const CHAR *string,
		const CHAR *string_end, bool no_leading_period, int flags,
		struct STRUCT *ends);

/* Match an extended pattern group.  OPT is the operator character that
   precedes the '(' which PATTERN points to.  */
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
		const CHAR *string_end, bool no_leading_period, int flags);

/* Find the end of the extended pattern group whose '(' is at PATTERNP.  */
static const CHAR *END (const CHAR *patternp);
37
/* Match the text [STRING, STRING_END) against PATTERN.

   NO_LEADING_PERIOD is true when a '.' at the current string position
   must not be matched by a wildcard or bracket expression.  FLAGS is
   the set of FNM_* bits tested below.  If ENDS is not NULL and a plain
   '*' is reached, the current pattern/string position is stored in
   *ENDS and zero is returned so that the caller can continue from
   there.

   Returns zero on a match and FNM_NOMATCH otherwise.  When an extended
   pattern is handed to EXT, any result of EXT other than -1 is returned
   unchanged.  */
static int
FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
     bool no_leading_period, int flags, struct STRUCT *ends)
{
  /* P walks the pattern, N walks the string.  */
  const CHAR *p = pattern, *n = string;
  UCHAR c;
#ifdef _LIBC
  /* Collation sequence table, used for bracket ranges.  */
# if WIDE_CHAR_VERSION
  const char *collseq = (const char *)
    _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
# else
  const UCHAR *collseq = (const UCHAR *)
    _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
# endif
#endif

  while ((c = *p++) != L_('\0'))
    {
      /* Value NO_LEADING_PERIOD takes for the next string character;
	 only the '/' case below sets it to true.  */
      bool new_no_leading_period = false;
      c = FOLD (c);

      switch (c)
	{
	case L_('?'):
	  /* "?(" may start an extended pattern; EXT returns -1 if it
	     turns out not to be a well-formed one.  */
	  if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
	    {
	      int res = EXT (c, p, n, string_end, no_leading_period, flags);
	      if (res != -1)
		return res;
	    }

	  /* A plain '?' matches exactly one character, but neither a
	     '/' under FNM_FILE_NAME nor a protected leading '.'.  */
	  if (n == string_end)
	    return FNM_NOMATCH;
	  else if (*n == L_('/') && (flags & FNM_FILE_NAME))
	    return FNM_NOMATCH;
	  else if (*n == L_('.') && no_leading_period)
	    return FNM_NOMATCH;
	  break;

	case L_('\\'):
	  /* Unless escaping is disabled, the backslash quotes the next
	     pattern character, which is then matched literally.  */
	  if (!(flags & FNM_NOESCAPE))
	    {
	      c = *p++;
	      if (c == L_('\0'))
		/* Trailing \ loses.  */
		return FNM_NOMATCH;
	      c = FOLD (c);
	    }
	  if (n == string_end || FOLD ((UCHAR) *n) != c)
	    return FNM_NOMATCH;
	  break;

	case L_('*'):
	  if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
	    {
	      int res = EXT (c, p, n, string_end, no_leading_period, flags);
	      if (res != -1)
		return res;
	    }
	  else if (ends != NULL)
	    {
	      /* The caller asked to be told where the next '*' is:
		 report the position (P - 1 is the '*' itself) and let
		 the caller continue from there.  */
	      ends->pattern = p - 1;
	      ends->string = n;
	      ends->no_leading_period = no_leading_period;
	      return 0;
	    }

	  if (n != string_end && *n == L_('.') && no_leading_period)
	    return FNM_NOMATCH;

	  /* Collapse the run of '?' and '*' that follows.  On exit C is
	     the first pattern character after the run and P points just
	     past it.  */
	  for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
	    {
	      if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
		{
		  const CHAR *endp = END (p);
		  if (endp != p)
		    {
		      /* This is a pattern.  Skip over it.  */
		      p = endp;
		      continue;
		    }
		}

	      if (c == L_('?'))
		{
		  /* A ? needs to match one character.  */
		  if (n == string_end)
		    /* There isn't another character; no match.  */
		    return FNM_NOMATCH;
		  else if (*n == L_('/')
			   && __glibc_unlikely (flags & FNM_FILE_NAME))
		    /* A slash does not match a wildcard under
		       FNM_FILE_NAME.  */
		    return FNM_NOMATCH;
		  else
		    /* One character of the string is consumed in matching
		       this ? wildcard, so *??? won't match if there are
		       less than three characters.  */
		    ++n;
		}
	    }

	  if (c == L_('\0'))
	    /* The wildcard(s) is/are the last element of the pattern.
	       If the name is a file name and contains another slash
	       this means it cannot match, unless the FNM_LEADING_DIR
	       flag is set.  */
	    {
	      int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;

	      if (flags & FNM_FILE_NAME)
		{
		  if (flags & FNM_LEADING_DIR)
		    result = 0;
		  else
		    {
		      if (MEMCHR (n, L_('/'), string_end - n) == NULL)
			result = 0;
		    }
		}

	      return result;
	    }
	  else
	    {
	      const CHAR *endp;
	      struct STRUCT end;

	      /* END.pattern stays NULL unless a recursive call stops at
		 a further '*' and records its position.  */
	      end.pattern = NULL;
	      /* ENDP bounds the positions the wildcard may extend to:
		 the next '/' under FNM_FILE_NAME, else the end of the
		 string.  */
	      endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
			     string_end - n);
	      if (endp == NULL)
		endp = string_end;

	      if (c == L_('[')
		  || (__glibc_unlikely (flags & FNM_EXTMATCH)
		      && (c == L_('@') || c == L_('+') || c == L_('!'))
		      && *p == L_('(')))
		{
		  /* The next element is a bracket expression or an
		     extended pattern, so there is no single character
		     to look for: try every position.  */
		  int flags2 = ((flags & FNM_FILE_NAME)
				? flags : (flags & ~FNM_PERIOD));

		  /* --P backs up to the character just read into C so
		     that the recursive call sees it again.  */
		  for (--p; n < endp; ++n, no_leading_period = false)
		    if (FCT (p, n, string_end, no_leading_period, flags2,
			     &end) == 0)
		      goto found;
		}
	      else if (c == L_('/') && (flags & FNM_FILE_NAME))
		{
		  /* The '*' must cover everything up to the next '/'.  */
		  while (n < string_end && *n != L_('/'))
		    ++n;
		  if (n < string_end && *n == L_('/')
		      && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
			       NULL) == 0))
		    return 0;
		}
	      else
		{
		  int flags2 = ((flags & FNM_FILE_NAME)
				? flags : (flags & ~FNM_PERIOD));

		  /* The next element is an ordinary (possibly escaped)
		     character: only positions where the string holds
		     that character need to be tried.  */
		  if (c == L_('\\') && !(flags & FNM_NOESCAPE))
		    c = *p;
		  c = FOLD (c);
		  for (--p; n < endp; ++n, no_leading_period = false)
		    if (FOLD ((UCHAR) *n) == c
			&& (FCT (p, n, string_end, no_leading_period, flags2,
				 &end) == 0))
		      {
		      found:
			/* A NULL END.pattern means the rest of the
			   pattern matched completely.  */
			if (end.pattern == NULL)
			  return 0;
			break;
		      }
		  if (end.pattern != NULL)
		    {
		      /* The recursive call stopped at a later '*':
			 resume the main loop from that position.  */
		      p = end.pattern;
		      n = end.string;
		      no_leading_period = end.no_leading_period;
		      continue;
		    }
		}
	    }

	  /* If we come here no match is possible with the wildcard.  */
	  return FNM_NOMATCH;

	case L_('['):
	  {
	    /* Positions to restore if the bracket expression turns out
	       to be unterminated and '[' is matched literally.  */
	    const CHAR *p_init = p;
	    const CHAR *n_init = n;
	    /* Nonzero if the sense of the character class is inverted.  */
	    bool not;
	    /* Lower bound of a possible range; holds either a character
	       or, when is_seqval is set, a collation sequence value.  */
	    CHAR cold;
	    /* The folded string character being tested.  */
	    UCHAR fn;

	    if (posixly_correct == 0)
	      posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;

	    if (n == string_end)
	      return FNM_NOMATCH;

	    if (*n == L_('.') && no_leading_period)
	      return FNM_NOMATCH;

	    if (*n == L_('/') && (flags & FNM_FILE_NAME))
	      /* '/' cannot be matched.  */
	      return FNM_NOMATCH;

	    /* '!' always negates; '^' only when POSIXLY_CORRECT is not
	       set in the environment.  */
	    not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
	    if (not)
	      ++p;

	    fn = FOLD ((UCHAR) *n);

	    c = *p++;
	    for (;;)
	      {
		if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
		  {
		    if (*p == L_('\0'))
		      return FNM_NOMATCH;
		    c = FOLD ((UCHAR) *p);
		    ++p;

		    goto normal_bracket;
		  }
		else if (c == L_('[') && *p == L_(':'))
		  {
		    /* Character class "[:name:]".  */
		    /* Leave room for the null.  */
		    CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
		    size_t c1 = 0;
		    wctype_t wt;
		    const CHAR *startp = p;

		    for (;;)
		      {
			if (c1 == CHAR_CLASS_MAX_LENGTH)
			  /* The name is too long and therefore the pattern
			     is ill-formed.  */
			  return FNM_NOMATCH;

			c = *++p;
			if (c == L_(':') && p[1] == L_(']'))
			  {
			    p += 2;
			    break;
			  }
			/* NOTE(review): the upper bound is `>=', so a
			   name containing 'z' is not treated as a class
			   name -- confirm whether `>' was intended.  */
			if (c < L_('a') || c >= L_('z'))
			  {
			    /* This cannot possibly be a character class name.
			       Match it as a normal range.  */
			    p = startp;
			    c = L_('[');
			    goto normal_bracket;
			  }
			str[c1++] = c;
		      }
		    str[c1] = L_('\0');

		    wt = IS_CHAR_CLASS (str);
		    if (wt == 0)
		      /* Invalid character class name.  */
		      return FNM_NOMATCH;

#if defined _LIBC && ! WIDE_CHAR_VERSION
		    /* The following code is glibc specific but does
		       there a good job in speeding up the code since
		       we can avoid the btowc() call.  */
		    if (_ISCTYPE ((UCHAR) *n, wt))
		      goto matched;
#else
		    if (iswctype (BTOWC ((UCHAR) *n), wt))
		      goto matched;
#endif
		    c = *p++;
		  }
#ifdef _LIBC
		else if (c == L_('[') && *p == L_('='))
		  {
		    /* Equivalence class "[=c=]".  */
		    /* It's important that STR be a scalar variable rather
		       than a one-element array, because GCC (at least 4.9.2
		       -O2 on x86-64) can be confused by the array and
		       diagnose a "used initialized" in a dead branch in the
		       findidx function.  */
		    UCHAR str;
		    uint32_t nrules =
		      _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
		    const CHAR *startp = p;

		    c = *++p;
		    if (c == L_('\0'))
		      {
			p = startp;
			c = L_('[');
			goto normal_bracket;
		      }
		    str = c;

		    c = *++p;
		    if (c != L_('=') || p[1] != L_(']'))
		      {
			/* Not of the form "[=c=]": treat the '[' as an
			   ordinary member of the bracket expression.  */
			p = startp;
			c = L_('[');
			goto normal_bracket;
		      }
		    p += 2;

		    if (nrules == 0)
		      {
			/* No collation rules: only the character itself
			   is in its equivalence class.  */
			if ((UCHAR) *n == str)
			  goto matched;
		      }
		    else
		      {
			const int32_t *table;
# if WIDE_CHAR_VERSION
			const int32_t *weights;
			const wint_t *extra;
# else
			const unsigned char *weights;
			const unsigned char *extra;
# endif
			const int32_t *indirect;
			int32_t idx;
			const UCHAR *cp = (const UCHAR *) &str;

# if WIDE_CHAR_VERSION
			table = (const int32_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
			weights = (const int32_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
			extra = (const wint_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
			indirect = (const int32_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
# else
			table = (const int32_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
			weights = (const unsigned char *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
			extra = (const unsigned char *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
			indirect = (const int32_t *)
			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
# endif

			idx = FINDIDX (table, indirect, extra, &cp, 1);
			if (idx != 0)
			  {
			    /* We found a table entry.  Now see whether the
			       character we are currently at has the same
			       equivalence class value.  */
			    int len = weights[idx & 0xffffff];
			    int32_t idx2;
			    const UCHAR *np = (const UCHAR *) n;

			    idx2 = FINDIDX (table, indirect, extra,
					    &np, string_end - n);
			    if (idx2 != 0
				&& (idx >> 24) == (idx2 >> 24)
				&& len == weights[idx2 & 0xffffff])
			      {
				int cnt = 0;

				idx &= 0xffffff;
				idx2 &= 0xffffff;

				/* Compare the two weight sequences
				   element by element.  */
				while (cnt < len
				       && (weights[idx + 1 + cnt]
					   == weights[idx2 + 1 + cnt]))
				  ++cnt;

				if (cnt == len)
				  goto matched;
			      }
			  }
		      }

		    c = *p++;
		  }
#endif
		else if (c == L_('\0'))
		  {
		    /* [ unterminated, treat as normal character.  */
		    p = p_init;
		    n = n_init;
		    c = L_('[');
		    goto normal_match;
		  }
		else
		  {
		    bool is_range = false;

#ifdef _LIBC
		    /* True when COLD (and later CEND) already holds a
		       collation sequence value instead of a character.  */
		    bool is_seqval = false;

		    if (c == L_('[') && *p == L_('.'))
		      {
			/* Collating symbol "[.name.]".  */
			uint32_t nrules =
			  _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
			const CHAR *startp = p;
			size_t c1 = 0;

			/* Find the closing ".]" and count the characters
			   of the name in C1.  */
			while (1)
			  {
			    c = *++p;
			    if (c == L_('.') && p[1] == L_(']'))
			      {
				p += 2;
				break;
			      }
			    if (c == '\0')
			      return FNM_NOMATCH;
			    ++c1;
			  }

			/* We have to handle the symbols differently in
			   ranges since then the collation sequence is
			   important.  */
			is_range = *p == L_('-') && p[1] != L_('\0');

			if (nrules == 0)
			  {
			    /* There are no names defined in the collation
			       data.  Therefore we only accept the trivial
			       names consisting of the character itself.  */
			    if (c1 != 1)
			      return FNM_NOMATCH;

			    if (!is_range && *n == startp[1])
			      goto matched;

			    cold = startp[1];
			    c = *p++;
			  }
			else
			  {
			    int32_t table_size;
			    const int32_t *symb_table;
			    const unsigned char *extra;
			    int32_t idx;
			    int32_t elem;
# if WIDE_CHAR_VERSION
			    CHAR *wextra;
# endif

			    table_size =
			      _NL_CURRENT_WORD (LC_COLLATE,
						_NL_COLLATE_SYMB_HASH_SIZEMB);
			    symb_table = (const int32_t *)
			      _NL_CURRENT (LC_COLLATE,
					   _NL_COLLATE_SYMB_TABLEMB);
			    extra = (const unsigned char *)
			      _NL_CURRENT (LC_COLLATE,
					   _NL_COLLATE_SYMB_EXTRAMB);

			    /* Scan every used slot of the symbol table
			       for an element whose character sequence
			       equals the name.  */
			    for (elem = 0; elem < table_size; elem++)
			      if (symb_table[2 * elem] != 0)
				{
				  idx = symb_table[2 * elem + 1];
				  /* Skip the name of collating element.  */
				  idx += 1 + extra[idx];
# if WIDE_CHAR_VERSION
				  /* Skip the byte sequence of the
				     collating element.  */
				  idx += 1 + extra[idx];
				  /* Adjust for the alignment.  */
				  idx = (idx + 3) & ~3;

				  wextra = (CHAR *) &extra[idx + 4];

				  if (/* Compare the length of the sequence.  */
				      c1 == wextra[0]
				      /* Compare the wide char sequence.  */
				      && (__wmemcmp (startp + 1, &wextra[1],
						     c1)
					  == 0))
				    /* Yep, this is the entry.  */
				    break;
# else
				  if (/* Compare the length of the sequence.  */
				      c1 == extra[idx]
				      /* Compare the byte sequence.  */
				      && memcmp (startp + 1,
						 &extra[idx + 1], c1) == 0)
				    /* Yep, this is the entry.  */
				    break;
# endif
				}

			    if (elem < table_size)
			      {
				/* Compare the byte sequence but only if
				   this is not part of a range.  */
				if (! is_range

# if WIDE_CHAR_VERSION
				    && __wmemcmp (n, &wextra[1], c1) == 0
# else
				    && memcmp (n, &extra[idx + 1], c1) == 0
# endif
				    )
				  {
				    /* The element covers C1 string
				       characters; the main loop adds the
				       final increment.  */
				    n += c1 - 1;
				    goto matched;
				  }

				/* Get the collation sequence value.  */
				is_seqval = true;
# if WIDE_CHAR_VERSION
				cold = wextra[1 + wextra[0]];
# else
				idx += 1 + extra[idx];
				/* Adjust for the alignment.  */
				idx = (idx + 3) & ~3;
				cold = *((int32_t *) &extra[idx]);
# endif

				c = *p++;
			      }
			    else if (c1 == 1)
			      {
				/* No valid character.  Match it as a
				   single byte.  */
				if (!is_range && *n == startp[1])
				  goto matched;

				cold = startp[1];
				c = *p++;
			      }
			    else
			      return FNM_NOMATCH;
			  }
		      }
		    else
#endif
		      {
			c = FOLD (c);
		      normal_bracket:

			/* We have to handle the symbols differently in
			   ranges since then the collation sequence is
			   important.  */
			is_range = (*p == L_('-') && p[1] != L_('\0')
				    && p[1] != L_(']'));

			if (!is_range && c == fn)
			  goto matched;

#if _LIBC
			/* This is needed if we goto normal_bracket; from
			   outside of is_seqval's scope.  */
			is_seqval = false;
#endif
			cold = c;
			c = *p++;
		      }

		    if (c == L_('-') && *p != L_(']'))
		      {
#if _LIBC
			/* We have to find the collation sequence
			   value for C.  Collation sequence is nothing
			   we can regularly access.  The sequence
			   value is defined by the order in which the
			   definitions of the collation values for the
			   various characters appear in the source
			   file.  A strange concept, nowhere
			   documented.  */
			uint32_t fcollseq;
			uint32_t lcollseq;
			UCHAR cend = *p++;

# if WIDE_CHAR_VERSION
			/* Search in the 'names' array for the characters.  */
			fcollseq = __collseq_table_lookup (collseq, fn);
			if (fcollseq == ~((uint32_t) 0))
			  /* XXX We don't know anything about the character
			     we are supposed to match.  This means we are
			     failing.  */
			  goto range_not_matched;

			if (is_seqval)
			  lcollseq = cold;
			else
			  lcollseq = __collseq_table_lookup (collseq, cold);
# else
			fcollseq = collseq[fn];
			lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
# endif

			/* From here on IS_SEQVAL describes CEND, the
			   upper bound of the range.  */
			is_seqval = false;
			if (cend == L_('[') && *p == L_('.'))
			  {
			    /* The upper bound is a collating symbol.  */
			    uint32_t nrules =
			      _NL_CURRENT_WORD (LC_COLLATE,
						_NL_COLLATE_NRULES);
			    const CHAR *startp = p;
			    size_t c1 = 0;

			    while (1)
			      {
				c = *++p;
				if (c == L_('.') && p[1] == L_(']'))
				  {
				    p += 2;
				    break;
				  }
				if (c == '\0')
				  return FNM_NOMATCH;
				++c1;
			      }

			    if (nrules == 0)
			      {
				/* There are no names defined in the
				   collation data.  Therefore we only
				   accept the trivial names consisting
				   of the character itself.  */
				if (c1 != 1)
				  return FNM_NOMATCH;

				cend = startp[1];
			      }
			    else
			      {
				int32_t table_size;
				const int32_t *symb_table;
				const unsigned char *extra;
				int32_t idx;
				int32_t elem;
# if WIDE_CHAR_VERSION
				CHAR *wextra;
# endif

				table_size =
				  _NL_CURRENT_WORD (LC_COLLATE,
						    _NL_COLLATE_SYMB_HASH_SIZEMB);
				symb_table = (const int32_t *)
				  _NL_CURRENT (LC_COLLATE,
					       _NL_COLLATE_SYMB_TABLEMB);
				extra = (const unsigned char *)
				  _NL_CURRENT (LC_COLLATE,
					       _NL_COLLATE_SYMB_EXTRAMB);

				for (elem = 0; elem < table_size; elem++)
				  if (symb_table[2 * elem] != 0)
				    {
				      idx = symb_table[2 * elem + 1];
				      /* Skip the name of collating
					 element.  */
				      idx += 1 + extra[idx];
# if WIDE_CHAR_VERSION
				      /* Skip the byte sequence of the
					 collating element.  */
				      idx += 1 + extra[idx];
				      /* Adjust for the alignment.  */
				      idx = (idx + 3) & ~3;

				      wextra = (CHAR *) &extra[idx + 4];

				      if (/* Compare the length of the
					     sequence.  */
					  c1 == wextra[0]
					  /* Compare the wide char sequence.  */
					  && (__wmemcmp (startp + 1,
							 &wextra[1], c1)
					      == 0))
					/* Yep, this is the entry.  */
					break;
# else
				      if (/* Compare the length of the
					     sequence.  */
					  c1 == extra[idx]
					  /* Compare the byte sequence.  */
					  && memcmp (startp + 1,
						     &extra[idx + 1], c1) == 0)
					/* Yep, this is the entry.  */
					break;
# endif
				    }

				if (elem < table_size)
				  {
				    /* Get the collation sequence value.  */
				    is_seqval = true;
# if WIDE_CHAR_VERSION
				    cend = wextra[1 + wextra[0]];
# else
				    idx += 1 + extra[idx];
				    /* Adjust for the alignment.  */
				    idx = (idx + 3) & ~3;
				    cend = *((int32_t *) &extra[idx]);
# endif
				  }
				else if (c1 == 1)
				  {
				    cend = startp[1];
				    c = *p++;
				  }
				else
				  return FNM_NOMATCH;
			      }
			  }
			else
			  {
			    /* The upper bound is an ordinary, possibly
			       escaped, character.  */
			    if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
			      cend = *p++;
			    if (cend == L_('\0'))
			      return FNM_NOMATCH;
			    cend = FOLD (cend);
			  }

			/* XXX It is not entirely clear to me how to handle
			   characters which are not mentioned in the
			   collation specification.  */
			if (
# if WIDE_CHAR_VERSION
			    lcollseq == 0xffffffff ||
# endif
			    lcollseq <= fcollseq)
			  {
			    /* We have to look at the upper bound.  */
			    uint32_t hcollseq;

			    if (is_seqval)
			      hcollseq = cend;
			    else
			      {
# if WIDE_CHAR_VERSION
				hcollseq =
				  __collseq_table_lookup (collseq, cend);
				if (hcollseq == ~((uint32_t) 0))
				  {
				    /* Hum, no information about the upper
				       bound.  The matching succeeds if the
				       lower bound is matched exactly.  */
				    if (lcollseq != fcollseq)
				      goto range_not_matched;

				    goto matched;
				  }
# else
				hcollseq = collseq[cend];
# endif
			      }

			    if (lcollseq <= hcollseq && fcollseq <= hcollseq)
			      goto matched;
			  }
# if WIDE_CHAR_VERSION
		      range_not_matched:
# endif
#else
			/* We use a boring value comparison of the character
			   values.  This is better than comparing using
			   'strcoll' since the latter would have surprising
			   and sometimes fatal consequences.  */
			UCHAR cend = *p++;

			if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
			  cend = *p++;
			if (cend == L_('\0'))
			  return FNM_NOMATCH;

			/* It is a range.  */
			if ((UCHAR) cold <= fn && fn <= cend)
			  goto matched;
#endif

			c = *p++;
		      }
		  }

		/* The closing bracket ends the expression without any
		   member having matched.  */
		if (c == L_(']'))
		  break;
	      }

	    /* Nothing in the list matched: this is a match only for a
	       negated expression.  */
	    if (!not)
	      return FNM_NOMATCH;
	    break;

	  matched:
	    /* Skip the rest of the [...] that already matched.  */
	    while ((c = *p++) != L_(']'))
	      {
		if (c == L_('\0'))
		  {
		    /* [ unterminated, treat as normal character.  */
		    p = p_init;
		    n = n_init;
		    c = L_('[');
		    goto normal_match;
		  }

		if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
		  {
		    if (*p == L_('\0'))
		      return FNM_NOMATCH;
		    /* XXX 1003.2d11 is unclear if this is right.  */
		    ++p;
		  }
		else if (c == L_('[') && *p == L_(':'))
		  {
		    int c1 = 0;
		    const CHAR *startp = p;

		    while (1)
		      {
			c = *++p;
			if (++c1 == CHAR_CLASS_MAX_LENGTH)
			  return FNM_NOMATCH;

			if (*p == L_(':') && p[1] == L_(']'))
			  break;

			/* NOTE(review): same `>=' upper bound as in the
			   class-name parser above -- confirm.  */
			if (c < L_('a') || c >= L_('z'))
			  {
			    /* Not a class name.  Together with the
			       `p += 2' below this puts P back at
			       STARTP.  */
			    p = startp - 2;
			    break;
			  }
		      }
		    p += 2;
		  }
		else if (c == L_('[') && *p == L_('='))
		  {
		    c = *++p;
		    if (c == L_('\0'))
		      return FNM_NOMATCH;
		    c = *++p;
		    if (c != L_('=') || p[1] != L_(']'))
		      return FNM_NOMATCH;
		    p += 2;
		  }
		else if (c == L_('[') && *p == L_('.'))
		  {
		    while (1)
		      {
			c = *++p;
			if (c == L_('\0'))
			  return FNM_NOMATCH;

			if (c == L_('.') && p[1] == L_(']'))
			  break;
		      }
		    p += 2;
		  }
	      }
	    /* A member matched, which is a failure for a negated
	       expression.  */
	    if (not)
	      return FNM_NOMATCH;
	  }
	  break;

	case L_('+'):
	case L_('@'):
	case L_('!'):
	  /* These are special only as the operator of an extended
	     pattern; otherwise they are ordinary characters.  */
	  if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(')
	    {
	      int res = EXT (c, p, n, string_end, no_leading_period, flags);
	      if (res != -1)
		return res;
	    }
	  goto normal_match;

	case L_('/'):
	  if (NO_LEADING_PERIOD (flags))
	    {
	      if (n == string_end || c != (UCHAR) *n)
		return FNM_NOMATCH;

	      /* The character after this '/' is a leading one.  */
	      new_no_leading_period = true;
	      break;
	    }
	  FALLTHROUGH;
	default:
	normal_match:
	  if (n == string_end || c != FOLD ((UCHAR) *n))
	    return FNM_NOMATCH;
	}

      /* One string character has been matched; move on.  */
      no_leading_period = new_no_leading_period;
      ++n;
    }

  /* The pattern is used up: it matches if the string is as well.  */
  if (n == string_end)
    return 0;

  if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
    /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
    return 0;

  return FNM_NOMATCH;
}
932
933
934 static const CHAR *
END(const CHAR * pattern)935 END (const CHAR *pattern)
936 {
937 const CHAR *p = pattern;
938
939 while (1)
940 if (*++p == L_('\0'))
941 /* This is an invalid pattern. */
942 return pattern;
943 else if (*p == L_('['))
944 {
945 /* Handle brackets special. */
946 if (posixly_correct == 0)
947 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
948
949 /* Skip the not sign. We have to recognize it because of a possibly
950 following ']'. */
951 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
952 ++p;
953 /* A leading ']' is recognized as such. */
954 if (*p == L_(']'))
955 ++p;
956 /* Skip over all characters of the list. */
957 while (*p != L_(']'))
958 if (*p++ == L_('\0'))
959 /* This is no valid pattern. */
960 return pattern;
961 }
962 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
963 || *p == L_('!')) && p[1] == L_('('))
964 {
965 p = END (p + 1);
966 if (*p == L_('\0'))
967 /* This is an invalid pattern. */
968 return pattern;
969 }
970 else if (*p == L_(')'))
971 break;
972
973 return p + 1;
974 }
975
976 #if WIDE_CHAR_VERSION
977 # define PATTERN_PREFIX pattern_list
978 #else
979 # define PATTERN_PREFIX wpattern_list
980 #endif
981
982 #define PASTE(a,b) PASTE1(a,b)
983 #define PASTE1(a,b) a##b
984
985 #define DYNARRAY_STRUCT PATTERN_PREFIX
986 #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr)
987 #define DYNARRAY_ELEMENT CHAR *
988 #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_)
989 #define DYNARRAY_INITIAL_SIZE 8
990 #include <malloc/dynarray-skeleton.c>
991
992 static int
EXT(INT opt,const CHAR * pattern,const CHAR * string,const CHAR * string_end,bool no_leading_period,int flags)993 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
994 bool no_leading_period, int flags)
995 {
996 const CHAR *startp;
997 ptrdiff_t level;
998 struct PATTERN_PREFIX list;
999 size_t pattern_len = STRLEN (pattern);
1000 size_t pattern_i = 0;
1001 const CHAR *p;
1002 const CHAR *rs;
1003 int retval = 0;
1004
1005 PASTE (PATTERN_PREFIX, _init) (&list);
1006
1007 /* Parse the pattern. Store the individual parts in the list. */
1008 level = 0;
1009 for (startp = p = pattern + 1; level >= 0; ++p)
1010 if (*p == L_('\0'))
1011 {
1012 /* This is an invalid pattern. */
1013 retval = -1;
1014 goto out;
1015 }
1016 else if (*p == L_('['))
1017 {
1018 /* Handle brackets special. */
1019 if (posixly_correct == 0)
1020 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1021
1022 /* Skip the not sign. We have to recognize it because of a possibly
1023 following ']'. */
1024 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1025 ++p;
1026 /* A leading ']' is recognized as such. */
1027 if (*p == L_(']'))
1028 ++p;
1029 /* Skip over all characters of the list. */
1030 while (*p != L_(']'))
1031 if (*p++ == L_('\0'))
1032 {
1033 /* This is no valid pattern. */
1034 retval = -1;
1035 goto out;
1036 }
1037 }
1038 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1039 || *p == L_('!')) && p[1] == L_('('))
1040 /* Remember the nesting level. */
1041 ++level;
1042 else if (*p == L_(')') || *p == L_('|'))
1043 {
1044 if (level == 0)
1045 {
1046 size_t slen = opt == L_('?') || opt == L_('@')
1047 ? pattern_len : p - startp + 1;
1048 CHAR *newp = malloc (slen * sizeof (CHAR));
1049 if (newp != NULL)
1050 {
1051 *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0');
1052 PASTE (PATTERN_PREFIX,_add) (&list, newp);
1053 }
1054 if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list))
1055 {
1056 retval = -2;
1057 goto out;
1058 }
1059
1060 if (*p == L_('|'))
1061 startp = p + 1;
1062 }
1063 if (*p == L_(')'))
1064 level--;
1065 }
1066 assert (p[-1] == L_(')'));
1067
1068 switch (opt)
1069 {
1070 case L_('*'):
1071 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1072 goto success;
1073 FALLTHROUGH;
1074 case L_('+'):
1075 for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++)
1076 {
1077 for (rs = string; rs <= string_end; ++rs)
1078 /* First match the prefix with the current pattern with the
1079 current pattern. */
1080 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string,
1081 rs, no_leading_period,
1082 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1083 NULL) == 0
1084 /* This was successful. Now match the rest with the rest
1085 of the pattern. */
1086 && (FCT (p, rs, string_end,
1087 rs == string
1088 ? no_leading_period
1089 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1090 flags & FNM_FILE_NAME
1091 ? flags : flags & ~FNM_PERIOD, NULL) == 0
1092 /* This didn't work. Try the whole pattern. */
1093 || (rs != string
1094 && FCT (pattern - 1, rs, string_end,
1095 rs == string
1096 ? no_leading_period
1097 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1098 flags & FNM_FILE_NAME
1099 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1100 /* It worked. Signal success. */
1101 goto success;
1102 }
1103
1104 /* None of the patterns lead to a match. */
1105 retval = FNM_NOMATCH;
1106 break;
1107
1108 case L_('?'):
1109 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1110 goto success;
1111 FALLTHROUGH;
1112 case L_('@'):
1113 for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++)
1114 {
1115 /* I cannot believe it but `strcat' is actually acceptable
1116 here. Match the entire string with the prefix from the
1117 pattern list and the rest of the pattern following the
1118 pattern list. */
1119 if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p),
1120 string, string_end, no_leading_period,
1121 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1122 NULL) == 0)
1123 /* It worked. Signal success. */
1124 goto success;
1125 }
1126
1127 /* None of the patterns lead to a match. */
1128 retval = FNM_NOMATCH;
1129 break;
1130
1131 case L_('!'):
1132 for (rs = string; rs <= string_end; ++rs)
1133 {
1134 size_t runp_i;
1135
1136 for (runp_i = pattern_i;
1137 runp_i != PASTE (PATTERN_PREFIX, _size) (&list);
1138 runp_i++)
1139 {
1140 if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs,
1141 no_leading_period,
1142 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1143 NULL) == 0)
1144 break;
1145 }
1146
1147 /* If none of the patterns matched see whether the rest does. */
1148 if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list)
1149 && (FCT (p, rs, string_end,
1150 rs == string
1151 ? no_leading_period
1152 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1153 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1154 NULL) == 0))
1155 /* This is successful. */
1156 goto success;
1157 }
1158
1159 /* None of the patterns together with the rest of the pattern
1160 lead to a match. */
1161 retval = FNM_NOMATCH;
1162 break;
1163
1164 default:
1165 assert (! "Invalid extended matching operator");
1166 retval = -1;
1167 break;
1168 }
1169
1170 success:
1171 out:
1172 PASTE (PATTERN_PREFIX, _free) (&list);
1173
1174 return retval;
1175 }
1176
/* Remove the helper macros defined in this file.  */
#undef PATTERN_PREFIX
#undef PASTE
#undef PASTE1

/* Remove the parameter macros this file relies on but does not define
   itself -- presumably so that the including file can define them anew
   and include this file once more; verify against the includer.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1197