1 /* Internal functions for the *scanf* implementation.
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <libc-diag.h>
32 #include <libc-lock.h>
33 #include <locale/localeinfo.h>
34 #include <scratch_buffer.h>
35
/* Select the type that LONGLONG expands to.  When __GNUC__ is defined,
   HAVE_LONGLONG is defined as well and LONGLONG is `long long';
   otherwise LONGLONG falls back to plain `long'.  */
36 #ifdef __GNUC__
37 # define HAVE_LONGLONG
38 # define LONGLONG long long
39 #else
40 # define LONGLONG long
41 #endif
42
43 /* Determine whether `long long' has to be handled at all:
   need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX and 1
   otherwise.  The conversion code tests it together with the LONGDBL
   flag before using a `long long' argument.  */
44 #if LONG_MAX == LONG_LONG_MAX
45 # define need_longlong 0
46 #else
47 # define need_longlong 1
48 #endif
49
50 /* Determine whether `long' has to be handled separately from `int':
   need_long is 0 when INT_MAX equals LONG_MAX and 1 otherwise.  */
51 #if INT_MAX == LONG_MAX
52 # define need_long 0
53 #else
54 # define need_long 1
55 #endif
56
57 /* Flag bits for one conversion specification.  They are OR-ed
   together into the `flags' variable of the scanning function, which
   is reset to 0 at the start of every `%' directive.  Each value
   occupies a distinct bit so the flags can be combined freely.  */
58 #define LONG 0x0001 /* l: long or double */
59 #define LONGDBL 0x0002 /* L: long long or long double */
60 #define SHORT 0x0004 /* h: short */
61 #define SUPPRESS 0x0008 /* *: suppress assignment */
62 #define POINTER 0x0010 /* weird %p pointer (`fake hex') */
63 #define NOSKIP 0x0020 /* do not skip blanks */
64 #define NUMBER_SIGNED 0x0040 /* signed integer */
65 #define GROUP 0x0080 /* ': group numbers */
66 #define GNU_MALLOC 0x0100 /* a: malloc strings */
67 #define CHAR 0x0200 /* hh: char */
68 #define I18N 0x0400 /* I: use locale's digits */
69 #define HEXA_FLOAT 0x0800 /* hexadecimal float */
70 #define READ_POINTER 0x1000 /* this is a pointer value */
71 #define POSIX_MALLOC 0x2000 /* m: malloc strings */
72 #define MALLOC (GNU_MALLOC | POSIX_MALLOC) /* mask: either malloc-string flag (a or m) */
73
74 #include <locale/localeinfo.h>
75 #include <libioP.h>
76
/* Character-type abstraction.  The same scanning code is compiled
   either for wide streams (COMPILE_WSCANF defined) or for byte streams;
   the macros below hide the difference.  Several of them are not
   self-contained: they refer to the variables `c', `s', `read_in',
   `inchar_errno' and (byte variant only) `loc' of the function in
   which they are expanded.  */
77 #ifdef COMPILE_WSCANF
/* Push C back onto stream S and decrement read_in, unless C is WEOF,
   in which case nothing happens.  */
78 # define ungetc(c, s) ((void) (c == WEOF \
79 || (--read_in, \
80 _IO_sputbackwc (s, c))))
/* Same as ungetc but without the WEOF test; only for callers that
   already know C is a real character.  */
81 # define ungetc_not_eof(c, s) ((void) (--read_in, \
82 _IO_sputbackwc (s, c)))
/* Read the next wide character from `s' into `c' and yield it.  If `c'
   already holds WEOF no read is attempted: errno is restored from
   inchar_errno and WEOF is yielded again.  After a successful read
   read_in is incremented; when the read yields WEOF the current errno
   is saved in inchar_errno instead.  */
83 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
84 : ((c = _IO_getwc_unlocked (s)), \
85 (void) (c != WEOF \
86 ? ++read_in \
87 : (size_t) (inchar_errno = errno)), c))
88
/* Character classification and case mapping on wide characters.  */
89 # define ISSPACE(Ch) iswspace (Ch)
90 # define ISDIGIT(Ch) iswdigit (Ch)
91 # define ISXDIGIT(Ch) iswxdigit (Ch)
92 # define TOLOWER(Ch) towlower (Ch)
/* Make the expanding function return WEOF unless _IO_fwide (s, 1)
   reports 1 (presumably: the stream is, or could be made,
   wide-oriented).  */
93 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the number-parsing helpers to their wide-character
   counterparts.  */
94 # define __strtoll_internal __wcstoll_internal
95 # define __strtoull_internal __wcstoull_internal
96 # define __strtol_internal __wcstol_internal
97 # define __strtoul_internal __wcstoul_internal
98 # define __strtold_internal __wcstold_internal
99 # define __strtod_internal __wcstod_internal
100 # define __strtof_internal __wcstof_internal
101 # if __HAVE_FLOAT128_UNLIKE_LDBL
102 # define __strtof128_internal __wcstof128_internal
103 # endif
104
/* L_ turns a literal into a wide literal; CHAR_T, UCHAR_T and WINT_T
   are the character, unsigned character and "character or EOF" types
   of this variant.  EOF itself is redefined to WEOF so the shared code
   can compare against EOF in both variants.  */
105 # define L_(Str) L##Str
106 # define CHAR_T wchar_t
107 # define UCHAR_T unsigned int
108 # define WINT_T wint_t
109 # undef EOF
110 # define EOF WEOF
111 #else
/* Byte-stream variant of ungetc: push C (converted to unsigned char)
   back onto S and decrement read_in, unless C equals EOF.  */
112 # define ungetc(c, s) ((void) ((int) c == EOF \
113 || (--read_in, \
114 _IO_sputbackc (s, (unsigned char) c))))
/* Same without the EOF test.  */
115 # define ungetc_not_eof(c, s) ((void) (--read_in, \
116 _IO_sputbackc (s, (unsigned char) c)))
/* Byte-stream variant of inchar; same protocol as the wide variant
   above, using _IO_getc_unlocked and EOF.  */
117 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
118 : ((c = _IO_getc_unlocked (s)), \
119 (void) (c != EOF \
120 ? ++read_in \
121 : (size_t) (inchar_errno = errno)), c))
/* Classification and case mapping in the locale `loc' of the
   expanding function.  */
122 # define ISSPACE(Ch) __isspace_l (Ch, loc)
123 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
124 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
125 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Make the expanding function return EOF when _IO_vtable_offset (s)
   is 0 and _IO_fwide (s, -1) does not report -1 (presumably: the
   stream cannot be used in byte orientation).  */
126 # define ORIENT if (_IO_vtable_offset (s) == 0 \
127 && _IO_fwide (s, -1) != -1) \
128 return EOF
129
/* Literals are used unchanged; character types are the plain byte
   types.  */
130 # define L_(Str) Str
131 # define CHAR_T char
132 # define UCHAR_T unsigned char
133 # define WINT_T int
134 #endif
135
136 #include "printf-parse.h" /* Use read_int. */
137
/* Error exits.  All three transfer control to the `errout' label of
   the function they are expanded in (the label is not visible in this
   part of the file).

   encode_error: set errno to EILSEQ first.
   conv_error:   jump without touching `done' or errno.
   input_error:  turn a result of 0 assignments into EOF first, so a
                 failure before any assignment is reported as EOF.  */
138 #define encode_error() do { \
139 __set_errno (EILSEQ); \
140 goto errout; \
141 } while (0)
142 #define conv_error() do { \
143 goto errout; \
144 } while (0)
145 #define input_error() do { \
146 if (done == 0) done = EOF; \
147 goto errout; \
148 } while (0)
/* Record PTR in the `ptrs_to_free' list of the expanding function.
   When the list is empty or the chunk at its head already holds as
   many entries as its `ptrs' array has elements, a new chunk is
   obtained with alloca (i.e. from the caller's stack frame) and
   linked in front before PTR is stored.  */
149 #define add_ptr_to_free(ptr) \
150 do \
151 { \
152 if (ptrs_to_free == NULL \
153 || ptrs_to_free->count == (sizeof (ptrs_to_free->ptrs) \
154 / sizeof (ptrs_to_free->ptrs[0]))) \
155 { \
156 struct ptrs_to_free *new_ptrs = alloca (sizeof (*ptrs_to_free)); \
157 new_ptrs->count = 0; \
158 new_ptrs->next = ptrs_to_free; \
159 ptrs_to_free = new_ptrs; \
160 } \
161 ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \
162 } \
163 while (0)
/* Validate the arguments of the scanning function.  After CHECK_FILE
   (s, EOF), the expanding function returns EOF with errno set to EBADF
   when the stream's flags contain _IO_NO_READS, and EOF with errno set
   to EINVAL when FORMAT is a null pointer.  */
164 #define ARGCHECK(s, format) \
165 do \
166 { \
167 /* Check file argument for consistency. */ \
168 CHECK_FILE (s, EOF); \
169 if (s->_flags & _IO_NO_READS) \
170 { \
171 __set_errno (EBADF); \
172 return EOF; \
173 } \
174 else if (format == NULL) \
175 { \
176 __set_errno (EINVAL); \
177 return EOF; \
178 } \
179 } while (0)
/* Lock stream S, registering _IO_funlockfile as a cleanup handler
   beforehand.  Note that both macros expand to two statements without
   a do/while wrapper, so they must not be used as the sole body of an
   `if' or loop.  NOTE(review): __libc_cleanup_region_start/_end
   presumably have to be paired within the same function -- confirm
   against libc-lock.h.  */
180 #define LOCK_STREAM(S) \
181 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
182 _IO_flockfile (S)
/* Unlock stream S and close the cleanup region opened by
   LOCK_STREAM.  */
183 #define UNLOCK_STREAM(S) \
184 _IO_funlockfile (S); \
185 __libc_cleanup_region_end (0)
186
/* One chunk of the list maintained by add_ptr_to_free.  Each entry is
   the address of a caller-supplied `char *' that has been pointed at a
   malloc'd buffer by a string conversion with a malloc flag.
   Presumably the list is walked on the error path to release those
   buffers; the consumer is not visible in this part of the file.  */
187 struct ptrs_to_free
188 {
189 size_t count; /* Number of entries of PTRS in use.  */
190 struct ptrs_to_free *next; /* Previously filled chunk, or NULL.  */
191 char **ptrs[32]; /* Recorded locations; a new chunk is started once all 32 are used.  */
192 };
193
/* Growable array of CHAR_T used as temporary workspace by the
   scanning function.  The storage lives in SCRATCH; CURRENT and END
   are cursors into it that are set up by char_buffer_rewind.  Both
   cursors are set to NULL when growing the buffer fails, which is the
   condition char_buffer_error tests for.  */
194 struct char_buffer {
195 CHAR_T *current; /* Position where the next character is stored.  */
196 CHAR_T *end; /* One past the last usable position.  */
197 struct scratch_buffer scratch; /* Backing storage.  */
198 };
199
200 /* Returns a pointer to the first CHAR_T object in the buffer. Only
201 valid if char_buffer_add (BUFFER, CH) has been called and
202 char_buffer_error (BUFFER) is false. */
203 static inline CHAR_T *
char_buffer_start(const struct char_buffer * buffer)204 char_buffer_start (const struct char_buffer *buffer)
205 {
206 return (CHAR_T *) buffer->scratch.data;
207 }
208
209 /* Returns the number of CHAR_T objects in the buffer. Only valid if
210 char_buffer_error (BUFFER) is false. */
211 static inline size_t
char_buffer_size(const struct char_buffer * buffer)212 char_buffer_size (const struct char_buffer *buffer)
213 {
214 return buffer->current - char_buffer_start (buffer);
215 }
216
217 /* Reinitializes BUFFER->current and BUFFER->end to cover the entire
218 scratch buffer. */
219 static inline void
char_buffer_rewind(struct char_buffer * buffer)220 char_buffer_rewind (struct char_buffer *buffer)
221 {
222 buffer->current = char_buffer_start (buffer);
223 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
224 }
225
226 /* Returns true if a previous call to char_buffer_add (BUFFER, CH)
227 failed. */
228 static inline bool
char_buffer_error(const struct char_buffer * buffer)229 char_buffer_error (const struct char_buffer *buffer)
230 {
231 return __glibc_unlikely (buffer->current == NULL);
232 }
233
234 /* Slow path for char_buffer_add. */
235 static void
char_buffer_add_slow(struct char_buffer * buffer,CHAR_T ch)236 char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
237 {
238 if (char_buffer_error (buffer))
239 return;
240 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
241 if (!scratch_buffer_grow_preserve (&buffer->scratch))
242 {
243 buffer->current = NULL;
244 buffer->end = NULL;
245 return;
246 }
247 char_buffer_rewind (buffer);
248 buffer->current += offset;
249 *buffer->current++ = ch;
250 }
251
252 /* Adds CH to BUFFER. This function does not report any errors, check
253 for them with char_buffer_error. */
254 static inline void
255 char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
256 __attribute__ ((always_inline));
257 static inline void
char_buffer_add(struct char_buffer * buffer,CHAR_T ch)258 char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
259 {
260 if (__glibc_unlikely (buffer->current == buffer->end))
261 char_buffer_add_slow (buffer, ch);
262 else
263 *buffer->current++ = ch;
264 }
265
266 /* Read formatted input from S according to the format string
267 FORMAT, using the argument list in ARG.
268 Return the number of assignments made, or -1 for an input error. */
269 #ifdef COMPILE_WSCANF
270 int
__vfwscanf_internal(FILE * s,const wchar_t * format,va_list argptr,unsigned int mode_flags)271 __vfwscanf_internal (FILE *s, const wchar_t *format, va_list argptr,
272 unsigned int mode_flags)
273 #else
274 int
275 __vfscanf_internal (FILE *s, const char *format, va_list argptr,
276 unsigned int mode_flags)
277 #endif
278 {
279 va_list arg;
280 const UCHAR_T *f = (const UCHAR_T *) format;
281 UCHAR_T fc; /* Current character of the format. */
282 WINT_T done = 0; /* Assignments done. */
283 size_t read_in = 0; /* Chars read in. */
284 WINT_T c = 0; /* Last char read. */
285 int width; /* Maximum field width. */
286 int flags; /* Modifiers for current format element. */
287 #ifndef COMPILE_WSCANF
288 locale_t loc = _NL_CURRENT_LOCALE;
289 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
290 #endif
291
292 /* Errno of last failed inchar call. */
293 int inchar_errno = 0;
294 /* Status for reading F-P nums. */
295 char got_digit, got_dot, got_e, got_sign;
296 /* If a [...] is a [^...]. */
297 CHAR_T not_in;
298 #define exp_char not_in
299 /* Base for integral numbers. */
300 int base;
301 /* Decimal point character. */
302 #ifdef COMPILE_WSCANF
303 wint_t decimal;
304 #else
305 const char *decimal;
306 #endif
307 /* The thousands character of the current locale. */
308 #ifdef COMPILE_WSCANF
309 wint_t thousands;
310 #else
311 const char *thousands;
312 #endif
313 struct ptrs_to_free *ptrs_to_free = NULL;
314 /* State for the conversions. */
315 mbstate_t state;
316 /* Integral holding variables. */
317 union
318 {
319 long long int q;
320 unsigned long long int uq;
321 long int l;
322 unsigned long int ul;
323 } num;
324 /* Character-buffer pointer. */
325 char *str = NULL;
326 wchar_t *wstr = NULL;
327 char **strptr = NULL;
328 ssize_t strsize = 0;
329 /* We must not react on white spaces immediately because they can
330 possibly be matched even if in the input stream no character is
331 available anymore. */
332 int skip_space = 0;
333 /* Workspace. */
334 CHAR_T *tw; /* Temporary pointer. */
335 struct char_buffer charbuf;
336 scratch_buffer_init (&charbuf.scratch);
337
338 #ifdef __va_copy
339 __va_copy (arg, argptr);
340 #else
341 arg = (va_list) argptr;
342 #endif
343
344 #ifdef ORIENT
345 ORIENT;
346 #endif
347
348 ARGCHECK (s, format);
349
350 {
351 #ifndef COMPILE_WSCANF
352 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
353 #endif
354
355 /* Figure out the decimal point character. */
356 #ifdef COMPILE_WSCANF
357 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
358 #else
359 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
360 #endif
361 /* Figure out the thousands separator character. */
362 #ifdef COMPILE_WSCANF
363 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
364 #else
365 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
366 if (*thousands == '\0')
367 thousands = NULL;
368 #endif
369 }
370
371 /* Lock the stream. */
372 LOCK_STREAM (s);
373
374
375 #ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378 #endif
379
380 /* Run through the format string. */
381 while (*f != '\0')
382 {
383 unsigned int argpos;
384 /* Extract the next argument, which is of type TYPE.
385 For a %N$... spec, this is the Nth argument from the beginning;
386 otherwise it is the next argument after the state now in ARG. */
387 #ifdef __va_copy
388 # define ARG(type) (argpos == 0 ? va_arg (arg, type) \
389 : ({ unsigned int pos = argpos; \
390 va_list arg; \
391 __va_copy (arg, argptr); \
392 while (--pos > 0) \
393 (void) va_arg (arg, void *); \
394 va_arg (arg, type); \
395 }))
396 #else
397 # if 0
398 /* XXX Possible optimization. */
399 # define ARG(type) (argpos == 0 ? va_arg (arg, type) \
400 : ({ va_list arg = (va_list) argptr; \
401 arg = (va_list) ((char *) arg \
402 + (argpos - 1) \
403 * __va_rounded_size (void *)); \
404 va_arg (arg, type); \
405 }))
406 # else
407 # define ARG(type) (argpos == 0 ? va_arg (arg, type) \
408 : ({ unsigned int pos = argpos; \
409 va_list arg = (va_list) argptr; \
410 while (--pos > 0) \
411 (void) va_arg (arg, void *); \
412 va_arg (arg, type); \
413 }))
414 # endif
415 #endif
416
417 #ifndef COMPILE_WSCANF
418 if (!isascii (*f))
419 {
420 /* Non-ASCII, may be a multibyte. */
421 int len = __mbrlen ((const char *) f, strlen ((const char *) f),
422 &state);
423 if (len > 0)
424 {
425 do
426 {
427 c = inchar ();
428 if (__glibc_unlikely (c == EOF))
429 input_error ();
430 else if (c != *f++)
431 {
432 ungetc_not_eof (c, s);
433 conv_error ();
434 }
435 }
436 while (--len > 0);
437 continue;
438 }
439 }
440 #endif
441
442 fc = *f++;
443 if (fc != '%')
444 {
445 /* Remember to skip spaces. */
446 if (ISSPACE (fc))
447 {
448 skip_space = 1;
449 continue;
450 }
451
452 /* Read a character. */
453 c = inchar ();
454
455 /* Characters other than format specs must just match. */
456 if (__glibc_unlikely (c == EOF))
457 input_error ();
458
459 /* We saw white space char as the last character in the format
460 string. Now it's time to skip all leading white space. */
461 if (skip_space)
462 {
463 while (ISSPACE (c))
464 if (__glibc_unlikely (inchar () == EOF))
465 input_error ();
466 skip_space = 0;
467 }
468
469 if (__glibc_unlikely (c != fc))
470 {
471 ungetc (c, s);
472 conv_error ();
473 }
474
475 continue;
476 }
477
478 /* This is the start of the conversion string. */
479 flags = 0;
480
481 /* Initialize state of modifiers. */
482 argpos = 0;
483
484 /* Prepare temporary buffer. */
485 char_buffer_rewind (&charbuf);
486
487 /* Check for a positional parameter specification. */
488 if (ISDIGIT (*f))
489 {
490 argpos = read_int (&f);
491 if (*f == L_('$'))
492 ++f;
493 else
494 {
495 /* Oops; that was actually the field width. */
496 width = argpos;
497 argpos = 0;
498 goto got_width;
499 }
500 }
501
502 /* Check for the assignment-suppressing, the number grouping flag,
503 and the signal to use the locale's digit representation. */
504 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
505 switch (*f++)
506 {
507 case L_('*'):
508 flags |= SUPPRESS;
509 break;
510 case L_('\''):
511 #ifdef COMPILE_WSCANF
512 if (thousands != L'\0')
513 #else
514 if (thousands != NULL)
515 #endif
516 flags |= GROUP;
517 break;
518 case L_('I'):
519 flags |= I18N;
520 break;
521 }
522
523 /* Find the maximum field width. */
524 width = 0;
525 if (ISDIGIT (*f))
526 width = read_int (&f);
527 got_width:
528 if (width == 0)
529 width = -1;
530
531 /* Check for type modifiers. */
532 switch (*f++)
533 {
534 case L_('h'):
535 /* ints are short ints or chars. */
536 if (*f == L_('h'))
537 {
538 ++f;
539 flags |= CHAR;
540 }
541 else
542 flags |= SHORT;
543 break;
544 case L_('l'):
545 if (*f == L_('l'))
546 {
547 /* A double `l' is equivalent to an `L'. */
548 ++f;
549 flags |= LONGDBL | LONG;
550 }
551 else
552 /* ints are long ints. */
553 flags |= LONG;
554 break;
555 case L_('q'):
556 case L_('L'):
557 /* doubles are long doubles, and ints are long long ints. */
558 flags |= LONGDBL | LONG;
559 break;
560 case L_('a'):
561 /* The `a' is used as a flag only if followed by `s', `S' or
562 `['. */
563 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
564 {
565 --f;
566 break;
567 }
568 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
569 supported at all. */
570 if (__glibc_likely ((mode_flags & SCANF_ISOC99_A) != 0))
571 {
572 --f;
573 break;
574 }
575 /* String conversions (%s, %[) take a `char **'
576 arg and fill it in with a malloc'd pointer. */
577 flags |= GNU_MALLOC;
578 break;
579 case L_('m'):
580 flags |= POSIX_MALLOC;
581 if (*f == L_('l'))
582 {
583 ++f;
584 flags |= LONG;
585 }
586 break;
587 case L_('z'):
588 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
589 flags |= LONGDBL;
590 else if (sizeof (size_t) > sizeof (unsigned int))
591 flags |= LONG;
592 break;
593 case L_('j'):
594 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
595 flags |= LONGDBL;
596 else if (sizeof (uintmax_t) > sizeof (unsigned int))
597 flags |= LONG;
598 break;
599 case L_('t'):
600 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
601 flags |= LONGDBL;
602 else if (sizeof (ptrdiff_t) > sizeof (int))
603 flags |= LONG;
604 break;
605 default:
606 /* Not a recognized modifier. Backup. */
607 --f;
608 break;
609 }
610
611 /* End of the format string? */
612 if (__glibc_unlikely (*f == L_('\0')))
613 conv_error ();
614
615 /* Find the conversion specifier. */
616 fc = *f++;
617 if (skip_space || (fc != L_('[') && fc != L_('c')
618 && fc != L_('C') && fc != L_('n')))
619 {
620 /* Eat whitespace. */
621 int save_errno = errno;
622 __set_errno (0);
623 do
624 /* We add the additional test for EOF here since otherwise
625 inchar will restore the old errno value which might be
626 EINTR but does not indicate an interrupt since nothing
627 was read at this time. */
628 if (__builtin_expect ((c == EOF || inchar () == EOF)
629 && errno == EINTR, 0))
630 input_error ();
631 while (ISSPACE (c));
632 __set_errno (save_errno);
633 ungetc (c, s);
634 skip_space = 0;
635 }
636
637 switch (fc)
638 {
639 case L_('%'): /* Must match a literal '%'. */
640 c = inchar ();
641 if (__glibc_unlikely (c == EOF))
642 input_error ();
643 if (__glibc_unlikely (c != fc))
644 {
645 ungetc_not_eof (c, s);
646 conv_error ();
647 }
648 break;
649
650 case L_('n'): /* Answer number of assignments done. */
651 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
652 with the 'n' conversion specifier. */
653 if (!(flags & SUPPRESS))
654 {
655 /* Don't count the read-ahead. */
656 if (need_longlong && (flags & LONGDBL))
657 *ARG (long long int *) = read_in;
658 else if (need_long && (flags & LONG))
659 *ARG (long int *) = read_in;
660 else if (flags & SHORT)
661 *ARG (short int *) = read_in;
662 else if (!(flags & CHAR))
663 *ARG (int *) = read_in;
664 else
665 *ARG (char *) = read_in;
666
667 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
668 /* We have a severe problem here. The ISO C standard
669 contradicts itself in explaining the effect of the %n
670 format in `scanf'. While in ISO C:1990 and the ISO C
671 Amendement 1:1995 the result is described as
672
673 Execution of a %n directive does not effect the
674 assignment count returned at the completion of
675 execution of the f(w)scanf function.
676
677 in ISO C Corrigendum 1:1994 the following was added:
678
679 Subclause 7.9.6.2
680 Add the following fourth example:
681 In:
682 #include <stdio.h>
683 int d1, d2, n1, n2, i;
684 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
685 the value 123 is assigned to d1 and the value3 to n1.
686 Because %n can never get an input failure the value
687 of 3 is also assigned to n2. The value of d2 is not
688 affected. The value 3 is assigned to i.
689
690 We go for now with the historically correct code from ISO C,
691 i.e., we don't count the %n assignments. When it ever
692 should proof to be wrong just remove the #ifdef above. */
693 ++done;
694 #endif
695 }
696 break;
697
698 case L_('c'): /* Match characters. */
699 if ((flags & LONG) == 0)
700 {
701 if (width == -1)
702 width = 1;
703
704 #define STRING_ARG(Str, Type, Width) \
705 do if (!(flags & SUPPRESS)) \
706 { \
707 if (flags & MALLOC) \
708 { \
709 /* The string is to be stored in a malloc'd buffer. */ \
710 /* For %mS using char ** is actually wrong, but \
711 shouldn't make a difference on any arch glibc \
712 supports and would unnecessarily complicate \
713 things. */ \
714 strptr = ARG (char **); \
715 if (strptr == NULL) \
716 conv_error (); \
717 /* Allocate an initial buffer. */ \
718 strsize = Width; \
719 *strptr = (char *) malloc (strsize * sizeof (Type)); \
720 Str = (Type *) *strptr; \
721 if (Str != NULL) \
722 add_ptr_to_free (strptr); \
723 else if (flags & POSIX_MALLOC) \
724 { \
725 done = EOF; \
726 goto errout; \
727 } \
728 } \
729 else \
730 Str = ARG (Type *); \
731 if (Str == NULL) \
732 conv_error (); \
733 } while (0)
734 #ifdef COMPILE_WSCANF
735 STRING_ARG (str, char, 100);
736 #else
737 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
738 #endif
739
740 c = inchar ();
741 if (__glibc_unlikely (c == EOF))
742 input_error ();
743
744 #ifdef COMPILE_WSCANF
745 /* We have to convert the wide character(s) into multibyte
746 characters and store the result. */
747 memset (&state, '\0', sizeof (state));
748
749 do
750 {
751 size_t n;
752
753 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
754 && *strptr + strsize - str <= MB_LEN_MAX)
755 {
756 /* We have to enlarge the buffer if the `m' flag
757 was given. */
758 size_t strleng = str - *strptr;
759 char *newstr;
760
761 newstr = (char *) realloc (*strptr, strsize * 2);
762 if (newstr == NULL)
763 {
764 /* Can't allocate that much. Last-ditch effort. */
765 newstr = (char *) realloc (*strptr,
766 strleng + MB_LEN_MAX);
767 if (newstr == NULL)
768 {
769 /* c can't have `a' flag, only `m'. */
770 done = EOF;
771 goto errout;
772 }
773 else
774 {
775 *strptr = newstr;
776 str = newstr + strleng;
777 strsize = strleng + MB_LEN_MAX;
778 }
779 }
780 else
781 {
782 *strptr = newstr;
783 str = newstr + strleng;
784 strsize *= 2;
785 }
786 }
787
788 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
789 if (__glibc_unlikely (n == (size_t) -1))
790 /* No valid wide character. */
791 input_error ();
792
793 /* Increment the output pointer. Even if we don't
794 write anything. */
795 str += n;
796 }
797 while (--width > 0 && inchar () != EOF);
798 #else
799 if (!(flags & SUPPRESS))
800 {
801 do
802 {
803 if ((flags & MALLOC)
804 && (char *) str == *strptr + strsize)
805 {
806 /* Enlarge the buffer. */
807 size_t newsize
808 = strsize
809 + (strsize >= width ? width - 1 : strsize);
810
811 str = (char *) realloc (*strptr, newsize);
812 if (str == NULL)
813 {
814 /* Can't allocate that much. Last-ditch
815 effort. */
816 str = (char *) realloc (*strptr, strsize + 1);
817 if (str == NULL)
818 {
819 /* c can't have `a' flag, only `m'. */
820 done = EOF;
821 goto errout;
822 }
823 else
824 {
825 *strptr = (char *) str;
826 str += strsize;
827 ++strsize;
828 }
829 }
830 else
831 {
832 *strptr = (char *) str;
833 str += strsize;
834 strsize = newsize;
835 }
836 }
837 *str++ = c;
838 }
839 while (--width > 0 && inchar () != EOF);
840 }
841 else
842 while (--width > 0 && inchar () != EOF);
843 #endif
844
845 if (!(flags & SUPPRESS))
846 {
847 if ((flags & MALLOC) && str - *strptr != strsize)
848 {
849 char *cp = (char *) realloc (*strptr, str - *strptr);
850 if (cp != NULL)
851 *strptr = cp;
852 }
853 strptr = NULL;
854 ++done;
855 }
856
857 break;
858 }
859 /* FALLTHROUGH */
860 case L_('C'):
861 if (width == -1)
862 width = 1;
863
864 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
865
866 c = inchar ();
867 if (__glibc_unlikely (c == EOF))
868 input_error ();
869
870 #ifdef COMPILE_WSCANF
871 /* Just store the incoming wide characters. */
872 if (!(flags & SUPPRESS))
873 {
874 do
875 {
876 if ((flags & MALLOC)
877 && wstr == (wchar_t *) *strptr + strsize)
878 {
879 size_t newsize
880 = strsize + (strsize > width ? width - 1 : strsize);
881 /* Enlarge the buffer. */
882 wstr = (wchar_t *) realloc (*strptr,
883 newsize * sizeof (wchar_t));
884 if (wstr == NULL)
885 {
886 /* Can't allocate that much. Last-ditch effort. */
887 wstr = (wchar_t *) realloc (*strptr,
888 (strsize + 1)
889 * sizeof (wchar_t));
890 if (wstr == NULL)
891 {
892 /* C or lc can't have `a' flag, only `m'
893 flag. */
894 done = EOF;
895 goto errout;
896 }
897 else
898 {
899 *strptr = (char *) wstr;
900 wstr += strsize;
901 ++strsize;
902 }
903 }
904 else
905 {
906 *strptr = (char *) wstr;
907 wstr += strsize;
908 strsize = newsize;
909 }
910 }
911 *wstr++ = c;
912 }
913 while (--width > 0 && inchar () != EOF);
914 }
915 else
916 while (--width > 0 && inchar () != EOF);
917 #else
918 {
919 /* We have to convert the multibyte input sequence to wide
920 characters. */
921 char buf[1];
922 mbstate_t cstate;
923
924 memset (&cstate, '\0', sizeof (cstate));
925
926 do
927 {
928 /* This is what we present the mbrtowc function first. */
929 buf[0] = c;
930
931 if (!(flags & SUPPRESS) && (flags & MALLOC)
932 && wstr == (wchar_t *) *strptr + strsize)
933 {
934 size_t newsize
935 = strsize + (strsize > width ? width - 1 : strsize);
936 /* Enlarge the buffer. */
937 wstr = (wchar_t *) realloc (*strptr,
938 newsize * sizeof (wchar_t));
939 if (wstr == NULL)
940 {
941 /* Can't allocate that much. Last-ditch effort. */
942 wstr = (wchar_t *) realloc (*strptr,
943 ((strsize + 1)
944 * sizeof (wchar_t)));
945 if (wstr == NULL)
946 {
947 /* C or lc can't have `a' flag, only `m' flag. */
948 done = EOF;
949 goto errout;
950 }
951 else
952 {
953 *strptr = (char *) wstr;
954 wstr += strsize;
955 ++strsize;
956 }
957 }
958 else
959 {
960 *strptr = (char *) wstr;
961 wstr += strsize;
962 strsize = newsize;
963 }
964 }
965
966 while (1)
967 {
968 size_t n;
969
970 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
971 buf, 1, &cstate);
972
973 if (n == (size_t) -2)
974 {
975 /* Possibly correct character, just not enough
976 input. */
977 if (__glibc_unlikely (inchar () == EOF))
978 encode_error ();
979
980 buf[0] = c;
981 continue;
982 }
983
984 if (__glibc_unlikely (n != 1))
985 encode_error ();
986
987 /* We have a match. */
988 break;
989 }
990
991 /* Advance the result pointer. */
992 ++wstr;
993 }
994 while (--width > 0 && inchar () != EOF);
995 }
996 #endif
997
998 if (!(flags & SUPPRESS))
999 {
1000 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1001 {
1002 wchar_t *cp = (wchar_t *) realloc (*strptr,
1003 ((wstr
1004 - (wchar_t *) *strptr)
1005 * sizeof (wchar_t)));
1006 if (cp != NULL)
1007 *strptr = (char *) cp;
1008 }
1009 strptr = NULL;
1010
1011 ++done;
1012 }
1013
1014 break;
1015
1016 case L_('s'): /* Read a string. */
1017 if (!(flags & LONG))
1018 {
1019 STRING_ARG (str, char, 100);
1020
1021 c = inchar ();
1022 if (__glibc_unlikely (c == EOF))
1023 input_error ();
1024
1025 #ifdef COMPILE_WSCANF
1026 memset (&state, '\0', sizeof (state));
1027 #endif
1028
1029 do
1030 {
1031 if (ISSPACE (c))
1032 {
1033 ungetc_not_eof (c, s);
1034 break;
1035 }
1036
1037 #ifdef COMPILE_WSCANF
1038 /* This is quite complicated. We have to convert the
1039 wide characters into multibyte characters and then
1040 store them. */
1041 {
1042 size_t n;
1043
1044 if (!(flags & SUPPRESS) && (flags & MALLOC)
1045 && *strptr + strsize - str <= MB_LEN_MAX)
1046 {
1047 /* We have to enlarge the buffer if the `a' or `m'
1048 flag was given. */
1049 size_t strleng = str - *strptr;
1050 char *newstr;
1051
1052 newstr = (char *) realloc (*strptr, strsize * 2);
1053 if (newstr == NULL)
1054 {
1055 /* Can't allocate that much. Last-ditch
1056 effort. */
1057 newstr = (char *) realloc (*strptr,
1058 strleng + MB_LEN_MAX);
1059 if (newstr == NULL)
1060 {
1061 if (flags & POSIX_MALLOC)
1062 {
1063 done = EOF;
1064 goto errout;
1065 }
1066 /* We lose. Oh well. Terminate the
1067 string and stop converting,
1068 so at least we don't skip any input. */
1069 ((char *) (*strptr))[strleng] = '\0';
1070 strptr = NULL;
1071 ++done;
1072 conv_error ();
1073 }
1074 else
1075 {
1076 *strptr = newstr;
1077 str = newstr + strleng;
1078 strsize = strleng + MB_LEN_MAX;
1079 }
1080 }
1081 else
1082 {
1083 *strptr = newstr;
1084 str = newstr + strleng;
1085 strsize *= 2;
1086 }
1087 }
1088
1089 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1090 &state);
1091 if (__glibc_unlikely (n == (size_t) -1))
1092 encode_error ();
1093
1094 assert (n <= MB_LEN_MAX);
1095 str += n;
1096 }
1097 #else
1098 /* This is easy. */
1099 if (!(flags & SUPPRESS))
1100 {
1101 *str++ = c;
1102 if ((flags & MALLOC)
1103 && (char *) str == *strptr + strsize)
1104 {
1105 /* Enlarge the buffer. */
1106 str = (char *) realloc (*strptr, 2 * strsize);
1107 if (str == NULL)
1108 {
1109 /* Can't allocate that much. Last-ditch
1110 effort. */
1111 str = (char *) realloc (*strptr, strsize + 1);
1112 if (str == NULL)
1113 {
1114 if (flags & POSIX_MALLOC)
1115 {
1116 done = EOF;
1117 goto errout;
1118 }
1119 /* We lose. Oh well. Terminate the
1120 string and stop converting,
1121 so at least we don't skip any input. */
1122 ((char *) (*strptr))[strsize - 1] = '\0';
1123 strptr = NULL;
1124 ++done;
1125 conv_error ();
1126 }
1127 else
1128 {
1129 *strptr = (char *) str;
1130 str += strsize;
1131 ++strsize;
1132 }
1133 }
1134 else
1135 {
1136 *strptr = (char *) str;
1137 str += strsize;
1138 strsize *= 2;
1139 }
1140 }
1141 }
1142 #endif
1143 }
1144 while ((width <= 0 || --width > 0) && inchar () != EOF);
1145
1146 if (!(flags & SUPPRESS))
1147 {
1148 #ifdef COMPILE_WSCANF
1149 /* We have to emit the code to get into the initial
1150 state. */
1151 char buf[MB_LEN_MAX];
1152 size_t n = __wcrtomb (buf, L'\0', &state);
1153 if (n > 0 && (flags & MALLOC)
1154 && str + n >= *strptr + strsize)
1155 {
1156 /* Enlarge the buffer. */
1157 size_t strleng = str - *strptr;
1158 char *newstr;
1159
1160 newstr = (char *) realloc (*strptr, strleng + n + 1);
1161 if (newstr == NULL)
1162 {
1163 if (flags & POSIX_MALLOC)
1164 {
1165 done = EOF;
1166 goto errout;
1167 }
1168 /* We lose. Oh well. Terminate the string
1169 and stop converting, so at least we don't
1170 skip any input. */
1171 ((char *) (*strptr))[strleng] = '\0';
1172 strptr = NULL;
1173 ++done;
1174 conv_error ();
1175 }
1176 else
1177 {
1178 *strptr = newstr;
1179 str = newstr + strleng;
1180 strsize = strleng + n + 1;
1181 }
1182 }
1183
1184 str = __mempcpy (str, buf, n);
1185 #endif
1186 *str++ = '\0';
1187
1188 if ((flags & MALLOC) && str - *strptr != strsize)
1189 {
1190 char *cp = (char *) realloc (*strptr, str - *strptr);
1191 if (cp != NULL)
1192 *strptr = cp;
1193 }
1194 strptr = NULL;
1195
1196 ++done;
1197 }
1198 break;
1199 }
1200 /* FALLTHROUGH */
1201
1202 case L_('S'):
1203 {
1204 #ifndef COMPILE_WSCANF
1205 mbstate_t cstate;
1206 #endif
1207
1208 /* Wide character string. */
1209 STRING_ARG (wstr, wchar_t, 100);
1210
1211 c = inchar ();
1212 if (__builtin_expect (c == EOF, 0))
1213 input_error ();
1214
1215 #ifndef COMPILE_WSCANF
1216 memset (&cstate, '\0', sizeof (cstate));
1217 #endif
1218
1219 do
1220 {
1221 if (ISSPACE (c))
1222 {
1223 ungetc_not_eof (c, s);
1224 break;
1225 }
1226
1227 #ifdef COMPILE_WSCANF
1228 /* This is easy. */
1229 if (!(flags & SUPPRESS))
1230 {
1231 *wstr++ = c;
1232 if ((flags & MALLOC)
1233 && wstr == (wchar_t *) *strptr + strsize)
1234 {
1235 /* Enlarge the buffer. */
1236 wstr = (wchar_t *) realloc (*strptr,
1237 (2 * strsize)
1238 * sizeof (wchar_t));
1239 if (wstr == NULL)
1240 {
1241 /* Can't allocate that much. Last-ditch
1242 effort. */
1243 wstr = (wchar_t *) realloc (*strptr,
1244 (strsize + 1)
1245 * sizeof (wchar_t));
1246 if (wstr == NULL)
1247 {
1248 if (flags & POSIX_MALLOC)
1249 {
1250 done = EOF;
1251 goto errout;
1252 }
1253 /* We lose. Oh well. Terminate the string
1254 and stop converting, so at least we don't
1255 skip any input. */
1256 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1257 strptr = NULL;
1258 ++done;
1259 conv_error ();
1260 }
1261 else
1262 {
1263 *strptr = (char *) wstr;
1264 wstr += strsize;
1265 ++strsize;
1266 }
1267 }
1268 else
1269 {
1270 *strptr = (char *) wstr;
1271 wstr += strsize;
1272 strsize *= 2;
1273 }
1274 }
1275 }
1276 #else
1277 {
1278 char buf[1];
1279
1280 buf[0] = c;
1281
1282 while (1)
1283 {
1284 size_t n;
1285
1286 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1287 buf, 1, &cstate);
1288
1289 if (n == (size_t) -2)
1290 {
1291 /* Possibly correct character, just not enough
1292 input. */
1293 if (__glibc_unlikely (inchar () == EOF))
1294 encode_error ();
1295
1296 buf[0] = c;
1297 continue;
1298 }
1299
1300 if (__glibc_unlikely (n != 1))
1301 encode_error ();
1302
1303 /* We have a match. */
1304 ++wstr;
1305 break;
1306 }
1307
1308 if (!(flags & SUPPRESS) && (flags & MALLOC)
1309 && wstr == (wchar_t *) *strptr + strsize)
1310 {
1311 /* Enlarge the buffer. */
1312 wstr = (wchar_t *) realloc (*strptr,
1313 (2 * strsize
1314 * sizeof (wchar_t)));
1315 if (wstr == NULL)
1316 {
1317 /* Can't allocate that much. Last-ditch effort. */
1318 wstr = (wchar_t *) realloc (*strptr,
1319 ((strsize + 1)
1320 * sizeof (wchar_t)));
1321 if (wstr == NULL)
1322 {
1323 if (flags & POSIX_MALLOC)
1324 {
1325 done = EOF;
1326 goto errout;
1327 }
1328 /* We lose. Oh well. Terminate the
1329 string and stop converting, so at
1330 least we don't skip any input. */
1331 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1332 strptr = NULL;
1333 ++done;
1334 conv_error ();
1335 }
1336 else
1337 {
1338 *strptr = (char *) wstr;
1339 wstr += strsize;
1340 ++strsize;
1341 }
1342 }
1343 else
1344 {
1345 *strptr = (char *) wstr;
1346 wstr += strsize;
1347 strsize *= 2;
1348 }
1349 }
1350 }
1351 #endif
1352 }
1353 while ((width <= 0 || --width > 0) && inchar () != EOF);
1354
1355 if (!(flags & SUPPRESS))
1356 {
1357 *wstr++ = L'\0';
1358
1359 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1360 {
1361 wchar_t *cp = (wchar_t *) realloc (*strptr,
1362 ((wstr
1363 - (wchar_t *) *strptr)
1364 * sizeof (wchar_t)));
1365 if (cp != NULL)
1366 *strptr = (char *) cp;
1367 }
1368 strptr = NULL;
1369
1370 ++done;
1371 }
1372 }
1373 break;
1374
1375 case L_('x'): /* Hexadecimal integer. */
1376 case L_('X'): /* Ditto. */
1377 base = 16;
1378 goto number;
1379
1380 case L_('o'): /* Octal integer. */
1381 base = 8;
1382 goto number;
1383
1384 case L_('u'): /* Unsigned decimal integer. */
1385 base = 10;
1386 goto number;
1387
1388 case L_('d'): /* Signed decimal integer. */
1389 base = 10;
1390 flags |= NUMBER_SIGNED;
1391 goto number;
1392
1393 case L_('i'): /* Generic number. */
1394 base = 0;
1395 flags |= NUMBER_SIGNED;
1396
1397 number:
1398 c = inchar ();
1399 if (__glibc_unlikely (c == EOF))
1400 input_error ();
1401
1402 /* Check for a sign. */
1403 if (c == L_('-') || c == L_('+'))
1404 {
1405 char_buffer_add (&charbuf, c);
1406 if (width > 0)
1407 --width;
1408 c = inchar ();
1409 }
1410
1411 /* Look for a leading indication of base. */
1412 if (width != 0 && c == L_('0'))
1413 {
1414 if (width > 0)
1415 --width;
1416
1417 char_buffer_add (&charbuf, c);
1418 c = inchar ();
1419
1420 if (width != 0 && TOLOWER (c) == L_('x'))
1421 {
1422 if (base == 0)
1423 base = 16;
1424 if (base == 16)
1425 {
1426 if (width > 0)
1427 --width;
1428 c = inchar ();
1429 }
1430 }
1431 else if (base == 0)
1432 base = 8;
1433 }
1434
1435 if (base == 0)
1436 base = 10;
1437
1438 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1439 {
1440 int from_level;
1441 int to_level;
1442 int level;
1443 #ifdef COMPILE_WSCANF
1444 const wchar_t *wcdigits[10];
1445 const wchar_t *wcdigits_extended[10];
1446 #else
1447 const char *mbdigits[10];
1448 const char *mbdigits_extended[10];
1449 #endif
1450 /* "to_inpunct" is a map from ASCII digits to their
1451 equivalent in locale. This is defined for locales
1452 which use an extra digits set. */
1453 wctrans_t map = __wctrans ("to_inpunct");
1454 int n;
1455
1456 from_level = 0;
1457 #ifdef COMPILE_WSCANF
1458 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1459 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1460 #else
1461 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1462 #endif
1463
1464 /* Get the alternative digit forms if there are any. */
1465 if (__glibc_unlikely (map != NULL))
1466 {
1467 /* Adding new level for extra digits set in locale file. */
1468 ++to_level;
1469
1470 for (n = 0; n < 10; ++n)
1471 {
1472 #ifdef COMPILE_WSCANF
1473 wcdigits[n] = (const wchar_t *)
1474 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1475
1476 wchar_t *wc_extended = (wchar_t *)
1477 alloca ((to_level + 2) * sizeof (wchar_t));
1478 __wmemcpy (wc_extended, wcdigits[n], to_level);
1479 wc_extended[to_level] = __towctrans (L'0' + n, map);
1480 wc_extended[to_level + 1] = '\0';
1481 wcdigits_extended[n] = wc_extended;
1482 #else
1483 mbdigits[n]
1484 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1485
1486 /* Get the equivalent wide char in map. */
1487 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1488
1489 /* Convert it to multibyte representation. */
1490 mbstate_t state;
1491 memset (&state, '\0', sizeof (state));
1492
1493 char extra_mbdigit[MB_LEN_MAX];
1494 size_t mblen
1495 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1496
1497 if (mblen == (size_t) -1)
1498 {
1499 /* Ignore this new level. */
1500 map = NULL;
1501 break;
1502 }
1503
1504 /* Calculate the length of mbdigits[n]. */
1505 const char *last_char = mbdigits[n];
1506 for (level = 0; level < to_level; ++level)
1507 last_char = strchr (last_char, '\0') + 1;
1508
1509 size_t mbdigits_len = last_char - mbdigits[n];
1510
1511 /* Allocate memory for extended multibyte digit. */
1512 char *mb_extended;
1513 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1514
1515 /* And get the mbdigits + extra_digit string. */
1516 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1517 mbdigits_len),
1518 extra_mbdigit, mblen) = '\0';
1519 mbdigits_extended[n] = mb_extended;
1520 #endif
1521 }
1522 }
1523
1524 /* Read the number into workspace. */
1525 while (c != EOF && width != 0)
1526 {
1527 /* In this round we get the pointer to the digit strings
1528 and also perform the first round of comparisons. */
1529 for (n = 0; n < 10; ++n)
1530 {
1531 /* Get the string for the digits with value N. */
1532 #ifdef COMPILE_WSCANF
1533
1534 /* wcdigits_extended[] is fully set in the loop
1535 above, but the test for "map != NULL" is done
1536 inside the loop here and outside the loop there. */
1537 DIAG_PUSH_NEEDS_COMMENT;
1538 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1539
1540 if (__glibc_unlikely (map != NULL))
1541 wcdigits[n] = wcdigits_extended[n];
1542 else
1543 wcdigits[n] = (const wchar_t *)
1544 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1545 wcdigits[n] += from_level;
1546
1547 DIAG_POP_NEEDS_COMMENT;
1548
1549 if (c == (wint_t) *wcdigits[n])
1550 {
1551 to_level = from_level;
1552 break;
1553 }
1554
1555 /* Advance the pointer to the next string. */
1556 ++wcdigits[n];
1557 #else
1558 const char *cmpp;
1559 int avail = width > 0 ? width : INT_MAX;
1560
1561 if (__glibc_unlikely (map != NULL))
1562 mbdigits[n] = mbdigits_extended[n];
1563 else
1564 mbdigits[n]
1565 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1566
1567 for (level = 0; level < from_level; level++)
1568 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1569
1570 cmpp = mbdigits[n];
1571 while ((unsigned char) *cmpp == c && avail >= 0)
1572 {
1573 if (*++cmpp == '\0')
1574 break;
1575 else
1576 {
1577 if (avail == 0 || inchar () == EOF)
1578 break;
1579 --avail;
1580 }
1581 }
1582
1583 if (*cmpp == '\0')
1584 {
1585 if (width > 0)
1586 width = avail;
1587 to_level = from_level;
1588 break;
1589 }
1590
1591 /* We are pushing all read characters back. */
1592 if (cmpp > mbdigits[n])
1593 {
1594 ungetc (c, s);
1595 while (--cmpp > mbdigits[n])
1596 ungetc_not_eof ((unsigned char) *cmpp, s);
1597 c = (unsigned char) *cmpp;
1598 }
1599
1600 /* Advance the pointer to the next string. */
1601 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1602 #endif
1603 }
1604
1605 if (n == 10)
1606 {
1607 /* Have not yet found the digit. */
1608 for (level = from_level + 1; level <= to_level; ++level)
1609 {
1610 /* Search all ten digits of this level. */
1611 for (n = 0; n < 10; ++n)
1612 {
1613 #ifdef COMPILE_WSCANF
1614 if (c == (wint_t) *wcdigits[n])
1615 break;
1616
1617 /* Advance the pointer to the next string. */
1618 ++wcdigits[n];
1619 #else
1620 const char *cmpp;
1621 int avail = width > 0 ? width : INT_MAX;
1622
1623 cmpp = mbdigits[n];
1624 while ((unsigned char) *cmpp == c && avail >= 0)
1625 {
1626 if (*++cmpp == '\0')
1627 break;
1628 else
1629 {
1630 if (avail == 0 || inchar () == EOF)
1631 break;
1632 --avail;
1633 }
1634 }
1635
1636 if (*cmpp == '\0')
1637 {
1638 if (width > 0)
1639 width = avail;
1640 break;
1641 }
1642
1643 /* We are pushing all read characters back. */
1644 if (cmpp > mbdigits[n])
1645 {
1646 ungetc (c, s);
1647 while (--cmpp > mbdigits[n])
1648 ungetc_not_eof ((unsigned char) *cmpp, s);
1649 c = (unsigned char) *cmpp;
1650 }
1651
1652 /* Advance the pointer to the next string. */
1653 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1654 #endif
1655 }
1656
1657 if (n < 10)
1658 {
1659 /* Found it. */
1660 from_level = level;
1661 to_level = level;
1662 break;
1663 }
1664 }
1665 }
1666
1667 if (n < 10)
1668 c = L_('0') + n;
1669 else if (flags & GROUP)
1670 {
1671 /* Try matching against the thousands separator. */
1672 #ifdef COMPILE_WSCANF
1673 if (c != thousands)
1674 break;
1675 #else
1676 const char *cmpp = thousands;
1677 int avail = width > 0 ? width : INT_MAX;
1678
1679 while ((unsigned char) *cmpp == c && avail >= 0)
1680 {
1681 char_buffer_add (&charbuf, c);
1682 if (*++cmpp == '\0')
1683 break;
1684 else
1685 {
1686 if (avail == 0 || inchar () == EOF)
1687 break;
1688 --avail;
1689 }
1690 }
1691
1692 if (char_buffer_error (&charbuf))
1693 {
1694 __set_errno (ENOMEM);
1695 done = EOF;
1696 goto errout;
1697 }
1698
1699 if (*cmpp != '\0')
1700 {
1701 /* We are pushing all read characters back. */
1702 if (cmpp > thousands)
1703 {
1704 charbuf.current -= cmpp - thousands;
1705 ungetc (c, s);
1706 while (--cmpp > thousands)
1707 ungetc_not_eof ((unsigned char) *cmpp, s);
1708 c = (unsigned char) *cmpp;
1709 }
1710 break;
1711 }
1712
1713 if (width > 0)
1714 width = avail;
1715
1716 /* The last thousands character will be added back by
1717 the char_buffer_add below. */
1718 --charbuf.current;
1719 #endif
1720 }
1721 else
1722 break;
1723
1724 char_buffer_add (&charbuf, c);
1725 if (width > 0)
1726 --width;
1727
1728 c = inchar ();
1729 }
1730 }
1731 else
1732 /* Read the number into workspace. */
1733 while (c != EOF && width != 0)
1734 {
1735 if (base == 16)
1736 {
1737 if (!ISXDIGIT (c))
1738 break;
1739 }
1740 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1741 {
1742 if (base == 10 && (flags & GROUP))
1743 {
1744 /* Try matching against the thousands separator. */
1745 #ifdef COMPILE_WSCANF
1746 if (c != thousands)
1747 break;
1748 #else
1749 const char *cmpp = thousands;
1750 int avail = width > 0 ? width : INT_MAX;
1751
1752 while ((unsigned char) *cmpp == c && avail >= 0)
1753 {
1754 char_buffer_add (&charbuf, c);
1755 if (*++cmpp == '\0')
1756 break;
1757 else
1758 {
1759 if (avail == 0 || inchar () == EOF)
1760 break;
1761 --avail;
1762 }
1763 }
1764
1765 if (char_buffer_error (&charbuf))
1766 {
1767 __set_errno (ENOMEM);
1768 done = EOF;
1769 goto errout;
1770 }
1771
1772 if (*cmpp != '\0')
1773 {
1774 /* We are pushing all read characters back. */
1775 if (cmpp > thousands)
1776 {
1777 charbuf.current -= cmpp - thousands;
1778 ungetc (c, s);
1779 while (--cmpp > thousands)
1780 ungetc_not_eof ((unsigned char) *cmpp, s);
1781 c = (unsigned char) *cmpp;
1782 }
1783 break;
1784 }
1785
1786 if (width > 0)
1787 width = avail;
1788
1789 /* The last thousands character will be added back by
1790 the char_buffer_add below. */
1791 --charbuf.current;
1792 #endif
1793 }
1794 else
1795 break;
1796 }
1797 char_buffer_add (&charbuf, c);
1798 if (width > 0)
1799 --width;
1800
1801 c = inchar ();
1802 }
1803
1804 if (char_buffer_error (&charbuf))
1805 {
1806 __set_errno (ENOMEM);
1807 done = EOF;
1808 goto errout;
1809 }
1810
1811 if (char_buffer_size (&charbuf) == 0
1812 || (char_buffer_size (&charbuf) == 1
1813 && (char_buffer_start (&charbuf)[0] == L_('+')
1814 || char_buffer_start (&charbuf)[0] == L_('-'))))
1815 {
1816 /* There was no number. If we are supposed to read a pointer
1817 we must recognize "(nil)" as well. */
1818 if (__builtin_expect (char_buffer_size (&charbuf) == 0
1819 && (flags & READ_POINTER)
1820 && (width < 0 || width >= 5)
1821 && c == '('
1822 && TOLOWER (inchar ()) == L_('n')
1823 && TOLOWER (inchar ()) == L_('i')
1824 && TOLOWER (inchar ()) == L_('l')
1825 && inchar () == L_(')'), 1))
1826 /* We must produce the value of a NULL pointer. A single
1827 '0' digit is enough. */
1828 char_buffer_add (&charbuf, L_('0'));
1829 else
1830 {
1831 /* The last read character is not part of the number
1832 anymore. */
1833 ungetc (c, s);
1834
1835 conv_error ();
1836 }
1837 }
1838 else
1839 /* The just read character is not part of the number anymore. */
1840 ungetc (c, s);
1841
1842 /* Convert the number. */
1843 char_buffer_add (&charbuf, L_('\0'));
1844 if (char_buffer_error (&charbuf))
1845 {
1846 __set_errno (ENOMEM);
1847 done = EOF;
1848 goto errout;
1849 }
1850 if (need_longlong && (flags & LONGDBL))
1851 {
1852 if (flags & NUMBER_SIGNED)
1853 num.q = __strtoll_internal
1854 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1855 else
1856 num.uq = __strtoull_internal
1857 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1858 }
1859 else
1860 {
1861 if (flags & NUMBER_SIGNED)
1862 num.l = __strtol_internal
1863 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1864 else
1865 num.ul = __strtoul_internal
1866 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1867 }
1868 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1869 conv_error ();
1870
1871 if (!(flags & SUPPRESS))
1872 {
1873 if (flags & NUMBER_SIGNED)
1874 {
1875 if (need_longlong && (flags & LONGDBL))
1876 *ARG (LONGLONG int *) = num.q;
1877 else if (need_long && (flags & LONG))
1878 *ARG (long int *) = num.l;
1879 else if (flags & SHORT)
1880 *ARG (short int *) = (short int) num.l;
1881 else if (!(flags & CHAR))
1882 *ARG (int *) = (int) num.l;
1883 else
1884 *ARG (signed char *) = (signed char) num.ul;
1885 }
1886 else
1887 {
1888 if (need_longlong && (flags & LONGDBL))
1889 *ARG (unsigned LONGLONG int *) = num.uq;
1890 else if (need_long && (flags & LONG))
1891 *ARG (unsigned long int *) = num.ul;
1892 else if (flags & SHORT)
1893 *ARG (unsigned short int *)
1894 = (unsigned short int) num.ul;
1895 else if (!(flags & CHAR))
1896 *ARG (unsigned int *) = (unsigned int) num.ul;
1897 else
1898 *ARG (unsigned char *) = (unsigned char) num.ul;
1899 }
1900 ++done;
1901 }
1902 break;
1903
1904 case L_('e'): /* Floating-point numbers. */
1905 case L_('E'):
1906 case L_('f'):
1907 case L_('F'):
1908 case L_('g'):
1909 case L_('G'):
1910 case L_('a'):
1911 case L_('A'):
1912 c = inchar ();
1913 if (width > 0)
1914 --width;
1915 if (__glibc_unlikely (c == EOF))
1916 input_error ();
1917
1918 got_digit = got_dot = got_e = got_sign = 0;
1919
1920 /* Check for a sign. */
1921 if (c == L_('-') || c == L_('+'))
1922 {
1923 got_sign = 1;
1924 char_buffer_add (&charbuf, c);
1925 if (__glibc_unlikely (width == 0 || inchar () == EOF))
1926 /* EOF is only an input error before we read any chars. */
1927 conv_error ();
1928 if (width > 0)
1929 --width;
1930 }
1931
1932 /* Take care for the special arguments "nan" and "inf". */
1933 if (TOLOWER (c) == L_('n'))
1934 {
1935 /* Maybe "nan". */
1936 char_buffer_add (&charbuf, c);
1937 if (__builtin_expect (width == 0
1938 || inchar () == EOF
1939 || TOLOWER (c) != L_('a'), 0))
1940 conv_error ();
1941 if (width > 0)
1942 --width;
1943 char_buffer_add (&charbuf, c);
1944 if (__builtin_expect (width == 0
1945 || inchar () == EOF
1946 || TOLOWER (c) != L_('n'), 0))
1947 conv_error ();
1948 if (width > 0)
1949 --width;
1950 char_buffer_add (&charbuf, c);
1951 /* It is "nan". */
1952 goto scan_float;
1953 }
1954 else if (TOLOWER (c) == L_('i'))
1955 {
1956 /* Maybe "inf" or "infinity". */
1957 char_buffer_add (&charbuf, c);
1958 if (__builtin_expect (width == 0
1959 || inchar () == EOF
1960 || TOLOWER (c) != L_('n'), 0))
1961 conv_error ();
1962 if (width > 0)
1963 --width;
1964 char_buffer_add (&charbuf, c);
1965 if (__builtin_expect (width == 0
1966 || inchar () == EOF
1967 || TOLOWER (c) != L_('f'), 0))
1968 conv_error ();
1969 if (width > 0)
1970 --width;
1971 char_buffer_add (&charbuf, c);
1972 /* It is at least "inf". */
1973 if (width != 0 && inchar () != EOF)
1974 {
1975 if (TOLOWER (c) == L_('i'))
1976 {
1977 if (width > 0)
1978 --width;
1979 /* Now we have to read the rest as well. */
1980 char_buffer_add (&charbuf, c);
1981 if (__builtin_expect (width == 0
1982 || inchar () == EOF
1983 || TOLOWER (c) != L_('n'), 0))
1984 conv_error ();
1985 if (width > 0)
1986 --width;
1987 char_buffer_add (&charbuf, c);
1988 if (__builtin_expect (width == 0
1989 || inchar () == EOF
1990 || TOLOWER (c) != L_('i'), 0))
1991 conv_error ();
1992 if (width > 0)
1993 --width;
1994 char_buffer_add (&charbuf, c);
1995 if (__builtin_expect (width == 0
1996 || inchar () == EOF
1997 || TOLOWER (c) != L_('t'), 0))
1998 conv_error ();
1999 if (width > 0)
2000 --width;
2001 char_buffer_add (&charbuf, c);
2002 if (__builtin_expect (width == 0
2003 || inchar () == EOF
2004 || TOLOWER (c) != L_('y'), 0))
2005 conv_error ();
2006 if (width > 0)
2007 --width;
2008 char_buffer_add (&charbuf, c);
2009 }
2010 else
2011 /* Never mind. */
2012 ungetc (c, s);
2013 }
2014 goto scan_float;
2015 }
2016
2017 exp_char = L_('e');
2018 if (width != 0 && c == L_('0'))
2019 {
2020 char_buffer_add (&charbuf, c);
2021 c = inchar ();
2022 if (width > 0)
2023 --width;
2024 if (width != 0 && TOLOWER (c) == L_('x'))
2025 {
2026 /* It is a number in hexadecimal format. */
2027 char_buffer_add (&charbuf, c);
2028
2029 flags |= HEXA_FLOAT;
2030 exp_char = L_('p');
2031
2032 /* Grouping is not allowed. */
2033 flags &= ~GROUP;
2034 c = inchar ();
2035 if (width > 0)
2036 --width;
2037 }
2038 else
2039 got_digit = 1;
2040 }
2041
2042 while (1)
2043 {
2044 if (char_buffer_error (&charbuf))
2045 {
2046 __set_errno (ENOMEM);
2047 done = EOF;
2048 goto errout;
2049 }
2050 if (ISDIGIT (c))
2051 {
2052 char_buffer_add (&charbuf, c);
2053 got_digit = 1;
2054 }
2055 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2056 {
2057 char_buffer_add (&charbuf, c);
2058 got_digit = 1;
2059 }
2060 else if (got_e && charbuf.current[-1] == exp_char
2061 && (c == L_('-') || c == L_('+')))
2062 char_buffer_add (&charbuf, c);
2063 else if (got_digit && !got_e
2064 && (CHAR_T) TOLOWER (c) == exp_char)
2065 {
2066 char_buffer_add (&charbuf, exp_char);
2067 got_e = got_dot = 1;
2068 }
2069 else
2070 {
2071 #ifdef COMPILE_WSCANF
2072 if (! got_dot && c == decimal)
2073 {
2074 char_buffer_add (&charbuf, c);
2075 got_dot = 1;
2076 }
2077 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2078 char_buffer_add (&charbuf, c);
2079 else
2080 {
2081 /* The last read character is not part of the number
2082 anymore. */
2083 ungetc (c, s);
2084 break;
2085 }
2086 #else
2087 const char *cmpp = decimal;
2088 int avail = width > 0 ? width : INT_MAX;
2089
2090 if (! got_dot)
2091 {
2092 while ((unsigned char) *cmpp == c && avail >= 0)
2093 if (*++cmpp == '\0')
2094 break;
2095 else
2096 {
2097 if (avail == 0 || inchar () == EOF)
2098 break;
2099 --avail;
2100 }
2101 }
2102
2103 if (*cmpp == '\0')
2104 {
2105 /* Add all the characters. */
2106 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2107 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2108 if (width > 0)
2109 width = avail;
2110 got_dot = 1;
2111 }
2112 else
2113 {
2114 /* Figure out whether it is a thousands separator.
2115 There is one problem: we possibly read more than
2116 one character. We cannot push them back but since
2117 we know that parts of the `decimal' string matched,
2118 we can compare against it. */
2119 const char *cmp2p = thousands;
2120
2121 if ((flags & GROUP) != 0 && ! got_dot)
2122 {
2123 while (cmp2p - thousands < cmpp - decimal
2124 && *cmp2p == decimal[cmp2p - thousands])
2125 ++cmp2p;
2126 if (cmp2p - thousands == cmpp - decimal)
2127 {
2128 while ((unsigned char) *cmp2p == c && avail >= 0)
2129 if (*++cmp2p == '\0')
2130 break;
2131 else
2132 {
2133 if (avail == 0 || inchar () == EOF)
2134 break;
2135 --avail;
2136 }
2137 }
2138 }
2139
2140 if (cmp2p != NULL && *cmp2p == '\0')
2141 {
2142 /* Add all the characters. */
2143 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2144 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2145 if (width > 0)
2146 width = avail;
2147 }
2148 else
2149 {
2150 /* The last read character is not part of the number
2151 anymore. */
2152 ungetc (c, s);
2153 break;
2154 }
2155 }
2156 #endif
2157 }
2158
2159 if (width == 0 || inchar () == EOF)
2160 break;
2161
2162 if (width > 0)
2163 --width;
2164 }
2165
2166 if (char_buffer_error (&charbuf))
2167 {
2168 __set_errno (ENOMEM);
2169 done = EOF;
2170 goto errout;
2171 }
2172
2173 wctrans_t map;
2174 if (__builtin_expect ((flags & I18N) != 0, 0)
2175 /* Hexadecimal floats make no sense, fixing localized
2176 digits with ASCII letters. */
2177 && !(flags & HEXA_FLOAT)
2178 /* Minimum requirement. */
2179 && (char_buffer_size (&charbuf) == got_sign || got_dot)
2180 && (map = __wctrans ("to_inpunct")) != NULL)
2181 {
2182 /* Reget the first character. */
2183 inchar ();
2184
2185 /* Localized digits, decimal points, and thousands
2186 separator. */
2187 wint_t wcdigits[12];
2188
2189 /* First get decimal equivalent to check if we read it
2190 or not. */
2191 wcdigits[11] = __towctrans (L'.', map);
2192
2193 /* If we have not read any character or have just read
2194 locale decimal point which matches the decimal point
2195 for localized FP numbers, then we may have localized
2196 digits. Note, we test GOT_DOT above. */
2197 #ifdef COMPILE_WSCANF
2198 if (char_buffer_size (&charbuf) == got_sign
2199 || (char_buffer_size (&charbuf) == got_sign + 1
2200 && wcdigits[11] == decimal))
2201 #else
2202 char mbdigits[12][MB_LEN_MAX + 1];
2203
2204 mbstate_t state;
2205 memset (&state, '\0', sizeof (state));
2206
2207 bool match_so_far = char_buffer_size (&charbuf) == got_sign;
2208 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
2209 if (mblen != (size_t) -1)
2210 {
2211 mbdigits[11][mblen] = '\0';
2212 match_so_far |=
2213 (char_buffer_size (&charbuf) == strlen (decimal) + got_sign
2214 && strcmp (decimal, mbdigits[11]) == 0);
2215 }
2216 else
2217 {
2218 size_t decimal_len = strlen (decimal);
2219 /* This should always be the case but the data comes
2220 from a file. */
2221 if (decimal_len <= MB_LEN_MAX)
2222 {
2223 match_so_far |= (char_buffer_size (&charbuf)
2224 == decimal_len + got_sign);
2225 memcpy (mbdigits[11], decimal, decimal_len + 1);
2226 }
2227 else
2228 match_so_far = false;
2229 }
2230
2231 if (match_so_far)
2232 #endif
2233 {
2234 bool have_locthousands = (flags & GROUP) != 0;
2235
2236 /* Now get the digits and the thousands-sep equivalents. */
2237 for (int n = 0; n < 11; ++n)
2238 {
2239 if (n < 10)
2240 wcdigits[n] = __towctrans (L'0' + n, map);
2241 else if (n == 10)
2242 {
2243 wcdigits[10] = __towctrans (L',', map);
2244 have_locthousands &= wcdigits[10] != L'\0';
2245 }
2246
2247 #ifndef COMPILE_WSCANF
2248 memset (&state, '\0', sizeof (state));
2249
2250 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
2251 &state);
2252 if (mblen == (size_t) -1)
2253 {
2254 if (n == 10)
2255 {
2256 if (have_locthousands)
2257 {
2258 size_t thousands_len = strlen (thousands);
2259 if (thousands_len <= MB_LEN_MAX)
2260 memcpy (mbdigits[10], thousands,
2261 thousands_len + 1);
2262 else
2263 have_locthousands = false;
2264 }
2265 }
2266 else
2267 /* Ignore checking against localized digits. */
2268 goto no_i18nflt;
2269 }
2270 else
2271 mbdigits[n][mblen] = '\0';
2272 #endif
2273 }
2274
2275 /* Start checking against localized digits, if
2276 conversion is done correctly. */
2277 while (1)
2278 {
2279 if (char_buffer_error (&charbuf))
2280 {
2281 __set_errno (ENOMEM);
2282 done = EOF;
2283 goto errout;
2284 }
2285 if (got_e && charbuf.current[-1] == exp_char
2286 && (c == L_('-') || c == L_('+')))
2287 char_buffer_add (&charbuf, c);
2288 else if (char_buffer_size (&charbuf) > got_sign && !got_e
2289 && (CHAR_T) TOLOWER (c) == exp_char)
2290 {
2291 char_buffer_add (&charbuf, exp_char);
2292 got_e = got_dot = 1;
2293 }
2294 else
2295 {
2296 /* Check against localized digits, decimal point,
2297 and thousands separator. */
2298 int n;
2299 for (n = 0; n < 12; ++n)
2300 {
2301 #ifdef COMPILE_WSCANF
2302 if (c == wcdigits[n])
2303 {
2304 if (n < 10)
2305 char_buffer_add (&charbuf, L_('0') + n);
2306 else if (n == 11 && !got_dot)
2307 {
2308 char_buffer_add (&charbuf, decimal);
2309 got_dot = 1;
2310 }
2311 else if (n == 10 && have_locthousands
2312 && ! got_dot)
2313 char_buffer_add (&charbuf, thousands);
2314 else
2315 /* The last read character is not part
2316 of the number anymore. */
2317 n = 12;
2318
2319 break;
2320 }
2321 #else
2322 const char *cmpp = mbdigits[n];
2323 int avail = width > 0 ? width : INT_MAX;
2324
2325 while ((unsigned char) *cmpp == c && avail >= 0)
2326 if (*++cmpp == '\0')
2327 break;
2328 else
2329 {
2330 if (avail == 0 || inchar () == EOF)
2331 break;
2332 --avail;
2333 }
2334 if (*cmpp == '\0')
2335 {
2336 if (width > 0)
2337 width = avail;
2338
2339 if (n < 10)
2340 char_buffer_add (&charbuf, L_('0') + n);
2341 else if (n == 11 && !got_dot)
2342 {
2343 /* Add all the characters. */
2344 for (cmpp = decimal; *cmpp != '\0';
2345 ++cmpp)
2346 char_buffer_add (&charbuf,
2347 (unsigned char) *cmpp);
2348
2349 got_dot = 1;
2350 }
2351 else if (n == 10 && (flags & GROUP) != 0
2352 && ! got_dot)
2353 {
2354 /* Add all the characters. */
2355 for (cmpp = thousands; *cmpp != '\0';
2356 ++cmpp)
2357 char_buffer_add (&charbuf,
2358 (unsigned char) *cmpp);
2359 }
2360 else
2361 /* The last read character is not part
2362 of the number anymore. */
2363 n = 12;
2364
2365 break;
2366 }
2367
2368 /* We are pushing all read characters back. */
2369 if (cmpp > mbdigits[n])
2370 {
2371 ungetc (c, s);
2372 while (--cmpp > mbdigits[n])
2373 ungetc_not_eof ((unsigned char) *cmpp, s);
2374 c = (unsigned char) *cmpp;
2375 }
2376 #endif
2377 }
2378
2379 if (n >= 12)
2380 {
2381 /* The last read character is not part
2382 of the number anymore. */
2383 ungetc (c, s);
2384 break;
2385 }
2386 }
2387
2388 if (width == 0 || inchar () == EOF)
2389 break;
2390
2391 if (width > 0)
2392 --width;
2393 }
2394 }
2395
2396 #ifndef COMPILE_WSCANF
2397 no_i18nflt:
2398 ;
2399 #endif
2400 }
2401
2402 if (char_buffer_error (&charbuf))
2403 {
2404 __set_errno (ENOMEM);
2405 done = EOF;
2406 goto errout;
2407 }
2408
2409 /* Have we read any character? If we try to read a number
2410 in hexadecimal notation and we have read only the `0x'
2411 prefix this is an error. */
2412 if (__glibc_unlikely (char_buffer_size (&charbuf) == got_sign
2413 || ((flags & HEXA_FLOAT)
2414 && (char_buffer_size (&charbuf)
2415 == 2 + got_sign))))
2416 conv_error ();
2417
2418 scan_float:
2419 /* Convert the number. */
2420 char_buffer_add (&charbuf, L_('\0'));
2421 if (char_buffer_error (&charbuf))
2422 {
2423 __set_errno (ENOMEM);
2424 done = EOF;
2425 goto errout;
2426 }
2427 #if __HAVE_FLOAT128_UNLIKE_LDBL
2428 if ((flags & LONGDBL) \
2429 && (mode_flags & SCANF_LDBL_USES_FLOAT128) != 0)
2430 {
2431 _Float128 d = __strtof128_internal
2432 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2433 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2434 *ARG (_Float128 *) = d;
2435 }
2436 else
2437 #endif
2438 if ((flags & LONGDBL) \
2439 && __glibc_likely ((mode_flags & SCANF_LDBL_IS_DBL) == 0))
2440 {
2441 long double d = __strtold_internal
2442 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2443 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2444 *ARG (long double *) = d;
2445 }
2446 else if (flags & (LONG | LONGDBL))
2447 {
2448 double d = __strtod_internal
2449 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2450 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2451 *ARG (double *) = d;
2452 }
2453 else
2454 {
2455 float d = __strtof_internal
2456 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2457 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2458 *ARG (float *) = d;
2459 }
2460
2461 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2462 conv_error ();
2463
2464 if (!(flags & SUPPRESS))
2465 ++done;
2466 break;
2467
2468 case L_('['): /* Character class. */
2469 if (flags & LONG)
2470 STRING_ARG (wstr, wchar_t, 100);
2471 else
2472 STRING_ARG (str, char, 100);
2473
2474 if (*f == L_('^'))
2475 {
2476 ++f;
2477 not_in = 1;
2478 }
2479 else
2480 not_in = 0;
2481
2482
2483 #ifdef COMPILE_WSCANF
2484 /* Find the beginning and the end of the scanlist. We are not
2485 creating a lookup table since it would have to be too large.
2486 Instead we search each time through the string. This is not
2487 a constant lookup time but who uses this feature deserves to
2488 be punished. */
2489 tw = (wchar_t *) f; /* Marks the beginning. */
2490
2491 if (*f == L']')
2492 ++f;
2493
2494 while ((fc = *f++) != L'\0' && fc != L']');
2495
2496 if (__glibc_unlikely (fc == L'\0'))
2497 conv_error ();
2498 wchar_t *twend = (wchar_t *) f - 1;
2499 #else
2500 /* Fill the scratch buffer with byte flags indexed by character.
2501 We will use this flag map for matching input characters. */
2502 if (!scratch_buffer_set_array_size
2503 (&charbuf.scratch, UCHAR_MAX + 1, 1))
2504 {
2505 done = EOF;
2506 goto errout;
2507 }
2508 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2509
2510 fc = *f;
2511 if (fc == ']' || fc == '-')
2512 {
2513 /* If ] or - appears before any char in the set, it is not
2514 the terminator or separator, but the first char in the
2515 set. */
2516 ((char *)charbuf.scratch.data)[fc] = 1;
2517 ++f;
2518 }
2519
2520 while ((fc = *f++) != '\0' && fc != ']')
2521 if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
2522 {
2523 /* Add all characters from the one before the '-'
2524 up to (but not including) the next format char. */
2525 for (fc = f[-2]; fc < *f; ++fc)
2526 ((char *)charbuf.scratch.data)[fc] = 1;
2527 }
2528 else
2529 /* Add the character to the flag map. */
2530 ((char *)charbuf.scratch.data)[fc] = 1;
2531
2532 if (__glibc_unlikely (fc == '\0'))
2533 conv_error();
2534 #endif
2535
2536 if (flags & LONG)
2537 {
2538 size_t now = read_in;
2539 #ifdef COMPILE_WSCANF
2540 if (__glibc_unlikely (inchar () == WEOF))
2541 input_error ();
2542
2543 do
2544 {
2545 wchar_t *runp;
2546
2547 /* Test whether it's in the scanlist. */
2548 runp = tw;
2549 while (runp < twend)
2550 {
2551 if (runp[0] == L'-' && runp[1] != '\0'
2552 && runp + 1 != twend
2553 && runp != tw
2554 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2555 {
2556 /* Match against all characters in between the
2557 first and last character of the sequence. */
2558 wchar_t wc;
2559
2560 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2561 if ((wint_t) wc == c)
2562 break;
2563
2564 if (wc <= runp[1] && !not_in)
2565 break;
2566 if (wc <= runp[1] && not_in)
2567 {
2568 /* The current character is not in the
2569 scanset. */
2570 ungetc (c, s);
2571 goto out;
2572 }
2573
2574 runp += 2;
2575 }
2576 else
2577 {
2578 if ((wint_t) *runp == c && !not_in)
2579 break;
2580 if ((wint_t) *runp == c && not_in)
2581 {
2582 ungetc (c, s);
2583 goto out;
2584 }
2585
2586 ++runp;
2587 }
2588 }
2589
2590 if (runp == twend && !not_in)
2591 {
2592 ungetc (c, s);
2593 goto out;
2594 }
2595
2596 if (!(flags & SUPPRESS))
2597 {
2598 *wstr++ = c;
2599
2600 if ((flags & MALLOC)
2601 && wstr == (wchar_t *) *strptr + strsize)
2602 {
2603 /* Enlarge the buffer. */
2604 wstr = (wchar_t *) realloc (*strptr,
2605 (2 * strsize)
2606 * sizeof (wchar_t));
2607 if (wstr == NULL)
2608 {
2609 /* Can't allocate that much. Last-ditch
2610 effort. */
2611 wstr = (wchar_t *)
2612 realloc (*strptr, (strsize + 1)
2613 * sizeof (wchar_t));
2614 if (wstr == NULL)
2615 {
2616 if (flags & POSIX_MALLOC)
2617 {
2618 done = EOF;
2619 goto errout;
2620 }
2621 /* We lose. Oh well. Terminate the string
2622 and stop converting, so at least we don't
2623 skip any input. */
2624 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2625 strptr = NULL;
2626 ++done;
2627 conv_error ();
2628 }
2629 else
2630 {
2631 *strptr = (char *) wstr;
2632 wstr += strsize;
2633 ++strsize;
2634 }
2635 }
2636 else
2637 {
2638 *strptr = (char *) wstr;
2639 wstr += strsize;
2640 strsize *= 2;
2641 }
2642 }
2643 }
2644 }
2645 while ((width < 0 || --width > 0) && inchar () != WEOF);
2646 out:
2647 #else
2648 char buf[MB_LEN_MAX];
2649 size_t cnt = 0;
2650 mbstate_t cstate;
2651
2652 if (__glibc_unlikely (inchar () == EOF))
2653 input_error ();
2654
2655 memset (&cstate, '\0', sizeof (cstate));
2656
2657 do
2658 {
2659 if (((char *) charbuf.scratch.data)[c] == not_in)
2660 {
2661 ungetc_not_eof (c, s);
2662 break;
2663 }
2664
2665 /* This is easy. */
2666 if (!(flags & SUPPRESS))
2667 {
2668 size_t n;
2669
2670 /* Convert it into a wide character. */
2671 buf[0] = c;
2672 n = __mbrtowc (wstr, buf, 1, &cstate);
2673
2674 if (n == (size_t) -2)
2675 {
2676 /* Possibly correct character, just not enough
2677 input. */
2678 ++cnt;
2679 assert (cnt < MB_LEN_MAX);
2680 continue;
2681 }
2682 cnt = 0;
2683
2684 ++wstr;
2685 if ((flags & MALLOC)
2686 && wstr == (wchar_t *) *strptr + strsize)
2687 {
2688 /* Enlarge the buffer. */
2689 wstr = (wchar_t *) realloc (*strptr,
2690 (2 * strsize
2691 * sizeof (wchar_t)));
2692 if (wstr == NULL)
2693 {
2694 /* Can't allocate that much. Last-ditch
2695 effort. */
2696 wstr = (wchar_t *)
2697 realloc (*strptr, ((strsize + 1)
2698 * sizeof (wchar_t)));
2699 if (wstr == NULL)
2700 {
2701 if (flags & POSIX_MALLOC)
2702 {
2703 done = EOF;
2704 goto errout;
2705 }
2706 /* We lose. Oh well. Terminate the
2707 string and stop converting,
2708 so at least we don't skip any input. */
2709 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2710 strptr = NULL;
2711 ++done;
2712 conv_error ();
2713 }
2714 else
2715 {
2716 *strptr = (char *) wstr;
2717 wstr += strsize;
2718 ++strsize;
2719 }
2720 }
2721 else
2722 {
2723 *strptr = (char *) wstr;
2724 wstr += strsize;
2725 strsize *= 2;
2726 }
2727 }
2728 }
2729
2730 if (width >= 0 && --width <= 0)
2731 break;
2732 }
2733 while (inchar () != EOF);
2734
2735 if (__glibc_unlikely (cnt != 0))
2736 /* We stopped in the middle of recognizing another
2737 character. That's a problem. */
2738 encode_error ();
2739 #endif
2740
2741 if (__glibc_unlikely (now == read_in))
2742 /* We haven't successfully read any character. */
2743 conv_error ();
2744
2745 if (!(flags & SUPPRESS))
2746 {
2747 *wstr++ = L'\0';
2748
2749 if ((flags & MALLOC)
2750 && wstr - (wchar_t *) *strptr != strsize)
2751 {
2752 wchar_t *cp = (wchar_t *)
2753 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2754 * sizeof (wchar_t)));
2755 if (cp != NULL)
2756 *strptr = (char *) cp;
2757 }
2758 strptr = NULL;
2759
2760 ++done;
2761 }
2762 }
2763 else
2764 {
2765 size_t now = read_in;
2766
2767 if (__glibc_unlikely (inchar () == EOF))
2768 input_error ();
2769
2770 #ifdef COMPILE_WSCANF
2771
2772 memset (&state, '\0', sizeof (state));
2773
2774 do
2775 {
2776 wchar_t *runp;
2777 size_t n;
2778
2779 /* Test whether it's in the scanlist. */
2780 runp = tw;
2781 while (runp < twend)
2782 {
2783 if (runp[0] == L'-' && runp[1] != '\0'
2784 && runp + 1 != twend
2785 && runp != tw
2786 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2787 {
2788 /* Match against all characters in between the
2789 first and last character of the sequence. */
2790 wchar_t wc;
2791
2792 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2793 if ((wint_t) wc == c)
2794 break;
2795
2796 if (wc <= runp[1] && !not_in)
2797 break;
2798 if (wc <= runp[1] && not_in)
2799 {
2800 /* The current character is not in the
2801 scanset. */
2802 ungetc (c, s);
2803 goto out2;
2804 }
2805
2806 runp += 2;
2807 }
2808 else
2809 {
2810 if ((wint_t) *runp == c && !not_in)
2811 break;
2812 if ((wint_t) *runp == c && not_in)
2813 {
2814 ungetc (c, s);
2815 goto out2;
2816 }
2817
2818 ++runp;
2819 }
2820 }
2821
2822 if (runp == twend && !not_in)
2823 {
2824 ungetc (c, s);
2825 goto out2;
2826 }
2827
2828 if (!(flags & SUPPRESS))
2829 {
2830 if ((flags & MALLOC)
2831 && *strptr + strsize - str <= MB_LEN_MAX)
2832 {
2833 /* Enlarge the buffer. */
2834 size_t strleng = str - *strptr;
2835 char *newstr;
2836
2837 newstr = (char *) realloc (*strptr, 2 * strsize);
2838 if (newstr == NULL)
2839 {
2840 /* Can't allocate that much. Last-ditch
2841 effort. */
2842 newstr = (char *) realloc (*strptr,
2843 strleng + MB_LEN_MAX);
2844 if (newstr == NULL)
2845 {
2846 if (flags & POSIX_MALLOC)
2847 {
2848 done = EOF;
2849 goto errout;
2850 }
2851 /* We lose. Oh well. Terminate the string
2852 and stop converting, so at least we don't
2853 skip any input. */
2854 ((char *) (*strptr))[strleng] = '\0';
2855 strptr = NULL;
2856 ++done;
2857 conv_error ();
2858 }
2859 else
2860 {
2861 *strptr = newstr;
2862 str = newstr + strleng;
2863 strsize = strleng + MB_LEN_MAX;
2864 }
2865 }
2866 else
2867 {
2868 *strptr = newstr;
2869 str = newstr + strleng;
2870 strsize *= 2;
2871 }
2872 }
2873 }
2874
2875 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2876 if (__glibc_unlikely (n == (size_t) -1))
2877 encode_error ();
2878
2879 assert (n <= MB_LEN_MAX);
2880 str += n;
2881 }
2882 while ((width < 0 || --width > 0) && inchar () != WEOF);
2883 out2:
2884 #else
2885 do
2886 {
2887 if (((char *) charbuf.scratch.data)[c] == not_in)
2888 {
2889 ungetc_not_eof (c, s);
2890 break;
2891 }
2892
2893 /* This is easy. */
2894 if (!(flags & SUPPRESS))
2895 {
2896 *str++ = c;
2897 if ((flags & MALLOC)
2898 && (char *) str == *strptr + strsize)
2899 {
2900 /* Enlarge the buffer. */
2901 size_t newsize = 2 * strsize;
2902
2903 allocagain:
2904 str = (char *) realloc (*strptr, newsize);
2905 if (str == NULL)
2906 {
2907 /* Can't allocate that much. Last-ditch
2908 effort. */
2909 if (newsize > strsize + 1)
2910 {
2911 newsize = strsize + 1;
2912 goto allocagain;
2913 }
2914 if (flags & POSIX_MALLOC)
2915 {
2916 done = EOF;
2917 goto errout;
2918 }
2919 /* We lose. Oh well. Terminate the
2920 string and stop converting,
2921 so at least we don't skip any input. */
2922 ((char *) (*strptr))[strsize - 1] = '\0';
2923 strptr = NULL;
2924 ++done;
2925 conv_error ();
2926 }
2927 else
2928 {
2929 *strptr = (char *) str;
2930 str += strsize;
2931 strsize = newsize;
2932 }
2933 }
2934 }
2935 }
2936 while ((width < 0 || --width > 0) && inchar () != EOF);
2937 #endif
2938
2939 if (__glibc_unlikely (now == read_in))
2940 /* We haven't successfully read any character. */
2941 conv_error ();
2942
2943 if (!(flags & SUPPRESS))
2944 {
2945 #ifdef COMPILE_WSCANF
2946 /* We have to emit the code to get into the initial
2947 state. */
2948 char buf[MB_LEN_MAX];
2949 size_t n = __wcrtomb (buf, L'\0', &state);
2950 if (n > 0 && (flags & MALLOC)
2951 && str + n >= *strptr + strsize)
2952 {
2953 /* Enlarge the buffer. */
2954 size_t strleng = str - *strptr;
2955 char *newstr;
2956
2957 newstr = (char *) realloc (*strptr, strleng + n + 1);
2958 if (newstr == NULL)
2959 {
2960 if (flags & POSIX_MALLOC)
2961 {
2962 done = EOF;
2963 goto errout;
2964 }
2965 /* We lose. Oh well. Terminate the string
2966 and stop converting, so at least we don't
2967 skip any input. */
2968 ((char *) (*strptr))[strleng] = '\0';
2969 strptr = NULL;
2970 ++done;
2971 conv_error ();
2972 }
2973 else
2974 {
2975 *strptr = newstr;
2976 str = newstr + strleng;
2977 strsize = strleng + n + 1;
2978 }
2979 }
2980
2981 str = __mempcpy (str, buf, n);
2982 #endif
2983 *str++ = '\0';
2984
2985 if ((flags & MALLOC) && str - *strptr != strsize)
2986 {
2987 char *cp = (char *) realloc (*strptr, str - *strptr);
2988 if (cp != NULL)
2989 *strptr = cp;
2990 }
2991 strptr = NULL;
2992
2993 ++done;
2994 }
2995 }
2996 break;
2997
2998 case L_('p'): /* Generic pointer. */
2999 base = 16;
3000 /* A PTR must be the same size as a `long int'. */
3001 flags &= ~(SHORT|LONGDBL);
3002 if (need_long)
3003 flags |= LONG;
3004 flags |= READ_POINTER;
3005 goto number;
3006
3007 default:
3008 /* If this is an unknown format character punt. */
3009 conv_error ();
3010 }
3011 }
3012
3013 /* The last thing we saw in the format string was a white space.
3014 Consume the last white spaces. */
3015 if (skip_space)
3016 {
3017 do
3018 c = inchar ();
3019 while (ISSPACE (c));
3020 ungetc (c, s);
3021 }
3022
3023 errout:
3024 /* Unlock stream. */
3025 UNLOCK_STREAM (s);
3026
3027 scratch_buffer_free (&charbuf.scratch);
3028
3029 if (__glibc_unlikely (done == EOF))
3030 {
3031 if (__glibc_unlikely (ptrs_to_free != NULL))
3032 {
3033 struct ptrs_to_free *p = ptrs_to_free;
3034 while (p != NULL)
3035 {
3036 for (size_t cnt = 0; cnt < p->count; ++cnt)
3037 {
3038 free (*p->ptrs[cnt]);
3039 *p->ptrs[cnt] = NULL;
3040 }
3041 p = p->next;
3042 ptrs_to_free = p;
3043 }
3044 }
3045 }
3046 else if (__glibc_unlikely (strptr != NULL))
3047 {
3048 free (*strptr);
3049 *strptr = NULL;
3050 }
3051 return done;
3052 }
3053