/* Convert string representing a number to integer value, using given locale.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#if HAVE_CONFIG_H
# include <config.h>
#endif

/* When built as part of the C library itself, number grouping support
   is enabled and <limits.h> is known to be usable.  */
#ifdef _LIBC
# define USE_NUMBER_GROUPING
# define HAVE_LIMITS_H
#endif

#include <ctype.h>
#include <errno.h>
/* Fallback used when nothing has provided __set_errno already.  */
#ifndef __set_errno
# define __set_errno(Val) errno = (Val)
#endif

#ifdef HAVE_LIMITS_H
# include <limits.h>
#endif

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <stdint.h>
#include <bits/wordsize.h>

/* Needed for the locale data accessors used by the grouping code.  */
#ifdef USE_NUMBER_GROUPING
# include "../locale/localeinfo.h"
#endif

/* Nonzero if we are defining `strtoul' or `strtoull', operating on
   unsigned integers.  A file that wants the unsigned variant defines
   UNSIGNED before this point; otherwise it defaults to 0 here.

   INT is the return type of the generated functions.  It is spelled in
   terms of LONG, so it may only be expanded where LONG is defined.  */
#ifndef UNSIGNED
# define UNSIGNED 0
# define INT LONG int
#else
# define INT unsigned LONG int
#endif

/* Determine the name.  `strtol_l' is mapped onto the public name of the
   function being generated, selected by UNSIGNED, USE_WIDE_CHAR and QUAD:

                           narrow         wide (USE_WIDE_CHAR)
     signed                strtol_l       wcstol_l
     signed,   QUAD        strtoll_l      wcstoll_l
     unsigned              strtoul_l      wcstoul_l
     unsigned, QUAD        strtoull_l     wcstoull_l  */
#if UNSIGNED
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol_l wcstoull_l
#  else
#   define strtol_l wcstoul_l
#  endif
# else
#  ifdef QUAD
#   define strtol_l strtoull_l
#  else
#   define strtol_l strtoul_l
#  endif
# endif
#else
# ifdef USE_WIDE_CHAR
#  ifdef QUAD
#   define strtol_l wcstoll_l
#  else
#   define strtol_l wcstol_l
#  endif
# else
#  ifdef QUAD
#   define strtol_l strtoll_l
#  else
#   define strtol_l strtol_l
#  endif
# endif
#endif

/* `__strtol_l' is the selected name with two underscores prepended.
   The intermediate macro levels let `strtol_l' be replaced by the
   selected name before `##' pastes the prefix onto it.  */
#define __strtol_l __strtol_l2(strtol_l)
#define __strtol_l2(name) __strtol_l3(name)
#define __strtol_l3(name) __##name


/* If QUAD is defined, we are defining `strtoll' or `strtoull',
   operating on `long long int's.  LONG is the width keyword sequence
   and the STRTOL_* macros are the matching limits.  */
#ifdef QUAD
# define LONG long long
# define STRTOL_LONG_MIN LONG_LONG_MIN
# define STRTOL_LONG_MAX LONG_LONG_MAX
# define STRTOL_ULONG_MAX ULONG_LONG_MAX
#else
# define LONG long

/* Fallbacks for builds in which <limits.h> was not included.  The
   LONG_MIN fallback assumes a two's complement `long int'.  */
# ifndef ULONG_MAX
#  define ULONG_MAX ((unsigned long int) ~(unsigned long int) 0)
# endif
# ifndef LONG_MAX
#  define LONG_MAX ((long int) (ULONG_MAX >> 1))
# endif
# ifndef LONG_MIN
#  define LONG_MIN (-LONG_MAX - 1)
# endif
# define STRTOL_LONG_MIN LONG_MIN
# define STRTOL_LONG_MAX LONG_MAX
# define STRTOL_ULONG_MAX ULONG_MAX
#endif


/* We use this code for the extended locale handling where the
   function gets as an additional argument the locale which has to be
   used.  To access the values we have to redefine the _NL_CURRENT and
   _NL_CURRENT_WORD macros.

   Both replacements ignore CATEGORY and read the item from a variable
   named `current', which therefore has to be in scope wherever they
   are expanded.  */
#undef _NL_CURRENT
#define _NL_CURRENT(category, item) \
  (current->values[_NL_ITEM_INDEX (item)].string)
#undef _NL_CURRENT_WORD
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) current->values[_NL_ITEM_INDEX (item)].word)

#if defined _LIBC || defined HAVE_WCHAR_H
# include <wchar.h>
#endif

/* Character abstraction, so the same function body can be compiled for
   narrow and for wide strings:

     L_(Ch)       character literal of the string's character type
     UCHAR_TYPE   type used to hold one character while it is examined
     STRING_TYPE  element type of the input string
     ISSPACE      white space test; uses a variable named `loc', which
                  must be in scope where the macro is expanded
     ISALPHA,
     TOUPPER      letter test and case mapping; these always use
                  `_nl_C_locobj_ptr' rather than `loc'  */
#ifdef USE_WIDE_CHAR
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
# define ISSPACE(Ch) __iswspace_l ((Ch), loc)
# define ISALPHA(Ch) __iswalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __towupper_l ((Ch), _nl_C_locobj_ptr)
#else
# if defined _LIBC \
   || defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#  define IN_CTYPE_DOMAIN(c) 1
# else
#  define IN_CTYPE_DOMAIN(c) isascii(c)
# endif
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
# define ISSPACE(Ch) __isspace_l ((Ch), loc)
# define ISALPHA(Ch) __isalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __toupper_l ((Ch), _nl_C_locobj_ptr)
#endif

/* INTERNAL (X) yields `__<X>_internal'.  X is macro-expanded first, so
   INTERNAL (__strtol_l) is built from the name selected above.  */
#define INTERNAL(X) INTERNAL1(X)
#define INTERNAL1(X) __##X##_internal
/* NOTE(review): WEAKNAME1 is not defined in the part of the file that
   is visible here, and WEAKNAME is not used below -- confirm whether
   this macro is still needed.  */
#define WEAKNAME(X) WEAKNAME1(X)

#ifdef USE_NUMBER_GROUPING
/* This file defines a function to check for correct grouping.  */
# include "grouping.h"
#endif


165 /* Define tables of maximum values and remainders in order to detect
166    overflow.  Do this at compile-time in order to avoid the runtime
167    overhead of the division.  */
168 extern const unsigned long __strtol_ul_max_tab[] attribute_hidden;
169 extern const unsigned char __strtol_ul_rem_tab[] attribute_hidden;
170 #if defined(QUAD) && __WORDSIZE == 32
171 extern const unsigned long long __strtol_ull_max_tab[] attribute_hidden;
172 extern const unsigned char __strtol_ull_rem_tab[] attribute_hidden;
173 #endif
174 
175 #define DEF(TYPE, NAME)							   \
176   const TYPE NAME[] attribute_hidden =					   \
177   {									   \
178     F(2), F(3), F(4), F(5), F(6), F(7), F(8), F(9), F(10), 		   \
179     F(11), F(12), F(13), F(14), F(15), F(16), F(17), F(18), F(19), F(20),  \
180     F(21), F(22), F(23), F(24), F(25), F(26), F(27), F(28), F(29), F(30),  \
181     F(31), F(32), F(33), F(34), F(35), F(36)				   \
182   }
183 
184 #if !UNSIGNED && !defined (USE_WIDE_CHAR) && !defined (QUAD)
185 # define F(X)	ULONG_MAX / X
186   DEF (unsigned long, __strtol_ul_max_tab);
187 # undef F
188 # define F(X)	ULONG_MAX % X
189   DEF (unsigned char, __strtol_ul_rem_tab);
190 # undef F
191 #endif
192 #if !UNSIGNED && !defined (USE_WIDE_CHAR) && defined (QUAD) \
193     && __WORDSIZE == 32
194 # define F(X)	ULONG_LONG_MAX / X
195   DEF (unsigned long long, __strtol_ull_max_tab);
196 # undef F
197 # define F(X)	ULONG_LONG_MAX % X
198   DEF (unsigned char, __strtol_ull_rem_tab);
199 # undef F
200 #endif
201 #undef DEF
202 
/* Define some more readable aliases for these arrays which correspond
   to how they'll be used in the function below.  jmax_tab always refers
   to the `unsigned long' table; cutoff_tab and cutlim_tab refer to the
   `unsigned long long' tables only for QUAD with __WORDSIZE == 32 and
   to the `unsigned long' tables otherwise.  */
#define jmax_tab __strtol_ul_max_tab
#if defined(QUAD) && __WORDSIZE == 32
# define cutoff_tab __strtol_ull_max_tab
# define cutlim_tab __strtol_ull_rem_tab
#else
# define cutoff_tab __strtol_ul_max_tab
# define cutlim_tab __strtol_ul_rem_tab
#endif


215 /* Convert NPTR to an `unsigned long int' or `long int' in base BASE.
216    If BASE is 0 the base is determined by the presence of a leading
217    zero, indicating octal or a leading "0x" or "0X", indicating hexadecimal.
218    If BASE is < 2 or > 36, it is reset to 10.
219    If ENDPTR is not NULL, a pointer to the character after the last
220    one converted is stored in *ENDPTR.  */
221 
222 INT
INTERNAL(__strtol_l)223 INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
224 		       int base, int group, locale_t loc)
225 {
226   int negative;
227   unsigned LONG int cutoff;
228   unsigned int cutlim;
229   unsigned LONG int i;
230   const STRING_TYPE *s;
231   UCHAR_TYPE c;
232   const STRING_TYPE *save, *end;
233   int overflow;
234 #ifndef USE_WIDE_CHAR
235   size_t cnt;
236 #endif
237 
238 #ifdef USE_NUMBER_GROUPING
239   struct __locale_data *current = loc->__locales[LC_NUMERIC];
240   /* The thousands character of the current locale.  */
241 # ifdef USE_WIDE_CHAR
242   wchar_t thousands = L'\0';
243 # else
244   const char *thousands = NULL;
245   size_t thousands_len = 0;
246 # endif
247   /* The numeric grouping specification of the current locale,
248      in the format described in <locale.h>.  */
249   const char *grouping;
250 
251   if (__glibc_unlikely (group))
252     {
253       grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
254       if (*grouping <= 0 || *grouping == CHAR_MAX)
255 	grouping = NULL;
256       else
257 	{
258 	  /* Figure out the thousands separator character.  */
259 # ifdef USE_WIDE_CHAR
260 #  ifdef _LIBC
261 	  thousands = _NL_CURRENT_WORD (LC_NUMERIC,
262 					_NL_NUMERIC_THOUSANDS_SEP_WC);
263 #  endif
264 	  if (thousands == L'\0')
265 	    grouping = NULL;
266 # else
267 #  ifdef _LIBC
268 	  thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
269 #  endif
270 	  if (*thousands == '\0')
271 	    {
272 	      thousands = NULL;
273 	      grouping = NULL;
274 	    }
275 # endif
276 	}
277     }
278   else
279     grouping = NULL;
280 #endif
281 
282   if (base < 0 || base == 1 || base > 36)
283     {
284       __set_errno (EINVAL);
285       return 0;
286     }
287 
288   save = s = nptr;
289 
290   /* Skip white space.  */
291   while (ISSPACE (*s))
292     ++s;
293   if (__glibc_unlikely (*s == L_('\0')))
294     goto noconv;
295 
296   /* Check for a sign.  */
297   negative = 0;
298   if (*s == L_('-'))
299     {
300       negative = 1;
301       ++s;
302     }
303   else if (*s == L_('+'))
304     ++s;
305 
306   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
307   if (*s == L_('0'))
308     {
309       if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
310 	{
311 	  s += 2;
312 	  base = 16;
313 	}
314       else if (base == 0)
315 	base = 8;
316     }
317   else if (base == 0)
318     base = 10;
319 
320   /* Save the pointer so we can check later if anything happened.  */
321   save = s;
322 
323 #ifdef USE_NUMBER_GROUPING
324   if (base != 10)
325     grouping = NULL;
326 
327   if (__glibc_unlikely (grouping != NULL))
328     {
329 # ifndef USE_WIDE_CHAR
330       thousands_len = strlen (thousands);
331 # endif
332 
333       /* Find the end of the digit string and check its grouping.  */
334       end = s;
335       if (
336 # ifdef USE_WIDE_CHAR
337 	  *s != thousands
338 # else
339 	  ({ for (cnt = 0; cnt < thousands_len; ++cnt)
340 	       if (thousands[cnt] != end[cnt])
341 		 break;
342 	     cnt < thousands_len; })
343 # endif
344 	  )
345 	{
346 	  for (c = *end; c != L_('\0'); c = *++end)
347 	    if (((STRING_TYPE) c < L_('0') || (STRING_TYPE) c > L_('9'))
348 # ifdef USE_WIDE_CHAR
349 		&& (wchar_t) c != thousands
350 # else
351 		&& ({ for (cnt = 0; cnt < thousands_len; ++cnt)
352 			if (thousands[cnt] != end[cnt])
353 			  break;
354 		      cnt < thousands_len; })
355 # endif
356 		&& (!ISALPHA (c)
357 		    || (int) (TOUPPER (c) - L_('A') + 10) >= base))
358 	      break;
359 
360 # ifdef USE_WIDE_CHAR
361 	  end = __correctly_grouped_prefixwc (s, end, thousands, grouping);
362 # else
363 	  end = __correctly_grouped_prefixmb (s, end, thousands, grouping);
364 # endif
365 	}
366     }
367   else
368 #endif
369     end = NULL;
370 
371   /* Avoid runtime division; lookup cutoff and limit.  */
372   cutoff = cutoff_tab[base - 2];
373   cutlim = cutlim_tab[base - 2];
374 
375   overflow = 0;
376   i = 0;
377   c = *s;
378   if (sizeof (long int) != sizeof (LONG int))
379     {
380       unsigned long int j = 0;
381       unsigned long int jmax = jmax_tab[base - 2];
382 
383       for (;c != L_('\0'); c = *++s)
384 	{
385 	  if (s == end)
386 	    break;
387 	  if (c >= L_('0') && c <= L_('9'))
388 	    c -= L_('0');
389 #ifdef USE_NUMBER_GROUPING
390 # ifdef USE_WIDE_CHAR
391 	  else if (grouping && (wchar_t) c == thousands)
392 	    continue;
393 # else
394 	  else if (thousands_len)
395 	    {
396 	      for (cnt = 0; cnt < thousands_len; ++cnt)
397 		if (thousands[cnt] != s[cnt])
398 		  break;
399 	      if (cnt == thousands_len)
400 		{
401 		  s += thousands_len - 1;
402 		  continue;
403 		}
404 	      if (ISALPHA (c))
405 		c = TOUPPER (c) - L_('A') + 10;
406 	      else
407 		break;
408 	    }
409 # endif
410 #endif
411 	  else if (ISALPHA (c))
412 	    c = TOUPPER (c) - L_('A') + 10;
413 	  else
414 	    break;
415 	  if ((int) c >= base)
416 	    break;
417 	  /* Note that we never can have an overflow.  */
418 	  else if (j >= jmax)
419 	    {
420 	      /* We have an overflow.  Now use the long representation.  */
421 	      i = (unsigned LONG int) j;
422 	      goto use_long;
423 	    }
424 	  else
425 	    j = j * (unsigned long int) base + c;
426 	}
427 
428       i = (unsigned LONG int) j;
429     }
430   else
431     for (;c != L_('\0'); c = *++s)
432       {
433 	if (s == end)
434 	  break;
435 	if (c >= L_('0') && c <= L_('9'))
436 	  c -= L_('0');
437 #ifdef USE_NUMBER_GROUPING
438 # ifdef USE_WIDE_CHAR
439 	else if (grouping && (wchar_t) c == thousands)
440 	  continue;
441 # else
442 	else if (thousands_len)
443 	  {
444 	    for (cnt = 0; cnt < thousands_len; ++cnt)
445 	      if (thousands[cnt] != s[cnt])
446 		break;
447 	    if (cnt == thousands_len)
448 	      {
449 		s += thousands_len - 1;
450 		continue;
451 	      }
452 	    if (ISALPHA (c))
453 	      c = TOUPPER (c) - L_('A') + 10;
454 	    else
455 	      break;
456 	  }
457 # endif
458 #endif
459 	else if (ISALPHA (c))
460 	  c = TOUPPER (c) - L_('A') + 10;
461 	else
462 	  break;
463 	if ((int) c >= base)
464 	  break;
465 	/* Check for overflow.  */
466 	if (i > cutoff || (i == cutoff && c > cutlim))
467 	  overflow = 1;
468 	else
469 	  {
470 	  use_long:
471 	    i *= (unsigned LONG int) base;
472 	    i += c;
473 	  }
474       }
475 
476   /* Check if anything actually happened.  */
477   if (s == save)
478     goto noconv;
479 
480   /* Store in ENDPTR the address of one character
481      past the last character we converted.  */
482   if (endptr != NULL)
483     *endptr = (STRING_TYPE *) s;
484 
485 #if !UNSIGNED
486   /* Check for a value that is within the range of
487      `unsigned LONG int', but outside the range of `LONG int'.  */
488   if (overflow == 0
489       && i > (negative
490 	      ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1
491 	      : (unsigned LONG int) STRTOL_LONG_MAX))
492     overflow = 1;
493 #endif
494 
495   if (__glibc_unlikely (overflow))
496     {
497       __set_errno (ERANGE);
498 #if UNSIGNED
499       return STRTOL_ULONG_MAX;
500 #else
501       return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
502 #endif
503     }
504 
505   /* Return the result of the appropriate sign.  */
506   return negative ? -i : i;
507 
508 noconv:
509   /* We must handle a special case here: the base is 0 or 16 and the
510      first two characters are '0' and 'x', but the rest are no
511      hexadecimal digits.  This is no error case.  We return 0 and
512      ENDPTR points to the `x`.  */
513   if (endptr != NULL)
514     {
515       if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
516 	  && save[-2] == L_('0'))
517 	*endptr = (STRING_TYPE *) &save[-1];
518       else
519 	/*  There was no number to convert.  */
520 	*endptr = (STRING_TYPE *) nptr;
521     }
522 
523   return 0L;
524 }
525 #if defined _LIBC && !defined USE_WIDE_CHAR
526 libc_hidden_def (INTERNAL (__strtol_l))
527 #endif
528 
529 /* External user entry point.  */
530 
531 #if _LIBC - 0 == 0
532 
533 /* Prototype.  */
534 extern INT __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
535 		       int base);
536 #endif
537 
538 
539 INT
540 #ifdef weak_function
541 weak_function
542 #endif
__strtol_l(const STRING_TYPE * nptr,STRING_TYPE ** endptr,int base,locale_t loc)543 __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
544 	    int base, locale_t loc)
545 {
546   return INTERNAL (__strtol_l) (nptr, endptr, base, 0, loc);
547 }
548 libc_hidden_def (__strtol_l)
549 weak_alias (__strtol_l, strtol_l)
550