1 /* Quad-precision floating point e^x.
2    Copyright (C) 1999-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 /* The basic design here is from
20    Abraham Ziv, "Fast Evaluation of Elementary Mathematical Functions with
21    Correctly Rounded Last Bit", ACM Trans. Math. Soft., 17 (3), September 1991,
22    pp. 410-423.
23 
24    We work with number pairs where the first number is the high part and
25    the second one is the low part. Arithmetic with the high part numbers must
26    be exact, without any roundoff errors.
27 
28    The input value, X, is written as
29    X = n * ln(2)_0 + arg1[t1]_0 + arg2[t2]_0 + x
30        - n * ln(2)_1 + arg1[t1]_1 + arg2[t2]_1 + xl
31 
32    where:
33    - n is an integer, 16384 >= n >= -16495;
34    - ln(2)_0 is the first 93 bits of ln(2), and |ln(2)_0-ln(2)-ln(2)_1| < 2^-205
35    - t1 is an integer, 89 >= t1 >= -89
36    - t2 is an integer, 65 >= t2 >= -65
37    - |arg1[t1]-t1/256.0| < 2^-53
38    - |arg2[t2]-t2/32768.0| < 2^-53
39    - x + xl is whatever is left, |x + xl| < 2^-16 + 2^-53
40 
41    Then e^x is approximated as
42 
43    e^x = 2^n_1 ( 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
44 	       + 2^n_0 e^(arg1[t1]_0 + arg1[t1]_1) e^(arg2[t2]_0 + arg2[t2]_1)
45 		 * p (x + xl + n * ln(2)_1))
46    where:
47    - p(x) is a polynomial approximating e(x)-1
48    - e^(arg1[t1]_0 + arg1[t1]_1) is obtained from a table
49    - e^(arg2[t2]_0 + arg2[t2]_1) likewise
50    - n_1 + n_0 = n, so that |n_0| < -LDBL_MIN_EXP-1.
51 
52    If it happens that n_1 == 0 (this is the usual case), that multiplication
53    is omitted.
54    */
55 
56 #ifndef _GNU_SOURCE
57 #define _GNU_SOURCE
58 #endif
59 #include <float.h>
60 #include <ieee754.h>
61 #include <math.h>
62 #include <fenv.h>
63 #include <inttypes.h>
64 #include <math_private.h>
65 #include <fenv_private.h>
66 #include <libm-alias-finite.h>
67 
68 #include "t_expl.h"
69 
70 static const long double C[] = {
71 /* Smallest integer x for which e^x overflows.  */
72 #define himark C[0]
73  709.78271289338399678773454114191496482L,
74 
75 /* Largest integer x for which e^x underflows.  */
76 #define lomark C[1]
77 -744.44007192138126231410729844608163411L,
78 
79 /* 3x2^96 */
80 #define THREEp96 C[2]
81  59421121885698253195157962752.0L,
82 
83 /* 3x2^103 */
84 #define THREEp103 C[3]
85  30423614405477505635920876929024.0L,
86 
87 /* 3x2^111 */
88 #define THREEp111 C[4]
89  7788445287802241442795744493830144.0L,
90 
91 /* 1/ln(2) */
92 #define M_1_LN2 C[5]
93  1.44269504088896340735992468100189204L,
94 
95 /* first 93 bits of ln(2) */
96 #define M_LN2_0 C[6]
97  0.693147180559945309417232121457981864L,
98 
99 /* ln2_0 - ln(2) */
100 #define M_LN2_1 C[7]
101 -1.94704509238074995158795957333327386E-31L,
102 
103 /* very small number */
104 #define TINY C[8]
105  1.0e-308L,
106 
107 /* 2^16383 */
108 #define TWO1023 C[9]
109  8.988465674311579538646525953945123668E+307L,
110 
111 /* 256 */
112 #define TWO8 C[10]
113  256.0L,
114 
115 /* 32768 */
116 #define TWO15 C[11]
117  32768.0L,
118 
119 /* Chebyshev polynom coefficients for (exp(x)-1)/x */
120 #define P1 C[12]
121 #define P2 C[13]
122 #define P3 C[14]
123 #define P4 C[15]
124 #define P5 C[16]
125 #define P6 C[17]
126  0.5L,
127  1.66666666666666666666666666666666683E-01L,
128  4.16666666666666666666654902320001674E-02L,
129  8.33333333333333333333314659767198461E-03L,
130  1.38888888889899438565058018857254025E-03L,
131  1.98412698413981650382436541785404286E-04L,
132 };
133 
134 /* Avoid local PLT entry use from (int) roundl (...) being converted
135    to a call to lroundl in the case of 32-bit long and roundl not
136    inlined.  */
137 long int lroundl (long double) asm ("__lroundl");
138 
139 long double
__ieee754_expl(long double x)140 __ieee754_expl (long double x)
141 {
142   long double result, x22;
143   union ibm_extended_long_double ex2_u, scale_u;
144   int unsafe;
145 
146   /* Check for usual case.  */
147   if (isless (x, himark) && isgreater (x, lomark))
148     {
149       int tval1, tval2, n_i, exponent2;
150       long double n, xl;
151 
152       SET_RESTORE_ROUND (FE_TONEAREST);
153 
154       n = roundl (x*M_1_LN2);
155       x = x-n*M_LN2_0;
156       xl = n*M_LN2_1;
157 
158       tval1 = roundl (x*TWO8);
159       x -= __expl_table[T_EXPL_ARG1+2*tval1];
160       xl -= __expl_table[T_EXPL_ARG1+2*tval1+1];
161 
162       tval2 = roundl (x*TWO15);
163       x -= __expl_table[T_EXPL_ARG2+2*tval2];
164       xl -= __expl_table[T_EXPL_ARG2+2*tval2+1];
165 
166       x = x + xl;
167 
168       /* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]).  */
169       ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
170 		  * __expl_table[T_EXPL_RES2 + tval2]);
171       n_i = (int)n;
172       /* 'unsafe' is 1 iff n_1 != 0.  */
173       unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
174       ex2_u.d[0].ieee.exponent += n_i >> unsafe;
175       /* Fortunately, there are no subnormal lowpart doubles in
176 	 __expl_table, only normal values and zeros.
177 	 But after scaling it can be subnormal.  */
178       exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe);
179       if (ex2_u.d[1].ieee.exponent == 0)
180 	/* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */;
181       else if (exponent2 > 0)
182 	ex2_u.d[1].ieee.exponent = exponent2;
183       else if (exponent2 <= -54)
184 	{
185 	  ex2_u.d[1].ieee.exponent = 0;
186 	  ex2_u.d[1].ieee.mantissa0 = 0;
187 	  ex2_u.d[1].ieee.mantissa1 = 0;
188 	}
189       else
190 	{
191 	  static const double
192 	    two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
193 	    twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
194 	  ex2_u.d[1].d *= two54;
195 	  ex2_u.d[1].ieee.exponent += n_i >> unsafe;
196 	  ex2_u.d[1].d *= twom54;
197 	}
198 
199       /* Compute scale = 2^n_1.  */
200       scale_u.ld = 1.0L;
201       scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe);
202 
203       /* Approximate e^x2 - 1, using a seventh-degree polynomial,
204 	 with maximum error in [-2^-16-2^-53,2^-16+2^-53]
205 	 less than 4.8e-39.  */
206       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
207 
208       /* Now we can test whether the result is ultimate or if we are unsure.
209 	 In the later case we should probably call a mpn based routine to give
210 	 the ultimate result.
211 	 Empirically, this routine is already ultimate in about 99.9986% of
212 	 cases, the test below for the round to nearest case will be false
213 	 in ~ 99.9963% of cases.
214 	 Without proc2 routine maximum error which has been seen is
215 	 0.5000262 ulp.
216 
217 	  union ieee854_long_double ex3_u;
218 
219 	  #ifdef FE_TONEAREST
220 	    fesetround (FE_TONEAREST);
221 	  #endif
222 	  ex3_u.d = (result - ex2_u.d) - x22 * ex2_u.d;
223 	  ex2_u.d = result;
224 	  ex3_u.ieee.exponent += LDBL_MANT_DIG + 15 + IEEE854_LONG_DOUBLE_BIAS
225 				 - ex2_u.ieee.exponent;
226 	  n_i = abs (ex3_u.d);
227 	  n_i = (n_i + 1) / 2;
228 	  fesetenv (&oldenv);
229 	  #ifdef FE_TONEAREST
230 	  if (fegetround () == FE_TONEAREST)
231 	    n_i -= 0x4000;
232 	  #endif
233 	  if (!n_i) {
234 	    return __ieee754_expl_proc2 (origx);
235 	  }
236        */
237     }
238   /* Exceptional cases:  */
239   else if (isless (x, himark))
240     {
241       if (isinf (x))
242 	/* e^-inf == 0, with no error.  */
243 	return 0;
244       else
245 	/* Underflow */
246 	return TINY * TINY;
247     }
248   else
249     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
250     return TWO1023*x;
251 
252   result = x22 * ex2_u.ld + ex2_u.ld;
253   if (!unsafe)
254     return result;
255   return result * scale_u.ld;
256 }
257 libm_alias_finite (__ieee754_expl, __expl)
258