1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <https://www.gnu.org/licenses/>. */
32
33 /* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67 #include <math.h>
68 #include <math_private.h>
69 #include <math-underflow.h>
70 #include <libm-alias-finite.h>
71
72 static const long double bp[] = {
73 1.0L,
74 1.5L,
75 };
76
77 /* log_2(1.5) */
78 static const long double dp_h[] = {
79 0.0,
80 5.8496250072115607565592654282227158546448E-1L
81 };
82
83 /* Low part of log_2(1.5) */
84 static const long double dp_l[] = {
85 0.0,
86 1.0579781240112554492329533686862998106046E-16L
87 };
88
89 static const long double zero = 0.0L,
90 one = 1.0L,
91 two = 2.0L,
92 two113 = 1.0384593717069655257060992658440192E34L,
93 huge = 1.0e300L,
94 tiny = 1.0e-300L;
95
96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
97 z = (x-1)/(x+1)
98 1 <= x <= 1.25
99 Peak relative error 2.3e-37 */
100 static const long double LN[] =
101 {
102 -3.0779177200290054398792536829702930623200E1L,
103 6.5135778082209159921251824580292116201640E1L,
104 -4.6312921812152436921591152809994014413540E1L,
105 1.2510208195629420304615674658258363295208E1L,
106 -9.9266909031921425609179910128531667336670E-1L
107 };
108 static const long double LD[] =
109 {
110 -5.129862866715009066465422805058933131960E1L,
111 1.452015077564081884387441590064272782044E2L,
112 -1.524043275549860505277434040464085593165E2L,
113 7.236063513651544224319663428634139768808E1L,
114 -1.494198912340228235853027849917095580053E1L
115 /* 1.0E0 */
116 };
117
118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
119 0 <= x <= 0.5
120 Peak relative error 5.7e-38 */
121 static const long double PN[] =
122 {
123 5.081801691915377692446852383385968225675E8L,
124 9.360895299872484512023336636427675327355E6L,
125 4.213701282274196030811629773097579432957E4L,
126 5.201006511142748908655720086041570288182E1L,
127 9.088368420359444263703202925095675982530E-3L,
128 };
129 static const long double PD[] =
130 {
131 3.049081015149226615468111430031590411682E9L,
132 1.069833887183886839966085436512368982758E8L,
133 8.259257717868875207333991924545445705394E5L,
134 1.872583833284143212651746812884298360922E3L,
135 /* 1.0E0 */
136 };
137
138 static const long double
139 /* ln 2 */
140 lg2 = 6.9314718055994530941723212145817656807550E-1L,
141 lg2_h = 6.9314718055994528622676398299518041312695E-1L,
142 lg2_l = 2.3190468138462996154948554638754786504121E-17L,
143 ovt = 8.0085662595372944372e-0017L,
144 /* 2/(3*log(2)) */
145 cp = 9.6179669392597560490661645400126142495110E-1L,
146 cp_h = 9.6179669392597555432899980587535537779331E-1L,
147 cp_l = 5.0577616648125906047157785230014751039424E-17L;
148
149 long double
__ieee754_powl(long double x,long double y)150 __ieee754_powl (long double x, long double y)
151 {
152 long double z, ax, z_h, z_l, p_h, p_l;
153 long double y1, t1, t2, r, s, sgn, t, u, v, w;
154 long double s2, s_h, s_l, t_h, t_l, ay;
155 int32_t i, j, k, yisint, n;
156 uint32_t ix, iy;
157 int32_t hx, hy, hax;
158 double ohi, xhi, xlo, yhi, ylo;
159 uint32_t lx, ly, lj;
160
161 ldbl_unpack (x, &xhi, &xlo);
162 EXTRACT_WORDS (hx, lx, xhi);
163 ix = hx & 0x7fffffff;
164
165 ldbl_unpack (y, &yhi, &ylo);
166 EXTRACT_WORDS (hy, ly, yhi);
167 iy = hy & 0x7fffffff;
168
169 /* y==zero: x**0 = 1 */
170 if ((iy | ly) == 0 && !issignaling (x))
171 return one;
172
173 /* 1.0**y = 1; -1.0**+-Inf = 1 */
174 if (x == one && !issignaling (y))
175 return one;
176 if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
177 return one;
178
179 /* +-NaN return x+y */
180 if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
181 || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
182 return x + y;
183
184 /* determine if y is an odd int when x < 0
185 * yisint = 0 ... y is not an integer
186 * yisint = 1 ... y is an odd int
187 * yisint = 2 ... y is an even int
188 */
189 yisint = 0;
190 if (hx < 0)
191 {
192 uint32_t low_ye;
193
194 GET_HIGH_WORD (low_ye, ylo);
195 if ((low_ye & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
196 yisint = 2; /* even integer y */
197 else if (iy >= 0x3ff00000) /* 1.0 */
198 {
199 if (floorl (y) == y)
200 {
201 z = 0.5 * y;
202 if (floorl (z) == z)
203 yisint = 2;
204 else
205 yisint = 1;
206 }
207 }
208 }
209
210 ax = fabsl (x);
211
212 /* special value of y */
213 if (ly == 0)
214 {
215 if (iy == 0x7ff00000) /* y is +-inf */
216 {
217 if (ax > one)
218 /* (|x|>1)**+-inf = inf,0 */
219 return (hy >= 0) ? y : zero;
220 else
221 /* (|x|<1)**-,+inf = inf,0 */
222 return (hy < 0) ? -y : zero;
223 }
224 if (ylo == 0.0)
225 {
226 if (iy == 0x3ff00000)
227 { /* y is +-1 */
228 if (hy < 0)
229 return one / x;
230 else
231 return x;
232 }
233 if (hy == 0x40000000)
234 return x * x; /* y is 2 */
235 if (hy == 0x3fe00000)
236 { /* y is 0.5 */
237 if (hx >= 0) /* x >= +0 */
238 return sqrtl (x);
239 }
240 }
241 }
242
243 /* special value of x */
244 if (lx == 0)
245 {
246 if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
247 {
248 z = ax; /*x is +-0,+-inf,+-1 */
249 if (hy < 0)
250 z = one / z; /* z = (1/|x|) */
251 if (hx < 0)
252 {
253 if (((ix - 0x3ff00000) | yisint) == 0)
254 {
255 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
256 }
257 else if (yisint == 1)
258 z = -z; /* (x<0)**odd = -(|x|**odd) */
259 }
260 return z;
261 }
262 }
263
264 /* (x<0)**(non-int) is NaN */
265 if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
266 return (x - x) / (x - x);
267
268 /* sgn (sign of result -ve**odd) = -1 else = 1 */
269 sgn = one;
270 if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
271 sgn = -one; /* (-ve)**(odd int) */
272
273 /* |y| is huge.
274 2^-16495 = 1/2 of smallest representable value.
275 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
276 if (iy > 0x41d654b0)
277 {
278 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
279 if (iy > 0x47d654b0)
280 {
281 if (ix <= 0x3fefffff)
282 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
283 if (ix >= 0x3ff00000)
284 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
285 }
286 /* over/underflow if x is not close to one */
287 if (ix < 0x3fefffff)
288 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
289 if (ix > 0x3ff00000)
290 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
291 }
292
293 ay = y > 0 ? y : -y;
294 if (ay < 0x1p-117)
295 y = y < 0 ? -0x1p-117 : 0x1p-117;
296
297 n = 0;
298 /* take care subnormal number */
299 if (ix < 0x00100000)
300 {
301 ax *= two113;
302 n -= 113;
303 ohi = ldbl_high (ax);
304 GET_HIGH_WORD (ix, ohi);
305 }
306 n += ((ix) >> 20) - 0x3ff;
307 j = ix & 0x000fffff;
308 /* determine interval */
309 ix = j | 0x3ff00000; /* normalize ix */
310 if (j <= 0x39880)
311 k = 0; /* |x|<sqrt(3/2) */
312 else if (j < 0xbb670)
313 k = 1; /* |x|<sqrt(3) */
314 else
315 {
316 k = 0;
317 n += 1;
318 ix -= 0x00100000;
319 }
320
321 ohi = ldbl_high (ax);
322 GET_HIGH_WORD (hax, ohi);
323 ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
324
325 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
326 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
327 v = one / (ax + bp[k]);
328 s = u * v;
329 s_h = ldbl_high (s);
330
331 /* t_h=ax+bp[k] High */
332 t_h = ax + bp[k];
333 t_h = ldbl_high (t_h);
334 t_l = ax - (t_h - bp[k]);
335 s_l = v * ((u - s_h * t_h) - s_h * t_l);
336 /* compute log(ax) */
337 s2 = s * s;
338 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
339 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
340 r = s2 * s2 * u / v;
341 r += s_l * (s_h + s);
342 s2 = s_h * s_h;
343 t_h = 3.0 + s2 + r;
344 t_h = ldbl_high (t_h);
345 t_l = r - ((t_h - 3.0) - s2);
346 /* u+v = s*(1+...) */
347 u = s_h * t_h;
348 v = s_l * t_h + t_l * s;
349 /* 2/(3log2)*(s+...) */
350 p_h = u + v;
351 p_h = ldbl_high (p_h);
352 p_l = v - (p_h - u);
353 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
354 z_l = cp_l * p_h + p_l * cp + dp_l[k];
355 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
356 t = (long double) n;
357 t1 = (((z_h + z_l) + dp_h[k]) + t);
358 t1 = ldbl_high (t1);
359 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
360
361 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
362 y1 = ldbl_high (y);
363 p_l = (y - y1) * t1 + y * t2;
364 p_h = y1 * t1;
365 z = p_l + p_h;
366 ohi = ldbl_high (z);
367 EXTRACT_WORDS (j, lj, ohi);
368 if (j >= 0x40d00000) /* z >= 16384 */
369 {
370 /* if z > 16384 */
371 if (((j - 0x40d00000) | lj) != 0)
372 return sgn * huge * huge; /* overflow */
373 else
374 {
375 if (p_l + ovt > z - p_h)
376 return sgn * huge * huge; /* overflow */
377 }
378 }
379 else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
380 {
381 /* z < -16495 */
382 if (((j - 0xc0d01bc0) | lj) != 0)
383 return sgn * tiny * tiny; /* underflow */
384 else
385 {
386 if (p_l <= z - p_h)
387 return sgn * tiny * tiny; /* underflow */
388 }
389 }
390 /* compute 2**(p_h+p_l) */
391 i = j & 0x7fffffff;
392 k = (i >> 20) - 0x3ff;
393 n = 0;
394 if (i > 0x3fe00000)
395 { /* if |z| > 0.5, set n = [z+0.5] */
396 n = floorl (z + 0.5L);
397 t = n;
398 p_h -= t;
399 }
400 t = p_l + p_h;
401 t = ldbl_high (t);
402 u = t * lg2_h;
403 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
404 z = u + v;
405 w = v - (z - u);
406 /* exp(z) */
407 t = z * z;
408 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
409 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
410 t1 = z - t * u / v;
411 r = (z * t1) / (t1 - two) - (w + z * w);
412 z = one - (r - z);
413 z = __scalbnl (sgn * z, n);
414 math_check_force_underflow (z);
415 return z;
416 }
417 libm_alias_finite (__ieee754_powl, __powl)
418