1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Expansions and modifications for 128-bit long double are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <https://www.gnu.org/licenses/>. */
32
33 /* __ieee754_powl(x,y) return x**y
34 *
35 * n
36 * Method: Let x = 2 * (1+f)
37 * 1. Compute and return log2(x) in two pieces:
38 * log2(x) = w1 + w2,
39 * where w1 has 113-53 = 60 bit trailing zeros.
40 * 2. Perform y*log2(x) = n+y' by simulating muti-precision
41 * arithmetic, where |y'|<=0.5.
42 * 3. Return x**y = 2**n*exp(y'*log2)
43 *
44 * Special cases:
45 * 1. (anything) ** 0 is 1
46 * 2. (anything) ** 1 is itself
47 * 3. (anything) ** NAN is NAN
48 * 4. NAN ** (anything except 0) is NAN
49 * 5. +-(|x| > 1) ** +INF is +INF
50 * 6. +-(|x| > 1) ** -INF is +0
51 * 7. +-(|x| < 1) ** +INF is +0
52 * 8. +-(|x| < 1) ** -INF is +INF
53 * 9. +-1 ** +-INF is NAN
54 * 10. +0 ** (+anything except 0, NAN) is +0
55 * 11. -0 ** (+anything except 0, NAN, odd integer) is +0
56 * 12. +0 ** (-anything except 0, NAN) is +INF
57 * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF
58 * 14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59 * 15. +INF ** (+anything except 0,NAN) is +INF
60 * 16. +INF ** (-anything except 0,NAN) is +0
61 * 17. -INF ** (anything) = -0 ** (-anything)
62 * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63 * 19. (-anything except 0 and inf) ** (non-integer) is NAN
64 *
65 */
66
67 #include <math.h>
68 #include <math-barriers.h>
69 #include <math_private.h>
70 #include <libm-alias-finite.h>
71
72 static const _Float128 bp[] = {
73 1,
74 L(1.5),
75 };
76
77 /* log_2(1.5) */
78 static const _Float128 dp_h[] = {
79 0.0,
80 L(5.8496250072115607565592654282227158546448E-1)
81 };
82
83 /* Low part of log_2(1.5) */
84 static const _Float128 dp_l[] = {
85 0.0,
86 L(1.0579781240112554492329533686862998106046E-16)
87 };
88
89 static const _Float128 zero = 0,
90 one = 1,
91 two = 2,
92 two113 = L(1.0384593717069655257060992658440192E34),
93 huge = L(1.0e3000),
94 tiny = L(1.0e-3000);
95
96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
97 z = (x-1)/(x+1)
98 1 <= x <= 1.25
99 Peak relative error 2.3e-37 */
100 static const _Float128 LN[] =
101 {
102 L(-3.0779177200290054398792536829702930623200E1),
103 L(6.5135778082209159921251824580292116201640E1),
104 L(-4.6312921812152436921591152809994014413540E1),
105 L(1.2510208195629420304615674658258363295208E1),
106 L(-9.9266909031921425609179910128531667336670E-1)
107 };
108 static const _Float128 LD[] =
109 {
110 L(-5.129862866715009066465422805058933131960E1),
111 L(1.452015077564081884387441590064272782044E2),
112 L(-1.524043275549860505277434040464085593165E2),
113 L(7.236063513651544224319663428634139768808E1),
114 L(-1.494198912340228235853027849917095580053E1)
115 /* 1.0E0 */
116 };
117
118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
119 0 <= x <= 0.5
120 Peak relative error 5.7e-38 */
121 static const _Float128 PN[] =
122 {
123 L(5.081801691915377692446852383385968225675E8),
124 L(9.360895299872484512023336636427675327355E6),
125 L(4.213701282274196030811629773097579432957E4),
126 L(5.201006511142748908655720086041570288182E1),
127 L(9.088368420359444263703202925095675982530E-3),
128 };
129 static const _Float128 PD[] =
130 {
131 L(3.049081015149226615468111430031590411682E9),
132 L(1.069833887183886839966085436512368982758E8),
133 L(8.259257717868875207333991924545445705394E5),
134 L(1.872583833284143212651746812884298360922E3),
135 /* 1.0E0 */
136 };
137
138 static const _Float128
139 /* ln 2 */
140 lg2 = L(6.9314718055994530941723212145817656807550E-1),
141 lg2_h = L(6.9314718055994528622676398299518041312695E-1),
142 lg2_l = L(2.3190468138462996154948554638754786504121E-17),
143 ovt = L(8.0085662595372944372e-0017),
144 /* 2/(3*log(2)) */
145 cp = L(9.6179669392597560490661645400126142495110E-1),
146 cp_h = L(9.6179669392597555432899980587535537779331E-1),
147 cp_l = L(5.0577616648125906047157785230014751039424E-17);
148
149 _Float128
__ieee754_powl(_Float128 x,_Float128 y)150 __ieee754_powl (_Float128 x, _Float128 y)
151 {
152 _Float128 z, ax, z_h, z_l, p_h, p_l;
153 _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
154 _Float128 s2, s_h, s_l, t_h, t_l, ay;
155 int32_t i, j, k, yisint, n;
156 uint32_t ix, iy;
157 int32_t hx, hy;
158 ieee854_long_double_shape_type o, p, q;
159
160 p.value = x;
161 hx = p.parts32.w0;
162 ix = hx & 0x7fffffff;
163
164 q.value = y;
165 hy = q.parts32.w0;
166 iy = hy & 0x7fffffff;
167
168
169 /* y==zero: x**0 = 1 */
170 if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
171 && !issignaling (x))
172 return one;
173
174 /* 1.0**y = 1; -1.0**+-Inf = 1 */
175 if (x == one && !issignaling (y))
176 return one;
177 if (x == -1 && iy == 0x7fff0000
178 && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
179 return one;
180
181 /* +-NaN return x+y */
182 if ((ix > 0x7fff0000)
183 || ((ix == 0x7fff0000)
184 && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
185 || (iy > 0x7fff0000)
186 || ((iy == 0x7fff0000)
187 && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
188 return x + y;
189
190 /* determine if y is an odd int when x < 0
191 * yisint = 0 ... y is not an integer
192 * yisint = 1 ... y is an odd int
193 * yisint = 2 ... y is an even int
194 */
195 yisint = 0;
196 if (hx < 0)
197 {
198 if (iy >= 0x40700000) /* 2^113 */
199 yisint = 2; /* even integer y */
200 else if (iy >= 0x3fff0000) /* 1.0 */
201 {
202 if (floorl (y) == y)
203 {
204 z = 0.5 * y;
205 if (floorl (z) == z)
206 yisint = 2;
207 else
208 yisint = 1;
209 }
210 }
211 }
212
213 /* special value of y */
214 if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
215 {
216 if (iy == 0x7fff0000) /* y is +-inf */
217 {
218 if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
219 == 0)
220 return y - y; /* +-1**inf is NaN */
221 else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */
222 return (hy >= 0) ? y : zero;
223 else /* (|x|<1)**-,+inf = inf,0 */
224 return (hy < 0) ? -y : zero;
225 }
226 if (iy == 0x3fff0000)
227 { /* y is +-1 */
228 if (hy < 0)
229 return one / x;
230 else
231 return x;
232 }
233 if (hy == 0x40000000)
234 return x * x; /* y is 2 */
235 if (hy == 0x3ffe0000)
236 { /* y is 0.5 */
237 if (hx >= 0) /* x >= +0 */
238 return sqrtl (x);
239 }
240 }
241
242 ax = fabsl (x);
243 /* special value of x */
244 if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
245 {
246 if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
247 {
248 z = ax; /*x is +-0,+-inf,+-1 */
249 if (hy < 0)
250 z = one / z; /* z = (1/|x|) */
251 if (hx < 0)
252 {
253 if (((ix - 0x3fff0000) | yisint) == 0)
254 {
255 z = (z - z) / (z - z); /* (-1)**non-int is NaN */
256 }
257 else if (yisint == 1)
258 z = -z; /* (x<0)**odd = -(|x|**odd) */
259 }
260 return z;
261 }
262 }
263
264 /* (x<0)**(non-int) is NaN */
265 if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
266 return (x - x) / (x - x);
267
268 /* sgn (sign of result -ve**odd) = -1 else = 1 */
269 sgn = one;
270 if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
271 sgn = -one; /* (-ve)**(odd int) */
272
273 /* |y| is huge.
274 2^-16495 = 1/2 of smallest representable value.
275 If (1 - 1/131072)^y underflows, y > 1.4986e9 */
276 if (iy > 0x401d654b)
277 {
278 /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
279 if (iy > 0x407d654b)
280 {
281 if (ix <= 0x3ffeffff)
282 return (hy < 0) ? huge * huge : tiny * tiny;
283 if (ix >= 0x3fff0000)
284 return (hy > 0) ? huge * huge : tiny * tiny;
285 }
286 /* over/underflow if x is not close to one */
287 if (ix < 0x3ffeffff)
288 return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
289 if (ix > 0x3fff0000)
290 return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
291 }
292
293 ay = y > 0 ? y : -y;
294 if (ay < 0x1p-128)
295 y = y < 0 ? -0x1p-128 : 0x1p-128;
296
297 n = 0;
298 /* take care subnormal number */
299 if (ix < 0x00010000)
300 {
301 ax *= two113;
302 n -= 113;
303 o.value = ax;
304 ix = o.parts32.w0;
305 }
306 n += ((ix) >> 16) - 0x3fff;
307 j = ix & 0x0000ffff;
308 /* determine interval */
309 ix = j | 0x3fff0000; /* normalize ix */
310 if (j <= 0x3988)
311 k = 0; /* |x|<sqrt(3/2) */
312 else if (j < 0xbb67)
313 k = 1; /* |x|<sqrt(3) */
314 else
315 {
316 k = 0;
317 n += 1;
318 ix -= 0x00010000;
319 }
320
321 o.value = ax;
322 o.parts32.w0 = ix;
323 ax = o.value;
324
325 /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
326 u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
327 v = one / (ax + bp[k]);
328 s = u * v;
329 s_h = s;
330
331 o.value = s_h;
332 o.parts32.w3 = 0;
333 o.parts32.w2 &= 0xf8000000;
334 s_h = o.value;
335 /* t_h=ax+bp[k] High */
336 t_h = ax + bp[k];
337 o.value = t_h;
338 o.parts32.w3 = 0;
339 o.parts32.w2 &= 0xf8000000;
340 t_h = o.value;
341 t_l = ax - (t_h - bp[k]);
342 s_l = v * ((u - s_h * t_h) - s_h * t_l);
343 /* compute log(ax) */
344 s2 = s * s;
345 u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
346 v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
347 r = s2 * s2 * u / v;
348 r += s_l * (s_h + s);
349 s2 = s_h * s_h;
350 t_h = 3.0 + s2 + r;
351 o.value = t_h;
352 o.parts32.w3 = 0;
353 o.parts32.w2 &= 0xf8000000;
354 t_h = o.value;
355 t_l = r - ((t_h - 3.0) - s2);
356 /* u+v = s*(1+...) */
357 u = s_h * t_h;
358 v = s_l * t_h + t_l * s;
359 /* 2/(3log2)*(s+...) */
360 p_h = u + v;
361 o.value = p_h;
362 o.parts32.w3 = 0;
363 o.parts32.w2 &= 0xf8000000;
364 p_h = o.value;
365 p_l = v - (p_h - u);
366 z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
367 z_l = cp_l * p_h + p_l * cp + dp_l[k];
368 /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
369 t = (_Float128) n;
370 t1 = (((z_h + z_l) + dp_h[k]) + t);
371 o.value = t1;
372 o.parts32.w3 = 0;
373 o.parts32.w2 &= 0xf8000000;
374 t1 = o.value;
375 t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
376
377 /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
378 y1 = y;
379 o.value = y1;
380 o.parts32.w3 = 0;
381 o.parts32.w2 &= 0xf8000000;
382 y1 = o.value;
383 p_l = (y - y1) * t1 + y * t2;
384 p_h = y1 * t1;
385 z = p_l + p_h;
386 o.value = z;
387 j = o.parts32.w0;
388 if (j >= 0x400d0000) /* z >= 16384 */
389 {
390 /* if z > 16384 */
391 if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
392 return sgn * huge * huge; /* overflow */
393 else
394 {
395 if (p_l + ovt > z - p_h)
396 return sgn * huge * huge; /* overflow */
397 }
398 }
399 else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */
400 {
401 /* z < -16495 */
402 if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
403 != 0)
404 return sgn * tiny * tiny; /* underflow */
405 else
406 {
407 if (p_l <= z - p_h)
408 return sgn * tiny * tiny; /* underflow */
409 }
410 }
411 /* compute 2**(p_h+p_l) */
412 i = j & 0x7fffffff;
413 k = (i >> 16) - 0x3fff;
414 n = 0;
415 if (i > 0x3ffe0000)
416 { /* if |z| > 0.5, set n = [z+0.5] */
417 n = floorl (z + L(0.5));
418 t = n;
419 p_h -= t;
420 }
421 t = p_l + p_h;
422 o.value = t;
423 o.parts32.w3 = 0;
424 o.parts32.w2 &= 0xf8000000;
425 t = o.value;
426 u = t * lg2_h;
427 v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
428 z = u + v;
429 w = v - (z - u);
430 /* exp(z) */
431 t = z * z;
432 u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
433 v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
434 t1 = z - t * u / v;
435 r = (z * t1) / (t1 - two) - (w + z * w);
436 z = one - (r - z);
437 o.value = z;
438 j = o.parts32.w0;
439 j += (n << 16);
440 if ((j >> 16) <= 0)
441 {
442 z = __scalbnl (z, n); /* subnormal output */
443 _Float128 force_underflow = z * z;
444 math_force_eval (force_underflow);
445 }
446 else
447 {
448 o.parts32.w0 = j;
449 z = o.value;
450 }
451 return sgn * z;
452 }
453 libm_alias_finite (__ieee754_powl, __powl)
454