1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /* Expansions and modifications for 128-bit long double are
13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14    and are incorporated herein by permission of the author.  The author
15    reserves the right to distribute this material elsewhere under different
16    copying permissions.  These modifications are distributed here under
17    the following terms:
18 
19     This library is free software; you can redistribute it and/or
20     modify it under the terms of the GNU Lesser General Public
21     License as published by the Free Software Foundation; either
22     version 2.1 of the License, or (at your option) any later version.
23 
24     This library is distributed in the hope that it will be useful,
25     but WITHOUT ANY WARRANTY; without even the implied warranty of
26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27     Lesser General Public License for more details.
28 
29     You should have received a copy of the GNU Lesser General Public
30     License along with this library; if not, see
31     <https://www.gnu.org/licenses/>.  */
32 
33 /* __ieee754_powl(x,y) return x**y
34  *
35  *		      n
36  * Method:  Let x =  2   * (1+f)
37  *	1. Compute and return log2(x) in two pieces:
38  *		log2(x) = w1 + w2,
39  *	   where w1 has 113-53 = 60 bit trailing zeros.
40  *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41  *	   arithmetic, where |y'|<=0.5.
42  *	3. Return x**y = 2**n*exp(y'*log2)
43  *
44  * Special cases:
45  *	1.  (anything) ** 0  is 1
46  *	2.  (anything) ** 1  is itself
47  *	3.  (anything) ** NAN is NAN
48  *	4.  NAN ** (anything except 0) is NAN
49  *	5.  +-(|x| > 1) **  +INF is +INF
50  *	6.  +-(|x| > 1) **  -INF is +0
51  *	7.  +-(|x| < 1) **  +INF is +0
52  *	8.  +-(|x| < 1) **  -INF is +INF
53  *	9.  +-1         ** +-INF is NAN
54  *	10. +0 ** (+anything except 0, NAN)               is +0
55  *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56  *	12. +0 ** (-anything except 0, NAN)               is +INF
57  *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58  *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59  *	15. +INF ** (+anything except 0,NAN) is +INF
60  *	16. +INF ** (-anything except 0,NAN) is +0
61  *	17. -INF ** (anything)  = -0 ** (-anything)
62  *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63  *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64  *
65  */
66 
67 #include <math.h>
68 #include <math-barriers.h>
69 #include <math_private.h>
70 #include <libm-alias-finite.h>
71 
72 static const _Float128 bp[] = {
73   1,
74   L(1.5),
75 };
76 
77 /* log_2(1.5) */
78 static const _Float128 dp_h[] = {
79   0.0,
80   L(5.8496250072115607565592654282227158546448E-1)
81 };
82 
83 /* Low part of log_2(1.5) */
84 static const _Float128 dp_l[] = {
85   0.0,
86   L(1.0579781240112554492329533686862998106046E-16)
87 };
88 
89 static const _Float128 zero = 0,
90   one = 1,
91   two = 2,
92   two113 = L(1.0384593717069655257060992658440192E34),
93   huge = L(1.0e3000),
94   tiny = L(1.0e-3000);
95 
96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
97    z = (x-1)/(x+1)
98    1 <= x <= 1.25
99    Peak relative error 2.3e-37 */
100 static const _Float128 LN[] =
101 {
102  L(-3.0779177200290054398792536829702930623200E1),
103   L(6.5135778082209159921251824580292116201640E1),
104  L(-4.6312921812152436921591152809994014413540E1),
105   L(1.2510208195629420304615674658258363295208E1),
106  L(-9.9266909031921425609179910128531667336670E-1)
107 };
108 static const _Float128 LD[] =
109 {
110  L(-5.129862866715009066465422805058933131960E1),
111   L(1.452015077564081884387441590064272782044E2),
112  L(-1.524043275549860505277434040464085593165E2),
113   L(7.236063513651544224319663428634139768808E1),
114  L(-1.494198912340228235853027849917095580053E1)
115   /* 1.0E0 */
116 };
117 
118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
119    0 <= x <= 0.5
120    Peak relative error 5.7e-38  */
121 static const _Float128 PN[] =
122 {
123   L(5.081801691915377692446852383385968225675E8),
124   L(9.360895299872484512023336636427675327355E6),
125   L(4.213701282274196030811629773097579432957E4),
126   L(5.201006511142748908655720086041570288182E1),
127   L(9.088368420359444263703202925095675982530E-3),
128 };
129 static const _Float128 PD[] =
130 {
131   L(3.049081015149226615468111430031590411682E9),
132   L(1.069833887183886839966085436512368982758E8),
133   L(8.259257717868875207333991924545445705394E5),
134   L(1.872583833284143212651746812884298360922E3),
135   /* 1.0E0 */
136 };
137 
138 static const _Float128
139   /* ln 2 */
140   lg2 = L(6.9314718055994530941723212145817656807550E-1),
141   lg2_h = L(6.9314718055994528622676398299518041312695E-1),
142   lg2_l = L(2.3190468138462996154948554638754786504121E-17),
143   ovt = L(8.0085662595372944372e-0017),
144   /* 2/(3*log(2)) */
145   cp = L(9.6179669392597560490661645400126142495110E-1),
146   cp_h = L(9.6179669392597555432899980587535537779331E-1),
147   cp_l = L(5.0577616648125906047157785230014751039424E-17);
148 
149 _Float128
__ieee754_powl(_Float128 x,_Float128 y)150 __ieee754_powl (_Float128 x, _Float128 y)
151 {
152   _Float128 z, ax, z_h, z_l, p_h, p_l;
153   _Float128 y1, t1, t2, r, s, sgn, t, u, v, w;
154   _Float128 s2, s_h, s_l, t_h, t_l, ay;
155   int32_t i, j, k, yisint, n;
156   uint32_t ix, iy;
157   int32_t hx, hy;
158   ieee854_long_double_shape_type o, p, q;
159 
160   p.value = x;
161   hx = p.parts32.w0;
162   ix = hx & 0x7fffffff;
163 
164   q.value = y;
165   hy = q.parts32.w0;
166   iy = hy & 0x7fffffff;
167 
168 
169   /* y==zero: x**0 = 1 */
170   if ((iy | q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0
171       && !issignaling (x))
172     return one;
173 
174   /* 1.0**y = 1; -1.0**+-Inf = 1 */
175   if (x == one && !issignaling (y))
176     return one;
177   if (x == -1 && iy == 0x7fff0000
178       && (q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
179     return one;
180 
181   /* +-NaN return x+y */
182   if ((ix > 0x7fff0000)
183       || ((ix == 0x7fff0000)
184 	  && ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) != 0))
185       || (iy > 0x7fff0000)
186       || ((iy == 0x7fff0000)
187 	  && ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) != 0)))
188     return x + y;
189 
190   /* determine if y is an odd int when x < 0
191    * yisint = 0       ... y is not an integer
192    * yisint = 1       ... y is an odd int
193    * yisint = 2       ... y is an even int
194    */
195   yisint = 0;
196   if (hx < 0)
197     {
198       if (iy >= 0x40700000)	/* 2^113 */
199 	yisint = 2;		/* even integer y */
200       else if (iy >= 0x3fff0000)	/* 1.0 */
201 	{
202 	  if (floorl (y) == y)
203 	    {
204 	      z = 0.5 * y;
205 	      if (floorl (z) == z)
206 		yisint = 2;
207 	      else
208 		yisint = 1;
209 	    }
210 	}
211     }
212 
213   /* special value of y */
214   if ((q.parts32.w1 | q.parts32.w2 | q.parts32.w3) == 0)
215     {
216       if (iy == 0x7fff0000)	/* y is +-inf */
217 	{
218 	  if (((ix - 0x3fff0000) | p.parts32.w1 | p.parts32.w2 | p.parts32.w3)
219 	      == 0)
220 	    return y - y;	/* +-1**inf is NaN */
221 	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
222 	    return (hy >= 0) ? y : zero;
223 	  else			/* (|x|<1)**-,+inf = inf,0 */
224 	    return (hy < 0) ? -y : zero;
225 	}
226       if (iy == 0x3fff0000)
227 	{			/* y is  +-1 */
228 	  if (hy < 0)
229 	    return one / x;
230 	  else
231 	    return x;
232 	}
233       if (hy == 0x40000000)
234 	return x * x;		/* y is  2 */
235       if (hy == 0x3ffe0000)
236 	{			/* y is  0.5 */
237 	  if (hx >= 0)		/* x >= +0 */
238 	    return sqrtl (x);
239 	}
240     }
241 
242   ax = fabsl (x);
243   /* special value of x */
244   if ((p.parts32.w1 | p.parts32.w2 | p.parts32.w3) == 0)
245     {
246       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
247 	{
248 	  z = ax;		/*x is +-0,+-inf,+-1 */
249 	  if (hy < 0)
250 	    z = one / z;	/* z = (1/|x|) */
251 	  if (hx < 0)
252 	    {
253 	      if (((ix - 0x3fff0000) | yisint) == 0)
254 		{
255 		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
256 		}
257 	      else if (yisint == 1)
258 		z = -z;		/* (x<0)**odd = -(|x|**odd) */
259 	    }
260 	  return z;
261 	}
262     }
263 
264   /* (x<0)**(non-int) is NaN */
265   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
266     return (x - x) / (x - x);
267 
268   /* sgn (sign of result -ve**odd) = -1 else = 1 */
269   sgn = one;
270   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
271     sgn = -one;			/* (-ve)**(odd int) */
272 
273   /* |y| is huge.
274      2^-16495 = 1/2 of smallest representable value.
275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
276   if (iy > 0x401d654b)
277     {
278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
279       if (iy > 0x407d654b)
280 	{
281 	  if (ix <= 0x3ffeffff)
282 	    return (hy < 0) ? huge * huge : tiny * tiny;
283 	  if (ix >= 0x3fff0000)
284 	    return (hy > 0) ? huge * huge : tiny * tiny;
285 	}
286       /* over/underflow if x is not close to one */
287       if (ix < 0x3ffeffff)
288 	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
289       if (ix > 0x3fff0000)
290 	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
291     }
292 
293   ay = y > 0 ? y : -y;
294   if (ay < 0x1p-128)
295     y = y < 0 ? -0x1p-128 : 0x1p-128;
296 
297   n = 0;
298   /* take care subnormal number */
299   if (ix < 0x00010000)
300     {
301       ax *= two113;
302       n -= 113;
303       o.value = ax;
304       ix = o.parts32.w0;
305     }
306   n += ((ix) >> 16) - 0x3fff;
307   j = ix & 0x0000ffff;
308   /* determine interval */
309   ix = j | 0x3fff0000;		/* normalize ix */
310   if (j <= 0x3988)
311     k = 0;			/* |x|<sqrt(3/2) */
312   else if (j < 0xbb67)
313     k = 1;			/* |x|<sqrt(3)   */
314   else
315     {
316       k = 0;
317       n += 1;
318       ix -= 0x00010000;
319     }
320 
321   o.value = ax;
322   o.parts32.w0 = ix;
323   ax = o.value;
324 
325   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
326   u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
327   v = one / (ax + bp[k]);
328   s = u * v;
329   s_h = s;
330 
331   o.value = s_h;
332   o.parts32.w3 = 0;
333   o.parts32.w2 &= 0xf8000000;
334   s_h = o.value;
335   /* t_h=ax+bp[k] High */
336   t_h = ax + bp[k];
337   o.value = t_h;
338   o.parts32.w3 = 0;
339   o.parts32.w2 &= 0xf8000000;
340   t_h = o.value;
341   t_l = ax - (t_h - bp[k]);
342   s_l = v * ((u - s_h * t_h) - s_h * t_l);
343   /* compute log(ax) */
344   s2 = s * s;
345   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
346   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
347   r = s2 * s2 * u / v;
348   r += s_l * (s_h + s);
349   s2 = s_h * s_h;
350   t_h = 3.0 + s2 + r;
351   o.value = t_h;
352   o.parts32.w3 = 0;
353   o.parts32.w2 &= 0xf8000000;
354   t_h = o.value;
355   t_l = r - ((t_h - 3.0) - s2);
356   /* u+v = s*(1+...) */
357   u = s_h * t_h;
358   v = s_l * t_h + t_l * s;
359   /* 2/(3log2)*(s+...) */
360   p_h = u + v;
361   o.value = p_h;
362   o.parts32.w3 = 0;
363   o.parts32.w2 &= 0xf8000000;
364   p_h = o.value;
365   p_l = v - (p_h - u);
366   z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
367   z_l = cp_l * p_h + p_l * cp + dp_l[k];
368   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
369   t = (_Float128) n;
370   t1 = (((z_h + z_l) + dp_h[k]) + t);
371   o.value = t1;
372   o.parts32.w3 = 0;
373   o.parts32.w2 &= 0xf8000000;
374   t1 = o.value;
375   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
376 
377   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
378   y1 = y;
379   o.value = y1;
380   o.parts32.w3 = 0;
381   o.parts32.w2 &= 0xf8000000;
382   y1 = o.value;
383   p_l = (y - y1) * t1 + y * t2;
384   p_h = y1 * t1;
385   z = p_l + p_h;
386   o.value = z;
387   j = o.parts32.w0;
388   if (j >= 0x400d0000) /* z >= 16384 */
389     {
390       /* if z > 16384 */
391       if (((j - 0x400d0000) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3) != 0)
392 	return sgn * huge * huge;	/* overflow */
393       else
394 	{
395 	  if (p_l + ovt > z - p_h)
396 	    return sgn * huge * huge;	/* overflow */
397 	}
398     }
399   else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
400     {
401       /* z < -16495 */
402       if (((j - 0xc00d01bc) | o.parts32.w1 | o.parts32.w2 | o.parts32.w3)
403 	  != 0)
404 	return sgn * tiny * tiny;	/* underflow */
405       else
406 	{
407 	  if (p_l <= z - p_h)
408 	    return sgn * tiny * tiny;	/* underflow */
409 	}
410     }
411   /* compute 2**(p_h+p_l) */
412   i = j & 0x7fffffff;
413   k = (i >> 16) - 0x3fff;
414   n = 0;
415   if (i > 0x3ffe0000)
416     {				/* if |z| > 0.5, set n = [z+0.5] */
417       n = floorl (z + L(0.5));
418       t = n;
419       p_h -= t;
420     }
421   t = p_l + p_h;
422   o.value = t;
423   o.parts32.w3 = 0;
424   o.parts32.w2 &= 0xf8000000;
425   t = o.value;
426   u = t * lg2_h;
427   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
428   z = u + v;
429   w = v - (z - u);
430   /*  exp(z) */
431   t = z * z;
432   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
433   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
434   t1 = z - t * u / v;
435   r = (z * t1) / (t1 - two) - (w + z * w);
436   z = one - (r - z);
437   o.value = z;
438   j = o.parts32.w0;
439   j += (n << 16);
440   if ((j >> 16) <= 0)
441     {
442       z = __scalbnl (z, n);	/* subnormal output */
443       _Float128 force_underflow = z * z;
444       math_force_eval (force_underflow);
445     }
446   else
447     {
448       o.parts32.w0 = j;
449       z = o.value;
450     }
451   return sgn * z;
452 }
453 libm_alias_finite (__ieee754_powl, __powl)
454