1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /* Expansions and modifications for 128-bit long double are
13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14    and are incorporated herein by permission of the author.  The author
15    reserves the right to distribute this material elsewhere under different
16    copying permissions.  These modifications are distributed here under
17    the following terms:
18 
19     This library is free software; you can redistribute it and/or
20     modify it under the terms of the GNU Lesser General Public
21     License as published by the Free Software Foundation; either
22     version 2.1 of the License, or (at your option) any later version.
23 
24     This library is distributed in the hope that it will be useful,
25     but WITHOUT ANY WARRANTY; without even the implied warranty of
26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27     Lesser General Public License for more details.
28 
29     You should have received a copy of the GNU Lesser General Public
30     License along with this library; if not, see
31     <https://www.gnu.org/licenses/>.  */
32 
33 /* __ieee754_powl(x,y) return x**y
34  *
35  *		      n
36  * Method:  Let x =  2   * (1+f)
37  *	1. Compute and return log2(x) in two pieces:
38  *		log2(x) = w1 + w2,
39  *	   where w1 has 113-53 = 60 bit trailing zeros.
40  *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41  *	   arithmetic, where |y'|<=0.5.
42  *	3. Return x**y = 2**n*exp(y'*log2)
43  *
44  * Special cases:
45  *	1.  (anything) ** 0  is 1
46  *	2.  (anything) ** 1  is itself
47  *	3.  (anything) ** NAN is NAN
48  *	4.  NAN ** (anything except 0) is NAN
49  *	5.  +-(|x| > 1) **  +INF is +INF
50  *	6.  +-(|x| > 1) **  -INF is +0
51  *	7.  +-(|x| < 1) **  +INF is +0
52  *	8.  +-(|x| < 1) **  -INF is +INF
53  *	9.  +-1         ** +-INF is NAN
54  *	10. +0 ** (+anything except 0, NAN)               is +0
55  *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56  *	12. +0 ** (-anything except 0, NAN)               is +INF
57  *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58  *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59  *	15. +INF ** (+anything except 0,NAN) is +INF
60  *	16. +INF ** (-anything except 0,NAN) is +0
61  *	17. -INF ** (anything)  = -0 ** (-anything)
62  *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63  *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64  *
65  */
66 
67 #include <math.h>
68 #include <math_private.h>
69 #include <math-underflow.h>
70 #include <libm-alias-finite.h>
71 
72 static const long double bp[] = {
73   1.0L,
74   1.5L,
75 };
76 
77 /* log_2(1.5) */
78 static const long double dp_h[] = {
79   0.0,
80   5.8496250072115607565592654282227158546448E-1L
81 };
82 
83 /* Low part of log_2(1.5) */
84 static const long double dp_l[] = {
85   0.0,
86   1.0579781240112554492329533686862998106046E-16L
87 };
88 
89 static const long double zero = 0.0L,
90   one = 1.0L,
91   two = 2.0L,
92   two113 = 1.0384593717069655257060992658440192E34L,
93   huge = 1.0e300L,
94   tiny = 1.0e-300L;
95 
96 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
97    z = (x-1)/(x+1)
98    1 <= x <= 1.25
99    Peak relative error 2.3e-37 */
100 static const long double LN[] =
101 {
102  -3.0779177200290054398792536829702930623200E1L,
103   6.5135778082209159921251824580292116201640E1L,
104  -4.6312921812152436921591152809994014413540E1L,
105   1.2510208195629420304615674658258363295208E1L,
106  -9.9266909031921425609179910128531667336670E-1L
107 };
108 static const long double LD[] =
109 {
110  -5.129862866715009066465422805058933131960E1L,
111   1.452015077564081884387441590064272782044E2L,
112  -1.524043275549860505277434040464085593165E2L,
113   7.236063513651544224319663428634139768808E1L,
114  -1.494198912340228235853027849917095580053E1L
115   /* 1.0E0 */
116 };
117 
118 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
119    0 <= x <= 0.5
120    Peak relative error 5.7e-38  */
121 static const long double PN[] =
122 {
123   5.081801691915377692446852383385968225675E8L,
124   9.360895299872484512023336636427675327355E6L,
125   4.213701282274196030811629773097579432957E4L,
126   5.201006511142748908655720086041570288182E1L,
127   9.088368420359444263703202925095675982530E-3L,
128 };
129 static const long double PD[] =
130 {
131   3.049081015149226615468111430031590411682E9L,
132   1.069833887183886839966085436512368982758E8L,
133   8.259257717868875207333991924545445705394E5L,
134   1.872583833284143212651746812884298360922E3L,
135   /* 1.0E0 */
136 };
137 
138 static const long double
139   /* ln 2 */
140   lg2 = 6.9314718055994530941723212145817656807550E-1L,
141   lg2_h = 6.9314718055994528622676398299518041312695E-1L,
142   lg2_l = 2.3190468138462996154948554638754786504121E-17L,
143   ovt = 8.0085662595372944372e-0017L,
144   /* 2/(3*log(2)) */
145   cp = 9.6179669392597560490661645400126142495110E-1L,
146   cp_h = 9.6179669392597555432899980587535537779331E-1L,
147   cp_l = 5.0577616648125906047157785230014751039424E-17L;
148 
149 long double
__ieee754_powl(long double x,long double y)150 __ieee754_powl (long double x, long double y)
151 {
152   long double z, ax, z_h, z_l, p_h, p_l;
153   long double y1, t1, t2, r, s, sgn, t, u, v, w;
154   long double s2, s_h, s_l, t_h, t_l, ay;
155   int32_t i, j, k, yisint, n;
156   uint32_t ix, iy;
157   int32_t hx, hy, hax;
158   double ohi, xhi, xlo, yhi, ylo;
159   uint32_t lx, ly, lj;
160 
161   ldbl_unpack (x, &xhi, &xlo);
162   EXTRACT_WORDS (hx, lx, xhi);
163   ix = hx & 0x7fffffff;
164 
165   ldbl_unpack (y, &yhi, &ylo);
166   EXTRACT_WORDS (hy, ly, yhi);
167   iy = hy & 0x7fffffff;
168 
169   /* y==zero: x**0 = 1 */
170   if ((iy | ly) == 0 && !issignaling (x))
171     return one;
172 
173   /* 1.0**y = 1; -1.0**+-Inf = 1 */
174   if (x == one && !issignaling (y))
175     return one;
176   if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
177     return one;
178 
179   /* +-NaN return x+y */
180   if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
181       || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
182     return x + y;
183 
184   /* determine if y is an odd int when x < 0
185    * yisint = 0       ... y is not an integer
186    * yisint = 1       ... y is an odd int
187    * yisint = 2       ... y is an even int
188    */
189   yisint = 0;
190   if (hx < 0)
191     {
192       uint32_t low_ye;
193 
194       GET_HIGH_WORD (low_ye, ylo);
195       if ((low_ye & 0x7fffffff) >= 0x43400000)	/* Low part >= 2^53 */
196 	yisint = 2;		/* even integer y */
197       else if (iy >= 0x3ff00000)	/* 1.0 */
198 	{
199 	  if (floorl (y) == y)
200 	    {
201 	      z = 0.5 * y;
202 	      if (floorl (z) == z)
203 		yisint = 2;
204 	      else
205 		yisint = 1;
206 	    }
207 	}
208     }
209 
210   ax = fabsl (x);
211 
212   /* special value of y */
213   if (ly == 0)
214     {
215       if (iy == 0x7ff00000)	/* y is +-inf */
216 	{
217 	  if (ax > one)
218 	    /* (|x|>1)**+-inf = inf,0 */
219 	    return (hy >= 0) ? y : zero;
220 	  else
221 	    /* (|x|<1)**-,+inf = inf,0 */
222 	    return (hy < 0) ? -y : zero;
223 	}
224       if (ylo == 0.0)
225 	{
226 	  if (iy == 0x3ff00000)
227 	    {			/* y is  +-1 */
228 	      if (hy < 0)
229 		return one / x;
230 	      else
231 		return x;
232 	    }
233 	  if (hy == 0x40000000)
234 	    return x * x;		/* y is  2 */
235 	  if (hy == 0x3fe00000)
236 	    {			/* y is  0.5 */
237 	      if (hx >= 0)		/* x >= +0 */
238 		return sqrtl (x);
239 	    }
240 	}
241     }
242 
243   /* special value of x */
244   if (lx == 0)
245     {
246       if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
247 	{
248 	  z = ax;		/*x is +-0,+-inf,+-1 */
249 	  if (hy < 0)
250 	    z = one / z;	/* z = (1/|x|) */
251 	  if (hx < 0)
252 	    {
253 	      if (((ix - 0x3ff00000) | yisint) == 0)
254 		{
255 		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
256 		}
257 	      else if (yisint == 1)
258 		z = -z;		/* (x<0)**odd = -(|x|**odd) */
259 	    }
260 	  return z;
261 	}
262     }
263 
264   /* (x<0)**(non-int) is NaN */
265   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
266     return (x - x) / (x - x);
267 
268   /* sgn (sign of result -ve**odd) = -1 else = 1 */
269   sgn = one;
270   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
271     sgn = -one;			/* (-ve)**(odd int) */
272 
273   /* |y| is huge.
274      2^-16495 = 1/2 of smallest representable value.
275      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
276   if (iy > 0x41d654b0)
277     {
278       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
279       if (iy > 0x47d654b0)
280 	{
281 	  if (ix <= 0x3fefffff)
282 	    return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
283 	  if (ix >= 0x3ff00000)
284 	    return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
285 	}
286       /* over/underflow if x is not close to one */
287       if (ix < 0x3fefffff)
288 	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
289       if (ix > 0x3ff00000)
290 	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
291     }
292 
293   ay = y > 0 ? y : -y;
294   if (ay < 0x1p-117)
295     y = y < 0 ? -0x1p-117 : 0x1p-117;
296 
297   n = 0;
298   /* take care subnormal number */
299   if (ix < 0x00100000)
300     {
301       ax *= two113;
302       n -= 113;
303       ohi = ldbl_high (ax);
304       GET_HIGH_WORD (ix, ohi);
305     }
306   n += ((ix) >> 20) - 0x3ff;
307   j = ix & 0x000fffff;
308   /* determine interval */
309   ix = j | 0x3ff00000;		/* normalize ix */
310   if (j <= 0x39880)
311     k = 0;			/* |x|<sqrt(3/2) */
312   else if (j < 0xbb670)
313     k = 1;			/* |x|<sqrt(3)   */
314   else
315     {
316       k = 0;
317       n += 1;
318       ix -= 0x00100000;
319     }
320 
321   ohi = ldbl_high (ax);
322   GET_HIGH_WORD (hax, ohi);
323   ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
324 
325   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
326   u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
327   v = one / (ax + bp[k]);
328   s = u * v;
329   s_h = ldbl_high (s);
330 
331   /* t_h=ax+bp[k] High */
332   t_h = ax + bp[k];
333   t_h = ldbl_high (t_h);
334   t_l = ax - (t_h - bp[k]);
335   s_l = v * ((u - s_h * t_h) - s_h * t_l);
336   /* compute log(ax) */
337   s2 = s * s;
338   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
339   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
340   r = s2 * s2 * u / v;
341   r += s_l * (s_h + s);
342   s2 = s_h * s_h;
343   t_h = 3.0 + s2 + r;
344   t_h = ldbl_high (t_h);
345   t_l = r - ((t_h - 3.0) - s2);
346   /* u+v = s*(1+...) */
347   u = s_h * t_h;
348   v = s_l * t_h + t_l * s;
349   /* 2/(3log2)*(s+...) */
350   p_h = u + v;
351   p_h = ldbl_high (p_h);
352   p_l = v - (p_h - u);
353   z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
354   z_l = cp_l * p_h + p_l * cp + dp_l[k];
355   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
356   t = (long double) n;
357   t1 = (((z_h + z_l) + dp_h[k]) + t);
358   t1 = ldbl_high (t1);
359   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
360 
361   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
362   y1 = ldbl_high (y);
363   p_l = (y - y1) * t1 + y * t2;
364   p_h = y1 * t1;
365   z = p_l + p_h;
366   ohi = ldbl_high (z);
367   EXTRACT_WORDS (j, lj, ohi);
368   if (j >= 0x40d00000) /* z >= 16384 */
369     {
370       /* if z > 16384 */
371       if (((j - 0x40d00000) | lj) != 0)
372 	return sgn * huge * huge;	/* overflow */
373       else
374 	{
375 	  if (p_l + ovt > z - p_h)
376 	    return sgn * huge * huge;	/* overflow */
377 	}
378     }
379   else if ((j & 0x7fffffff) >= 0x40d01b90)	/* z <= -16495 */
380     {
381       /* z < -16495 */
382       if (((j - 0xc0d01bc0) | lj) != 0)
383 	return sgn * tiny * tiny;	/* underflow */
384       else
385 	{
386 	  if (p_l <= z - p_h)
387 	    return sgn * tiny * tiny;	/* underflow */
388 	}
389     }
390   /* compute 2**(p_h+p_l) */
391   i = j & 0x7fffffff;
392   k = (i >> 20) - 0x3ff;
393   n = 0;
394   if (i > 0x3fe00000)
395     {				/* if |z| > 0.5, set n = [z+0.5] */
396       n = floorl (z + 0.5L);
397       t = n;
398       p_h -= t;
399     }
400   t = p_l + p_h;
401   t = ldbl_high (t);
402   u = t * lg2_h;
403   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
404   z = u + v;
405   w = v - (z - u);
406   /*  exp(z) */
407   t = z * z;
408   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
409   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
410   t1 = z - t * u / v;
411   r = (z * t1) / (t1 - two) - (w + z * w);
412   z = one - (r - z);
413   z = __scalbnl (sgn * z, n);
414   math_check_force_underflow (z);
415   return z;
416 }
417 libm_alias_finite (__ieee754_powl, __powl)
418