1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /* Long double expansions are
13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14   and are incorporated herein by permission of the author.  The author
15   reserves the right to distribute this material elsewhere under different
16   copying permissions.  These modifications are distributed here under
17   the following terms:
18 
19     This library is free software; you can redistribute it and/or
20     modify it under the terms of the GNU Lesser General Public
21     License as published by the Free Software Foundation; either
22     version 2.1 of the License, or (at your option) any later version.
23 
24     This library is distributed in the hope that it will be useful,
25     but WITHOUT ANY WARRANTY; without even the implied warranty of
26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27     Lesser General Public License for more details.
28 
29     You should have received a copy of the GNU Lesser General Public
30     License along with this library; if not, see
31     <https://www.gnu.org/licenses/>.  */
32 
33 /* __ieee754_lgammal_r(x, signgamp)
34  * Reentrant version of the logarithm of the Gamma function
35  * with user provide pointer for the sign of Gamma(x).
36  *
37  * Method:
38  *   1. Argument Reduction for 0 < x <= 8
39  *	Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
40  *	reduce x to a number in [1.5,2.5] by
41  *		lgamma(1+s) = log(s) + lgamma(s)
42  *	for example,
43  *		lgamma(7.3) = log(6.3) + lgamma(6.3)
44  *			    = log(6.3*5.3) + lgamma(5.3)
45  *			    = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
46  *   2. Polynomial approximation of lgamma around its
47  *	minimun ymin=1.461632144968362245 to maintain monotonicity.
48  *	On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
49  *		Let z = x-ymin;
50  *		lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
51  *   2. Rational approximation in the primary interval [2,3]
52  *	We use the following approximation:
53  *		s = x-2.0;
54  *		lgamma(x) = 0.5*s + s*P(s)/Q(s)
55  *	Our algorithms are based on the following observation
56  *
57  *                             zeta(2)-1    2    zeta(3)-1    3
58  * lgamma(2+s) = s*(1-Euler) + --------- * s  -  --------- * s  + ...
59  *                                 2                 3
60  *
61  *	where Euler = 0.5771... is the Euler constant, which is very
62  *	close to 0.5.
63  *
64  *   3. For x>=8, we have
65  *	lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
66  *	(better formula:
67  *	   lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
68  *	Let z = 1/x, then we approximation
69  *		f(z) = lgamma(x) - (x-0.5)(log(x)-1)
70  *	by
71  *				    3       5             11
72  *		w = w0 + w1*z + w2*z  + w3*z  + ... + w6*z
73  *
74  *   4. For negative x, since (G is gamma function)
75  *		-x*G(-x)*G(x) = pi/sin(pi*x),
76  *	we have
77  *		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
78  *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
79  *	Hence, for x<0, signgam = sign(sin(pi*x)) and
80  *		lgamma(x) = log(|Gamma(x)|)
81  *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
82  *	Note: one should avoid compute pi*(-x) directly in the
83  *	      computation of sin(pi*(-x)).
84  *
85  *   5. Special Cases
86  *		lgamma(2+s) ~ s*(1-Euler) for tiny s
87  *		lgamma(1)=lgamma(2)=0
88  *		lgamma(x) ~ -log(x) for tiny x
89  *		lgamma(0) = lgamma(inf) = inf
90  *		lgamma(-integer) = +-inf
91  *
92  */
93 
94 #include <math.h>
95 #include <math_private.h>
96 #include <libc-diag.h>
97 #include <libm-alias-finite.h>
98 
99 static const long double
100   half = 0.5L,
101   one = 1.0L,
102   pi = 3.14159265358979323846264L,
103   two63 = 9.223372036854775808e18L,
104 
105   /* lgam(1+x) = 0.5 x + x a(x)/b(x)
106      -0.268402099609375 <= x <= 0
107      peak relative error 6.6e-22 */
108   a0 = -6.343246574721079391729402781192128239938E2L,
109   a1 =  1.856560238672465796768677717168371401378E3L,
110   a2 =  2.404733102163746263689288466865843408429E3L,
111   a3 =  8.804188795790383497379532868917517596322E2L,
112   a4 =  1.135361354097447729740103745999661157426E2L,
113   a5 =  3.766956539107615557608581581190400021285E0L,
114 
115   b0 =  8.214973713960928795704317259806842490498E3L,
116   b1 =  1.026343508841367384879065363925870888012E4L,
117   b2 =  4.553337477045763320522762343132210919277E3L,
118   b3 =  8.506975785032585797446253359230031874803E2L,
119   b4 =  6.042447899703295436820744186992189445813E1L,
120   /* b5 =  1.000000000000000000000000000000000000000E0 */
121 
122 
123   tc =  1.4616321449683623412626595423257213284682E0L,
124   tf = -1.2148629053584961146050602565082954242826E-1,/* double precision */
125 /* tt = (tail of tf), i.e. tf + tt has extended precision. */
126   tt = 3.3649914684731379602768989080467587736363E-18L,
127   /* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
128 -1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */
129 
130   /* lgam (x + tc) = tf + tt + x g(x)/h(x)
131      - 0.230003726999612341262659542325721328468 <= x
132      <= 0.2699962730003876587373404576742786715318
133      peak relative error 2.1e-21 */
134   g0 = 3.645529916721223331888305293534095553827E-18L,
135   g1 = 5.126654642791082497002594216163574795690E3L,
136   g2 = 8.828603575854624811911631336122070070327E3L,
137   g3 = 5.464186426932117031234820886525701595203E3L,
138   g4 = 1.455427403530884193180776558102868592293E3L,
139   g5 = 1.541735456969245924860307497029155838446E2L,
140   g6 = 4.335498275274822298341872707453445815118E0L,
141 
142   h0 = 1.059584930106085509696730443974495979641E4L,
143   h1 =  2.147921653490043010629481226937850618860E4L,
144   h2 = 1.643014770044524804175197151958100656728E4L,
145   h3 =  5.869021995186925517228323497501767586078E3L,
146   h4 =  9.764244777714344488787381271643502742293E2L,
147   h5 =  6.442485441570592541741092969581997002349E1L,
148   /* h6 = 1.000000000000000000000000000000000000000E0 */
149 
150 
151   /* lgam (x+1) = -0.5 x + x u(x)/v(x)
152      -0.100006103515625 <= x <= 0.231639862060546875
153      peak relative error 1.3e-21 */
154   u0 = -8.886217500092090678492242071879342025627E1L,
155   u1 =  6.840109978129177639438792958320783599310E2L,
156   u2 =  2.042626104514127267855588786511809932433E3L,
157   u3 =  1.911723903442667422201651063009856064275E3L,
158   u4 =  7.447065275665887457628865263491667767695E2L,
159   u5 =  1.132256494121790736268471016493103952637E2L,
160   u6 =  4.484398885516614191003094714505960972894E0L,
161 
162   v0 =  1.150830924194461522996462401210374632929E3L,
163   v1 =  3.399692260848747447377972081399737098610E3L,
164   v2 =  3.786631705644460255229513563657226008015E3L,
165   v3 =  1.966450123004478374557778781564114347876E3L,
166   v4 =  4.741359068914069299837355438370682773122E2L,
167   v5 =  4.508989649747184050907206782117647852364E1L,
168   /* v6 =  1.000000000000000000000000000000000000000E0 */
169 
170 
171   /* lgam (x+2) = .5 x + x s(x)/r(x)
172      0 <= x <= 1
173      peak relative error 7.2e-22 */
174   s0 =  1.454726263410661942989109455292824853344E6L,
175   s1 = -3.901428390086348447890408306153378922752E6L,
176   s2 = -6.573568698209374121847873064292963089438E6L,
177   s3 = -3.319055881485044417245964508099095984643E6L,
178   s4 = -7.094891568758439227560184618114707107977E5L,
179   s5 = -6.263426646464505837422314539808112478303E4L,
180   s6 = -1.684926520999477529949915657519454051529E3L,
181 
182   r0 = -1.883978160734303518163008696712983134698E7L,
183   r1 = -2.815206082812062064902202753264922306830E7L,
184   r2 = -1.600245495251915899081846093343626358398E7L,
185   r3 = -4.310526301881305003489257052083370058799E6L,
186   r4 = -5.563807682263923279438235987186184968542E5L,
187   r5 = -3.027734654434169996032905158145259713083E4L,
188   r6 = -4.501995652861105629217250715790764371267E2L,
189   /* r6 =  1.000000000000000000000000000000000000000E0 */
190 
191 
192 /* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
193    x >= 8
194    Peak relative error 1.51e-21
195    w0 = LS2PI - 0.5 */
196   w0 =  4.189385332046727417803e-1L,
197   w1 =  8.333333333333331447505E-2L,
198   w2 = -2.777777777750349603440E-3L,
199   w3 =  7.936507795855070755671E-4L,
200   w4 = -5.952345851765688514613E-4L,
201   w5 =  8.412723297322498080632E-4L,
202   w6 = -1.880801938119376907179E-3L,
203   w7 =  4.885026142432270781165E-3L;
204 
205 static const long double zero = 0.0L;
206 
207 static long double
sin_pi(long double x)208 sin_pi (long double x)
209 {
210   long double y, z;
211   int n, ix;
212   uint32_t se, i0, i1;
213 
214   GET_LDOUBLE_WORDS (se, i0, i1, x);
215   ix = se & 0x7fff;
216   ix = (ix << 16) | (i0 >> 16);
217   if (ix < 0x3ffd8000) /* 0.25 */
218     return __sinl (pi * x);
219   y = -x;			/* x is assume negative */
220 
221   /*
222    * argument reduction, make sure inexact flag not raised if input
223    * is an integer
224    */
225   z = floorl (y);
226   if (z != y)
227     {				/* inexact anyway */
228       y  *= 0.5;
229       y = 2.0*(y - floorl(y));		/* y = |x| mod 2.0 */
230       n = (int) (y*4.0);
231     }
232   else
233     {
234       if (ix >= 0x403f8000)  /* 2^64 */
235 	{
236 	  y = zero; n = 0;                 /* y must be even */
237 	}
238       else
239 	{
240 	if (ix < 0x403e8000)  /* 2^63 */
241 	  z = y + two63;	/* exact */
242 	GET_LDOUBLE_WORDS (se, i0, i1, z);
243 	n = i1 & 1;
244 	y  = n;
245 	n <<= 2;
246       }
247     }
248 
249   switch (n)
250     {
251     case 0:
252       y = __sinl (pi * y);
253       break;
254     case 1:
255     case 2:
256       y = __cosl (pi * (half - y));
257       break;
258     case 3:
259     case 4:
260       y = __sinl (pi * (one - y));
261       break;
262     case 5:
263     case 6:
264       y = -__cosl (pi * (y - 1.5));
265       break;
266     default:
267       y = __sinl (pi * (y - 2.0));
268       break;
269     }
270   return -y;
271 }
272 
273 
274 long double
__ieee754_lgammal_r(long double x,int * signgamp)275 __ieee754_lgammal_r (long double x, int *signgamp)
276 {
277   long double t, y, z, nadj, p, p1, p2, q, r, w;
278   int i, ix;
279   uint32_t se, i0, i1;
280 
281   *signgamp = 1;
282   GET_LDOUBLE_WORDS (se, i0, i1, x);
283   ix = se & 0x7fff;
284 
285   if (__builtin_expect((ix | i0 | i1) == 0, 0))
286     {
287       if (se & 0x8000)
288 	*signgamp = -1;
289       return one / fabsl (x);
290     }
291 
292   ix = (ix << 16) | (i0 >> 16);
293 
294   /* purge off +-inf, NaN, +-0, and negative arguments */
295   if (__builtin_expect(ix >= 0x7fff0000, 0))
296     return x * x;
297 
298   if (__builtin_expect(ix < 0x3fc08000, 0)) /* 2^-63 */
299     {				/* |x|<2**-63, return -log(|x|) */
300       if (se & 0x8000)
301 	{
302 	  *signgamp = -1;
303 	  return -__ieee754_logl (-x);
304 	}
305       else
306 	return -__ieee754_logl (x);
307     }
308   if (se & 0x8000)
309     {
310       if (x < -2.0L && x > -33.0L)
311 	return __lgamma_negl (x, signgamp);
312       t = sin_pi (x);
313       if (t == zero)
314 	return one / fabsl (t);	/* -integer */
315       nadj = __ieee754_logl (pi / fabsl (t * x));
316       if (t < zero)
317 	*signgamp = -1;
318       x = -x;
319     }
320 
321   /* purge off 1 and 2 */
322   if ((((ix - 0x3fff8000) | i0 | i1) == 0)
323       || (((ix - 0x40008000) | i0 | i1) == 0))
324     r = 0;
325   else if (ix < 0x40008000) /* 2.0 */
326     {
327       /* x < 2.0 */
328       if (ix <= 0x3ffee666) /* 8.99993896484375e-1 */
329 	{
330 	  /* lgamma(x) = lgamma(x+1) - log(x) */
331 	  r = -__ieee754_logl (x);
332 	  if (ix >= 0x3ffebb4a) /* 7.31597900390625e-1 */
333 	    {
334 	      y = x - one;
335 	      i = 0;
336 	    }
337 	  else if (ix >= 0x3ffced33)/* 2.31639862060546875e-1 */
338 	    {
339 	      y = x - (tc - one);
340 	      i = 1;
341 	    }
342 	  else
343 	    {
344 	      /* x < 0.23 */
345 	      y = x;
346 	      i = 2;
347 	    }
348 	}
349       else
350 	{
351 	  r = zero;
352 	  if (ix >= 0x3fffdda6) /* 1.73162841796875 */
353 	    {
354 	      /* [1.7316,2] */
355 	      y = x - 2.0;
356 	      i = 0;
357 	    }
358 	  else if (ix >= 0x3fff9da6)/* 1.23162841796875 */
359 	    {
360 	      /* [1.23,1.73] */
361 	      y = x - tc;
362 	      i = 1;
363 	    }
364 	  else
365 	    {
366 	      /* [0.9, 1.23] */
367 	      y = x - one;
368 	      i = 2;
369 	    }
370 	}
371       switch (i)
372 	{
373 	case 0:
374 	  p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
375 	  p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
376 	  r += half * y + y * p1/p2;
377 	  break;
378 	case 1:
379     p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
380     p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
381     p = tt + y * p1/p2;
382 	  r += (tf + p);
383 	  break;
384 	case 2:
385  p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
386       p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
387 	  r += (-half * y + p1 / p2);
388 	}
389     }
390   else if (ix < 0x40028000) /* 8.0 */
391     {
392       /* x < 8.0 */
393       i = (int) x;
394       t = zero;
395       y = x - (double) i;
396   p = y *
397      (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
398   q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
399       r = half * y + p / q;
400       z = one;			/* lgamma(1+s) = log(s) + lgamma(s) */
401       switch (i)
402 	{
403 	case 7:
404 	  z *= (y + 6.0);	/* FALLTHRU */
405 	case 6:
406 	  z *= (y + 5.0);	/* FALLTHRU */
407 	case 5:
408 	  z *= (y + 4.0);	/* FALLTHRU */
409 	case 4:
410 	  z *= (y + 3.0);	/* FALLTHRU */
411 	case 3:
412 	  z *= (y + 2.0);	/* FALLTHRU */
413 	  r += __ieee754_logl (z);
414 	  break;
415 	}
416     }
417   else if (ix < 0x40418000) /* 2^66 */
418     {
419       /* 8.0 <= x < 2**66 */
420       t = __ieee754_logl (x);
421       z = one / x;
422       y = z * z;
423       w = w0 + z * (w1
424 	  + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
425       r = (x - half) * (t - one) + w;
426     }
427   else
428     /* 2**66 <= x <= inf */
429     r = x * (__ieee754_logl (x) - one);
430   /* NADJ is set for negative arguments but not otherwise, resulting
431      in warnings that it may be used uninitialized although in the
432      cases where it is used it has always been set.  */
433   DIAG_PUSH_NEEDS_COMMENT;
434   DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
435   if (se & 0x8000)
436     r = nadj - r;
437   DIAG_POP_NEEDS_COMMENT;
438   return r;
439 }
440 libm_alias_finite (__ieee754_lgammal_r, __lgammal_r)
441