1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12 /* Long double expansions are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
18
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
23
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
28
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <https://www.gnu.org/licenses/>. */
32
33 /* double erf(double x)
34 * double erfc(double x)
35 * x
36 * 2 |\
37 * erf(x) = --------- | exp(-t*t)dt
38 * sqrt(pi) \|
39 * 0
40 *
41 * erfc(x) = 1-erf(x)
42 * Note that
43 * erf(-x) = -erf(x)
44 * erfc(-x) = 2 - erfc(x)
45 *
46 * Method:
47 * 1. For |x| in [0, 0.84375]
48 * erf(x) = x + x*R(x^2)
49 * erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
50 * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
51 * Remark. The formula is derived by noting
52 * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
53 * and that
54 * 2/sqrt(pi) = 1.128379167095512573896158903121545171688
55 * is close to one. The interval is chosen because the fix
56 * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
57 * near 0.6174), and by some experiment, 0.84375 is chosen to
58 * guarantee the error is less than one ulp for erf.
59 *
60 * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
61 * c = 0.84506291151 rounded to single (24 bits)
62 * erf(x) = sign(x) * (c + P1(s)/Q1(s))
63 * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
64 * 1+(c+P1(s)/Q1(s)) if x < 0
65 * Remark: here we use the taylor series expansion at x=1.
66 * erf(1+s) = erf(1) + s*Poly(s)
67 * = 0.845.. + P1(s)/Q1(s)
68 * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
69 *
70 * 3. For x in [1.25,1/0.35(~2.857143)],
71 * erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
72 * z=1/x^2
73 * erf(x) = 1 - erfc(x)
74 *
75 * 4. For x in [1/0.35,107]
76 * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
77 * = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
78 * if -6.666<x<0
79 * = 2.0 - tiny (if x <= -6.666)
80 * z=1/x^2
81 * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
82 * erf(x) = sign(x)*(1.0 - tiny)
83 * Note1:
84 * To compute exp(-x*x-0.5625+R/S), let s be a single
85 * precision number and s := x; then
86 * -x*x = -s*s + (s-x)*(s+x)
87 * exp(-x*x-0.5626+R/S) =
88 * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
89 * Note2:
90 * Here 4 and 5 make use of the asymptotic series
91 * exp(-x*x)
92 * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
93 * x*sqrt(pi)
94 *
95 * 5. For inf > x >= 107
96 * erf(x) = sign(x) *(1 - tiny) (raise inexact)
97 * erfc(x) = tiny*tiny (raise underflow) if x > 0
98 * = 2 - tiny if x<0
99 *
100 * 7. Special case:
101 * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
102 * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
103 * erfc/erf(NaN) is NaN
104 */
105
106
107 #include <errno.h>
108 #include <float.h>
109 #include <math.h>
110 #include <math_private.h>
111 #include <math-underflow.h>
112 #include <libm-alias-ldouble.h>
113
114 static const long double
115 tiny = 1e-4931L,
116 half = 0.5L,
117 one = 1.0L,
118 two = 2.0L,
119 /* c = (float)0.84506291151 */
120 erx = 0.845062911510467529296875L,
121 /*
122 * Coefficients for approximation to erf on [0,0.84375]
123 */
124 /* 2/sqrt(pi) - 1 */
125 efx = 1.2837916709551257389615890312154517168810E-1L,
126
127 pp[6] = {
128 1.122751350964552113068262337278335028553E6L,
129 -2.808533301997696164408397079650699163276E6L,
130 -3.314325479115357458197119660818768924100E5L,
131 -6.848684465326256109712135497895525446398E4L,
132 -2.657817695110739185591505062971929859314E3L,
133 -1.655310302737837556654146291646499062882E2L,
134 },
135
136 qq[6] = {
137 8.745588372054466262548908189000448124232E6L,
138 3.746038264792471129367533128637019611485E6L,
139 7.066358783162407559861156173539693900031E5L,
140 7.448928604824620999413120955705448117056E4L,
141 4.511583986730994111992253980546131408924E3L,
142 1.368902937933296323345610240009071254014E2L,
143 /* 1.000000000000000000000000000000000000000E0 */
144 },
145
146 /*
147 * Coefficients for approximation to erf in [0.84375,1.25]
148 */
149 /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
150 -0.15625 <= x <= +.25
151 Peak relative error 8.5e-22 */
152
153 pa[8] = {
154 -1.076952146179812072156734957705102256059E0L,
155 1.884814957770385593365179835059971587220E2L,
156 -5.339153975012804282890066622962070115606E1L,
157 4.435910679869176625928504532109635632618E1L,
158 1.683219516032328828278557309642929135179E1L,
159 -2.360236618396952560064259585299045804293E0L,
160 1.852230047861891953244413872297940938041E0L,
161 9.394994446747752308256773044667843200719E-2L,
162 },
163
164 qa[7] = {
165 4.559263722294508998149925774781887811255E2L,
166 3.289248982200800575749795055149780689738E2L,
167 2.846070965875643009598627918383314457912E2L,
168 1.398715859064535039433275722017479994465E2L,
169 6.060190733759793706299079050985358190726E1L,
170 2.078695677795422351040502569964299664233E1L,
171 4.641271134150895940966798357442234498546E0L,
172 /* 1.000000000000000000000000000000000000000E0 */
173 },
174
175 /*
176 * Coefficients for approximation to erfc in [1.25,1/0.35]
177 */
178 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
179 1/2.85711669921875 < 1/x < 1/1.25
180 Peak relative error 3.1e-21 */
181
182 ra[] = {
183 1.363566591833846324191000679620738857234E-1L,
184 1.018203167219873573808450274314658434507E1L,
185 1.862359362334248675526472871224778045594E2L,
186 1.411622588180721285284945138667933330348E3L,
187 5.088538459741511988784440103218342840478E3L,
188 8.928251553922176506858267311750789273656E3L,
189 7.264436000148052545243018622742770549982E3L,
190 2.387492459664548651671894725748959751119E3L,
191 2.220916652813908085449221282808458466556E2L,
192 },
193
194 sa[] = {
195 -1.382234625202480685182526402169222331847E1L,
196 -3.315638835627950255832519203687435946482E2L,
197 -2.949124863912936259747237164260785326692E3L,
198 -1.246622099070875940506391433635999693661E4L,
199 -2.673079795851665428695842853070996219632E4L,
200 -2.880269786660559337358397106518918220991E4L,
201 -1.450600228493968044773354186390390823713E4L,
202 -2.874539731125893533960680525192064277816E3L,
203 -1.402241261419067750237395034116942296027E2L,
204 /* 1.000000000000000000000000000000000000000E0 */
205 },
206 /*
207 * Coefficients for approximation to erfc in [1/.35,107]
208 */
209 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
210 1/6.6666259765625 < 1/x < 1/2.85711669921875
211 Peak relative error 4.2e-22 */
212 rb[] = {
213 -4.869587348270494309550558460786501252369E-5L,
214 -4.030199390527997378549161722412466959403E-3L,
215 -9.434425866377037610206443566288917589122E-2L,
216 -9.319032754357658601200655161585539404155E-1L,
217 -4.273788174307459947350256581445442062291E0L,
218 -8.842289940696150508373541814064198259278E0L,
219 -7.069215249419887403187988144752613025255E0L,
220 -1.401228723639514787920274427443330704764E0L,
221 },
222
223 sb[] = {
224 4.936254964107175160157544545879293019085E-3L,
225 1.583457624037795744377163924895349412015E-1L,
226 1.850647991850328356622940552450636420484E0L,
227 9.927611557279019463768050710008450625415E0L,
228 2.531667257649436709617165336779212114570E1L,
229 2.869752886406743386458304052862814690045E1L,
230 1.182059497870819562441683560749192539345E1L,
231 /* 1.000000000000000000000000000000000000000E0 */
232 },
233 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
234 1/107 <= 1/x <= 1/6.6666259765625
235 Peak relative error 1.1e-21 */
236 rc[] = {
237 -8.299617545269701963973537248996670806850E-5L,
238 -6.243845685115818513578933902532056244108E-3L,
239 -1.141667210620380223113693474478394397230E-1L,
240 -7.521343797212024245375240432734425789409E-1L,
241 -1.765321928311155824664963633786967602934E0L,
242 -1.029403473103215800456761180695263439188E0L,
243 },
244
245 sc[] = {
246 8.413244363014929493035952542677768808601E-3L,
247 2.065114333816877479753334599639158060979E-1L,
248 1.639064941530797583766364412782135680148E0L,
249 4.936788463787115555582319302981666347450E0L,
250 5.005177727208955487404729933261347679090E0L,
251 /* 1.000000000000000000000000000000000000000E0 */
252 };
253
254 long double
__erfl(long double x)255 __erfl (long double x)
256 {
257 long double R, S, P, Q, s, y, z, r;
258 int32_t ix, i;
259 uint32_t se, i0, i1;
260
261 GET_LDOUBLE_WORDS (se, i0, i1, x);
262 ix = se & 0x7fff;
263
264 if (ix >= 0x7fff)
265 { /* erf(nan)=nan */
266 i = ((se & 0xffff) >> 15) << 1;
267 return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
268 }
269
270 ix = (ix << 16) | (i0 >> 16);
271 if (ix < 0x3ffed800) /* |x|<0.84375 */
272 {
273 if (ix < 0x3fde8000) /* |x|<2**-33 */
274 {
275 if (ix < 0x00080000)
276 {
277 /* Avoid spurious underflow. */
278 long double ret = 0.0625 * (16.0 * x + (16.0 * efx) * x);
279 math_check_force_underflow (ret);
280 return ret;
281 }
282 return x + efx * x;
283 }
284 z = x * x;
285 r = pp[0] + z * (pp[1]
286 + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
287 s = qq[0] + z * (qq[1]
288 + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
289 y = r / s;
290 return x + x * y;
291 }
292 if (ix < 0x3fffa000) /* 1.25 */
293 { /* 0.84375 <= |x| < 1.25 */
294 s = fabsl (x) - one;
295 P = pa[0] + s * (pa[1] + s * (pa[2]
296 + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
297 Q = qa[0] + s * (qa[1] + s * (qa[2]
298 + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
299 if ((se & 0x8000) == 0)
300 return erx + P / Q;
301 else
302 return -erx - P / Q;
303 }
304 if (ix >= 0x4001d555) /* 6.6666259765625 */
305 { /* inf>|x|>=6.666 */
306 if ((se & 0x8000) == 0)
307 return one - tiny;
308 else
309 return tiny - one;
310 }
311 x = fabsl (x);
312 s = one / (x * x);
313 if (ix < 0x4000b6db) /* 2.85711669921875 */
314 {
315 R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
316 s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
317 S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
318 s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
319 }
320 else
321 { /* |x| >= 1/0.35 */
322 R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
323 s * (rb[5] + s * (rb[6] + s * rb[7]))))));
324 S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
325 s * (sb[5] + s * (sb[6] + s))))));
326 }
327 z = x;
328 GET_LDOUBLE_WORDS (i, i0, i1, z);
329 i1 = 0;
330 SET_LDOUBLE_WORDS (z, i, i0, i1);
331 r =
332 __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
333 R / S);
334 if ((se & 0x8000) == 0)
335 return one - r / x;
336 else
337 return r / x - one;
338 }
339
libm_alias_ldouble(__erf,erf)340 libm_alias_ldouble (__erf, erf)
341 long double
342 __erfcl (long double x)
343 {
344 int32_t hx, ix;
345 long double R, S, P, Q, s, y, z, r;
346 uint32_t se, i0, i1;
347
348 GET_LDOUBLE_WORDS (se, i0, i1, x);
349 ix = se & 0x7fff;
350 if (ix >= 0x7fff)
351 { /* erfc(nan)=nan */
352 /* erfc(+-inf)=0,2 */
353 return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
354 }
355
356 ix = (ix << 16) | (i0 >> 16);
357 if (ix < 0x3ffed800) /* |x|<0.84375 */
358 {
359 if (ix < 0x3fbe0000) /* |x|<2**-65 */
360 return one - x;
361 z = x * x;
362 r = pp[0] + z * (pp[1]
363 + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
364 s = qq[0] + z * (qq[1]
365 + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
366 y = r / s;
367 if (ix < 0x3ffd8000) /* x<1/4 */
368 {
369 return one - (x + x * y);
370 }
371 else
372 {
373 r = x * y;
374 r += (x - half);
375 return half - r;
376 }
377 }
378 if (ix < 0x3fffa000) /* 1.25 */
379 { /* 0.84375 <= |x| < 1.25 */
380 s = fabsl (x) - one;
381 P = pa[0] + s * (pa[1] + s * (pa[2]
382 + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
383 Q = qa[0] + s * (qa[1] + s * (qa[2]
384 + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
385 if ((se & 0x8000) == 0)
386 {
387 z = one - erx;
388 return z - P / Q;
389 }
390 else
391 {
392 z = erx + P / Q;
393 return one + z;
394 }
395 }
396 if (ix < 0x4005d600) /* 107 */
397 { /* |x|<107 */
398 x = fabsl (x);
399 s = one / (x * x);
400 if (ix < 0x4000b6db) /* 2.85711669921875 */
401 { /* |x| < 1/.35 ~ 2.857143 */
402 R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
403 s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
404 S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
405 s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
406 }
407 else if (ix < 0x4001d555) /* 6.6666259765625 */
408 { /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
409 R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
410 s * (rb[5] + s * (rb[6] + s * rb[7]))))));
411 S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
412 s * (sb[5] + s * (sb[6] + s))))));
413 }
414 else
415 { /* |x| >= 6.666 */
416 if (se & 0x8000)
417 return two - tiny; /* x < -6.666 */
418
419 R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
420 s * (rc[4] + s * rc[5]))));
421 S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
422 s * (sc[4] + s))));
423 }
424 z = x;
425 GET_LDOUBLE_WORDS (hx, i0, i1, z);
426 i1 = 0;
427 i0 &= 0xffffff00;
428 SET_LDOUBLE_WORDS (z, hx, i0, i1);
429 r = __ieee754_expl (-z * z - 0.5625) *
430 __ieee754_expl ((z - x) * (z + x) + R / S);
431 if ((se & 0x8000) == 0)
432 {
433 long double ret = r / x;
434 if (ret == 0)
435 __set_errno (ERANGE);
436 return ret;
437 }
438 else
439 return two - r / x;
440 }
441 else
442 {
443 if ((se & 0x8000) == 0)
444 {
445 __set_errno (ERANGE);
446 return tiny * tiny;
447 }
448 else
449 return two - tiny;
450 }
451 }
452
453 libm_alias_ldouble (__erfc, erfc)
454