1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /*
13    Long double expansions are
14    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
15    and are incorporated herein by permission of the author.  The author
16    reserves the right to distribute this material elsewhere under different
17    copying permissions.  These modifications are distributed here under
18    the following terms:
19 
20     This library is free software; you can redistribute it and/or
21     modify it under the terms of the GNU Lesser General Public
22     License as published by the Free Software Foundation; either
23     version 2.1 of the License, or (at your option) any later version.
24 
25     This library is distributed in the hope that it will be useful,
26     but WITHOUT ANY WARRANTY; without even the implied warranty of
27     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28     Lesser General Public License for more details.
29 
30     You should have received a copy of the GNU Lesser General Public
31     License along with this library; if not, see
32     <https://www.gnu.org/licenses/>.  */
33 
34 /* __ieee754_acosl(x)
35  * Method :
36  *      acos(x)  = pi/2 - asin(x)
37  *      acos(-x) = pi/2 + asin(x)
38  * For |x| <= 0.375
39  *      acos(x) = pi/2 - asin(x)
40  * Between .375 and .5 the approximation is
41  *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
42  * Between .5 and .625 the approximation is
43  *      acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
44  * For x > 0.625,
45  *      acos(x) = 2 asin(sqrt((1-x)/2))
46  *      computed with an extended precision square root in the leading term.
47  * For x < -0.625
48  *      acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
49  *
50  * Special cases:
51  *      if x is NaN, return x itself;
52  *      if |x|>1, return NaN with invalid signal.
53  *
54  * Functions needed: sqrtl.
55  */
56 
57 #include <math.h>
58 #include <math_private.h>
59 #include <libm-alias-finite.h>
60 
61 static const _Float128
62   one = 1,
63   pio2_hi = L(1.5707963267948966192313216916397514420986),
64   pio2_lo = L(4.3359050650618905123985220130216759843812E-35),
65 
66   /* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
67      -0.0625 <= x <= 0.0625
68      peak relative error 3.3e-35  */
69 
70   rS0 =  L(5.619049346208901520945464704848780243887E0),
71   rS1 = L(-4.460504162777731472539175700169871920352E1),
72   rS2 =  L(1.317669505315409261479577040530751477488E2),
73   rS3 = L(-1.626532582423661989632442410808596009227E2),
74   rS4 =  L(3.144806644195158614904369445440583873264E1),
75   rS5 =  L(9.806674443470740708765165604769099559553E1),
76   rS6 = L(-5.708468492052010816555762842394927806920E1),
77   rS7 = L(-1.396540499232262112248553357962639431922E1),
78   rS8 =  L(1.126243289311910363001762058295832610344E1),
79   rS9 =  L(4.956179821329901954211277873774472383512E-1),
80   rS10 = L(-3.313227657082367169241333738391762525780E-1),
81 
82   sS0 = L(-4.645814742084009935700221277307007679325E0),
83   sS1 =  L(3.879074822457694323970438316317961918430E1),
84   sS2 = L(-1.221986588013474694623973554726201001066E2),
85   sS3 =  L(1.658821150347718105012079876756201905822E2),
86   sS4 = L(-4.804379630977558197953176474426239748977E1),
87   sS5 = L(-1.004296417397316948114344573811562952793E2),
88   sS6 =  L(7.530281592861320234941101403870010111138E1),
89   sS7 =  L(1.270735595411673647119592092304357226607E1),
90   sS8 = L(-1.815144839646376500705105967064792930282E1),
91   sS9 = L(-7.821597334910963922204235247786840828217E-2),
92   /* 1.000000000000000000000000000000000000000E0 */
93 
94   acosr5625 = L(9.7338991014954640492751132535550279812151E-1),
95   pimacosr5625 = L(2.1682027434402468335351320579240000860757E0),
96 
97   /* acos(0.4375 + x) = acos(0.4375) + x rS(x) / sS(x)
98      -0.0625 <= x <= 0.0625
99      peak relative error 2.1e-35  */
100 
101   P0 =  L(2.177690192235413635229046633751390484892E0),
102   P1 = L(-2.848698225706605746657192566166142909573E1),
103   P2 =  L(1.040076477655245590871244795403659880304E2),
104   P3 = L(-1.400087608918906358323551402881238180553E2),
105   P4 =  L(2.221047917671449176051896400503615543757E1),
106   P5 =  L(9.643714856395587663736110523917499638702E1),
107   P6 = L(-5.158406639829833829027457284942389079196E1),
108   P7 = L(-1.578651828337585944715290382181219741813E1),
109   P8 =  L(1.093632715903802870546857764647931045906E1),
110   P9 =  L(5.448925479898460003048760932274085300103E-1),
111   P10 = L(-3.315886001095605268470690485170092986337E-1),
112   Q0 = L(-1.958219113487162405143608843774587557016E0),
113   Q1 =  L(2.614577866876185080678907676023269360520E1),
114   Q2 = L(-9.990858606464150981009763389881793660938E1),
115   Q3 =  L(1.443958741356995763628660823395334281596E2),
116   Q4 = L(-3.206441012484232867657763518369723873129E1),
117   Q5 = L(-1.048560885341833443564920145642588991492E2),
118   Q6 =  L(6.745883931909770880159915641984874746358E1),
119   Q7 =  L(1.806809656342804436118449982647641392951E1),
120   Q8 = L(-1.770150690652438294290020775359580915464E1),
121   Q9 = L(-5.659156469628629327045433069052560211164E-1),
122   /* 1.000000000000000000000000000000000000000E0 */
123 
124   acosr4375 = L(1.1179797320499710475919903296900511518755E0),
125   pimacosr4375 = L(2.0236129215398221908706530535894517323217E0),
126 
127   /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
128      0 <= x <= 0.5
129      peak relative error 1.9e-35  */
130   pS0 = L(-8.358099012470680544198472400254596543711E2),
131   pS1 =  L(3.674973957689619490312782828051860366493E3),
132   pS2 = L(-6.730729094812979665807581609853656623219E3),
133   pS3 =  L(6.643843795209060298375552684423454077633E3),
134   pS4 = L(-3.817341990928606692235481812252049415993E3),
135   pS5 =  L(1.284635388402653715636722822195716476156E3),
136   pS6 = L(-2.410736125231549204856567737329112037867E2),
137   pS7 =  L(2.219191969382402856557594215833622156220E1),
138   pS8 = L(-7.249056260830627156600112195061001036533E-1),
139   pS9 =  L(1.055923570937755300061509030361395604448E-3),
140 
141   qS0 = L(-5.014859407482408326519083440151745519205E3),
142   qS1 =  L(2.430653047950480068881028451580393430537E4),
143   qS2 = L(-4.997904737193653607449250593976069726962E4),
144   qS3 =  L(5.675712336110456923807959930107347511086E4),
145   qS4 = L(-3.881523118339661268482937768522572588022E4),
146   qS5 =  L(1.634202194895541569749717032234510811216E4),
147   qS6 = L(-4.151452662440709301601820849901296953752E3),
148   qS7 =  L(5.956050864057192019085175976175695342168E2),
149   qS8 = L(-4.175375777334867025769346564600396877176E1);
150   /* 1.000000000000000000000000000000000000000E0 */
151 
152 _Float128
__ieee754_acosl(_Float128 x)153 __ieee754_acosl (_Float128 x)
154 {
155   _Float128 z, r, w, p, q, s, t, f2;
156   int32_t ix, sign;
157   ieee854_long_double_shape_type u;
158 
159   u.value = x;
160   sign = u.parts32.w0;
161   ix = sign & 0x7fffffff;
162   u.parts32.w0 = ix;		/* |x| */
163   if (ix >= 0x3fff0000)		/* |x| >= 1 */
164     {
165       if (ix == 0x3fff0000
166 	  && (u.parts32.w1 | u.parts32.w2 | u.parts32.w3) == 0)
167 	{			/* |x| == 1 */
168 	  if ((sign & 0x80000000) == 0)
169 	    return 0.0;		/* acos(1) = 0  */
170 	  else
171 	    return (2.0 * pio2_hi) + (2.0 * pio2_lo);	/* acos(-1)= pi */
172 	}
173       return (x - x) / (x - x);	/* acos(|x| > 1) is NaN */
174     }
175   else if (ix < 0x3ffe0000)	/* |x| < 0.5 */
176     {
177       if (ix < 0x3f8e0000)	/* |x| < 2**-113 */
178 	return pio2_hi + pio2_lo;
179       if (ix < 0x3ffde000)	/* |x| < .4375 */
180 	{
181 	  /* Arcsine of x.  */
182 	  z = x * x;
183 	  p = (((((((((pS9 * z
184 		       + pS8) * z
185 		      + pS7) * z
186 		     + pS6) * z
187 		    + pS5) * z
188 		   + pS4) * z
189 		  + pS3) * z
190 		 + pS2) * z
191 		+ pS1) * z
192 	       + pS0) * z;
193 	  q = (((((((( z
194 		       + qS8) * z
195 		     + qS7) * z
196 		    + qS6) * z
197 		   + qS5) * z
198 		  + qS4) * z
199 		 + qS3) * z
200 		+ qS2) * z
201 	       + qS1) * z
202 	    + qS0;
203 	  r = x + x * p / q;
204 	  z = pio2_hi - (r - pio2_lo);
205 	  return z;
206 	}
207       /* .4375 <= |x| < .5 */
208       t = u.value - L(0.4375);
209       p = ((((((((((P10 * t
210 		    + P9) * t
211 		   + P8) * t
212 		  + P7) * t
213 		 + P6) * t
214 		+ P5) * t
215 	       + P4) * t
216 	      + P3) * t
217 	     + P2) * t
218 	    + P1) * t
219 	   + P0) * t;
220 
221       q = (((((((((t
222 		   + Q9) * t
223 		  + Q8) * t
224 		 + Q7) * t
225 		+ Q6) * t
226 	       + Q5) * t
227 	      + Q4) * t
228 	     + Q3) * t
229 	    + Q2) * t
230 	   + Q1) * t
231 	+ Q0;
232       r = p / q;
233       if (sign & 0x80000000)
234 	r = pimacosr4375 - r;
235       else
236 	r = acosr4375 + r;
237       return r;
238     }
239   else if (ix < 0x3ffe4000)	/* |x| < 0.625 */
240     {
241       t = u.value - L(0.5625);
242       p = ((((((((((rS10 * t
243 		    + rS9) * t
244 		   + rS8) * t
245 		  + rS7) * t
246 		 + rS6) * t
247 		+ rS5) * t
248 	       + rS4) * t
249 	      + rS3) * t
250 	     + rS2) * t
251 	    + rS1) * t
252 	   + rS0) * t;
253 
254       q = (((((((((t
255 		   + sS9) * t
256 		  + sS8) * t
257 		 + sS7) * t
258 		+ sS6) * t
259 	       + sS5) * t
260 	      + sS4) * t
261 	     + sS3) * t
262 	    + sS2) * t
263 	   + sS1) * t
264 	+ sS0;
265       if (sign & 0x80000000)
266 	r = pimacosr5625 - p / q;
267       else
268 	r = acosr5625 + p / q;
269       return r;
270     }
271   else
272     {				/* |x| >= .625 */
273       z = (one - u.value) * 0.5;
274       s = sqrtl (z);
275       /* Compute an extended precision square root from
276 	 the Newton iteration  s -> 0.5 * (s + z / s).
277 	 The change w from s to the improved value is
278 	    w = 0.5 * (s + z / s) - s  = (s^2 + z)/2s - s = (z - s^2)/2s.
279 	  Express s = f1 + f2 where f1 * f1 is exactly representable.
280 	  w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
281 	  s + w has extended precision.  */
282       u.value = s;
283       u.parts32.w2 = 0;
284       u.parts32.w3 = 0;
285       f2 = s - u.value;
286       w = z - u.value * u.value;
287       w = w - 2.0 * u.value * f2;
288       w = w - f2 * f2;
289       w = w / (2.0 * s);
290       /* Arcsine of s.  */
291       p = (((((((((pS9 * z
292 		   + pS8) * z
293 		  + pS7) * z
294 		 + pS6) * z
295 		+ pS5) * z
296 	       + pS4) * z
297 	      + pS3) * z
298 	     + pS2) * z
299 	    + pS1) * z
300 	   + pS0) * z;
301       q = (((((((( z
302 		   + qS8) * z
303 		 + qS7) * z
304 		+ qS6) * z
305 	       + qS5) * z
306 	      + qS4) * z
307 	     + qS3) * z
308 	    + qS2) * z
309 	   + qS1) * z
310 	+ qS0;
311       r = s + (w + s * p / q);
312 
313       if (sign & 0x80000000)
314 	w = pio2_hi + (pio2_lo - r);
315       else
316 	w = r;
317       return 2.0 * w;
318     }
319 }
320 libm_alias_finite (__ieee754_acosl, __acosl)
321