1 /* Compute x * y + z as ternary operation.
2    Copyright (C) 2011-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define NO_MATH_REDIRECT
20 #include <fenv.h>
21 #include <float.h>
22 #include <math.h>
23 #include <math-barriers.h>
24 #include <math_private.h>
25 #include <fenv_private.h>
26 #include <math-underflow.h>
27 #include <math_ldbl_opt.h>
28 #include <mul_split.h>
29 #include <stdlib.h>
30 
31 /* Calculate X + Y exactly and store the result in *HI + *LO.  It is
32    given that |X| >= |Y| and the values are small enough that no
33    overflow occurs.  */
34 
35 static void
add_split(double * hi,double * lo,double x,double y)36 add_split (double *hi, double *lo, double x, double y)
37 {
38   /* Apply Dekker's algorithm.  */
39   *hi = x + y;
40   *lo = (x - *hi) + y;
41 }
42 
43 /* Value with extended range, used in intermediate computations.  */
44 typedef struct
45 {
46   /* Value in [0.5, 1), as from frexp, or 0.  */
47   double val;
48   /* Exponent of power of 2 it is multiplied by, or 0 for zero.  */
49   int exp;
50 } ext_val;
51 
52 /* Store D as an ext_val value.  */
53 
54 static void
store_ext_val(ext_val * v,double d)55 store_ext_val (ext_val *v, double d)
56 {
57   v->val = __frexp (d, &v->exp);
58 }
59 
60 /* Store X * Y as ext_val values *V0 and *V1.  */
61 
62 static void
mul_ext_val(ext_val * v0,ext_val * v1,double x,double y)63 mul_ext_val (ext_val *v0, ext_val *v1, double x, double y)
64 {
65   int xexp, yexp;
66   x = __frexp (x, &xexp);
67   y = __frexp (y, &yexp);
68   double hi, lo;
69   mul_split (&hi, &lo, x, y);
70   store_ext_val (v0, hi);
71   if (hi != 0)
72     v0->exp += xexp + yexp;
73   store_ext_val (v1, lo);
74   if (lo != 0)
75     v1->exp += xexp + yexp;
76 }
77 
78 /* Compare absolute values of ext_val values pointed to by P and Q for
79    qsort.  */
80 
81 static int
compare(const void * p,const void * q)82 compare (const void *p, const void *q)
83 {
84   const ext_val *pe = p;
85   const ext_val *qe = q;
86   if (pe->val == 0)
87     return qe->val == 0 ? 0 : -1;
88   else if (qe->val == 0)
89     return 1;
90   else if (pe->exp < qe->exp)
91     return -1;
92   else if (pe->exp > qe->exp)
93     return 1;
94   else
95     {
96       double pd = fabs (pe->val);
97       double qd = fabs (qe->val);
98       if (pd < qd)
99 	return -1;
100       else if (pd == qd)
101 	return 0;
102       else
103 	return 1;
104     }
105 }
106 
107 /* Calculate *X + *Y exactly, storing the high part in *X (rounded to
108    nearest) and the low part in *Y.  It is given that |X| >= |Y|.  */
109 
110 static void
add_split_ext(ext_val * x,ext_val * y)111 add_split_ext (ext_val *x, ext_val *y)
112 {
113   int xexp = x->exp, yexp = y->exp;
114   if (y->val == 0 || xexp - yexp > 53)
115     return;
116   double hi = x->val;
117   double lo = __scalbn (y->val, yexp - xexp);
118   add_split (&hi, &lo, hi, lo);
119   store_ext_val (x, hi);
120   if (hi != 0)
121     x->exp += xexp;
122   store_ext_val (y, lo);
123   if (lo != 0)
124     y->exp += xexp;
125 }
126 
127 long double
__fmal(long double x,long double y,long double z)128 __fmal (long double x, long double y, long double z)
129 {
130   double xhi, xlo, yhi, ylo, zhi, zlo;
131   int64_t hx, hy, hz;
132   int xexp, yexp, zexp;
133   double scale_val;
134   int scale_exp;
135   ldbl_unpack (x, &xhi, &xlo);
136   EXTRACT_WORDS64 (hx, xhi);
137   xexp = (hx & 0x7ff0000000000000LL) >> 52;
138   ldbl_unpack (y, &yhi, &ylo);
139   EXTRACT_WORDS64 (hy, yhi);
140   yexp = (hy & 0x7ff0000000000000LL) >> 52;
141   ldbl_unpack (z, &zhi, &zlo);
142   EXTRACT_WORDS64 (hz, zhi);
143   zexp = (hz & 0x7ff0000000000000LL) >> 52;
144 
145   /* If z is Inf or NaN, but x and y are finite, avoid any exceptions
146      from computing x * y.  */
147   if (zexp == 0x7ff && xexp != 0x7ff && yexp != 0x7ff)
148     return (z + x) + y;
149 
150   /* If z is zero and x are y are nonzero, compute the result as x * y
151      to avoid the wrong sign of a zero result if x * y underflows to
152      0.  */
153   if (z == 0 && x != 0 && y != 0)
154     return x * y;
155 
156   /* If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y
157      + z.  */
158   if (xexp == 0x7ff || yexp == 0x7ff || zexp == 0x7ff
159       || x == 0 || y == 0)
160     return (x * y) + z;
161 
162   {
163     SET_RESTORE_ROUND (FE_TONEAREST);
164 
165     ext_val vals[10];
166     store_ext_val (&vals[0], zhi);
167     store_ext_val (&vals[1], zlo);
168     mul_ext_val (&vals[2], &vals[3], xhi, yhi);
169     mul_ext_val (&vals[4], &vals[5], xhi, ylo);
170     mul_ext_val (&vals[6], &vals[7], xlo, yhi);
171     mul_ext_val (&vals[8], &vals[9], xlo, ylo);
172     qsort (vals, 10, sizeof (ext_val), compare);
173     /* Add up the values so that each element of VALS has absolute
174        value at most equal to the last set bit of the next nonzero
175        element.  */
176     for (size_t i = 0; i <= 8; i++)
177       {
178 	add_split_ext (&vals[i + 1], &vals[i]);
179 	qsort (vals + i + 1, 9 - i, sizeof (ext_val), compare);
180       }
181     /* Add up the values in the other direction, so that each element
182        of VALS has absolute value less than 5ulp of the next
183        value.  */
184     size_t dstpos = 9;
185     for (size_t i = 1; i <= 9; i++)
186       {
187 	if (vals[dstpos].val == 0)
188 	  {
189 	    vals[dstpos] = vals[9 - i];
190 	    vals[9 - i].val = 0;
191 	    vals[9 - i].exp = 0;
192 	  }
193 	else
194 	  {
195 	    add_split_ext (&vals[dstpos], &vals[9 - i]);
196 	    if (vals[9 - i].val != 0)
197 	      {
198 		if (9 - i < dstpos - 1)
199 		  {
200 		    vals[dstpos - 1] = vals[9 - i];
201 		    vals[9 - i].val = 0;
202 		    vals[9 - i].exp = 0;
203 		  }
204 		dstpos--;
205 	      }
206 	  }
207       }
208     /* If the result is an exact zero, it results from adding two
209        values with opposite signs; recompute in the original rounding
210        mode.  */
211     if (vals[9].val == 0)
212       goto zero_out;
213     /* Adding the top three values will now give a result as accurate
214        as the underlying long double arithmetic.  */
215     add_split_ext (&vals[9], &vals[8]);
216     if (compare (&vals[8], &vals[7]) < 0)
217       {
218 	ext_val tmp = vals[7];
219 	vals[7] = vals[8];
220 	vals[8] = tmp;
221       }
222     add_split_ext (&vals[8], &vals[7]);
223     add_split_ext (&vals[9], &vals[8]);
224     if (vals[9].exp > DBL_MAX_EXP || vals[9].exp < DBL_MIN_EXP)
225       {
226 	/* Overflow or underflow, with the result depending on the
227 	   original rounding mode, but not on the low part computed
228 	   here.  */
229 	scale_val = vals[9].val;
230 	scale_exp = vals[9].exp;
231 	goto scale_out;
232       }
233     double hi = __scalbn (vals[9].val, vals[9].exp);
234     double lo = __scalbn (vals[8].val, vals[8].exp);
235     /* It is possible that the low part became subnormal and was
236        rounded so that the result is no longer canonical.  */
237     ldbl_canonicalize (&hi, &lo);
238     long double ret = ldbl_pack (hi, lo);
239     math_check_force_underflow (ret);
240     return ret;
241   }
242 
243  scale_out:
244   scale_val = math_opt_barrier (scale_val);
245   scale_val = __scalbn (scale_val, scale_exp);
246   if (fabs (scale_val) == DBL_MAX)
247     return copysignl (LDBL_MAX, scale_val);
248   math_check_force_underflow (scale_val);
249   return scale_val;
250 
251  zero_out:;
252   double zero = 0.0;
253   zero = math_opt_barrier (zero);
254   return zero - zero;
255 }
256 #if IS_IN (libm)
257 long_double_symbol (libm, __fmal, fmal);
258 #else
259 long_double_symbol (libc, __fmal, fmal);
260 #endif
261