1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #define NO_MATH_REDIRECT
20 #include <fenv.h>
21 #include <float.h>
22 #include <math.h>
23 #include <math-barriers.h>
24 #include <math_private.h>
25 #include <fenv_private.h>
26 #include <math-underflow.h>
27 #include <math_ldbl_opt.h>
28 #include <mul_split.h>
29 #include <stdlib.h>
30
31 /* Calculate X + Y exactly and store the result in *HI + *LO. It is
32 given that |X| >= |Y| and the values are small enough that no
33 overflow occurs. */
34
35 static void
add_split(double * hi,double * lo,double x,double y)36 add_split (double *hi, double *lo, double x, double y)
37 {
38 /* Apply Dekker's algorithm. */
39 *hi = x + y;
40 *lo = (x - *hi) + y;
41 }
42
43 /* Value with extended range, used in intermediate computations. */
44 typedef struct
45 {
46 /* Value in [0.5, 1), as from frexp, or 0. */
47 double val;
48 /* Exponent of power of 2 it is multiplied by, or 0 for zero. */
49 int exp;
50 } ext_val;
51
52 /* Store D as an ext_val value. */
53
54 static void
store_ext_val(ext_val * v,double d)55 store_ext_val (ext_val *v, double d)
56 {
57 v->val = __frexp (d, &v->exp);
58 }
59
60 /* Store X * Y as ext_val values *V0 and *V1. */
61
62 static void
mul_ext_val(ext_val * v0,ext_val * v1,double x,double y)63 mul_ext_val (ext_val *v0, ext_val *v1, double x, double y)
64 {
65 int xexp, yexp;
66 x = __frexp (x, &xexp);
67 y = __frexp (y, &yexp);
68 double hi, lo;
69 mul_split (&hi, &lo, x, y);
70 store_ext_val (v0, hi);
71 if (hi != 0)
72 v0->exp += xexp + yexp;
73 store_ext_val (v1, lo);
74 if (lo != 0)
75 v1->exp += xexp + yexp;
76 }
77
78 /* Compare absolute values of ext_val values pointed to by P and Q for
79 qsort. */
80
81 static int
compare(const void * p,const void * q)82 compare (const void *p, const void *q)
83 {
84 const ext_val *pe = p;
85 const ext_val *qe = q;
86 if (pe->val == 0)
87 return qe->val == 0 ? 0 : -1;
88 else if (qe->val == 0)
89 return 1;
90 else if (pe->exp < qe->exp)
91 return -1;
92 else if (pe->exp > qe->exp)
93 return 1;
94 else
95 {
96 double pd = fabs (pe->val);
97 double qd = fabs (qe->val);
98 if (pd < qd)
99 return -1;
100 else if (pd == qd)
101 return 0;
102 else
103 return 1;
104 }
105 }
106
107 /* Calculate *X + *Y exactly, storing the high part in *X (rounded to
108 nearest) and the low part in *Y. It is given that |X| >= |Y|. */
109
110 static void
add_split_ext(ext_val * x,ext_val * y)111 add_split_ext (ext_val *x, ext_val *y)
112 {
113 int xexp = x->exp, yexp = y->exp;
114 if (y->val == 0 || xexp - yexp > 53)
115 return;
116 double hi = x->val;
117 double lo = __scalbn (y->val, yexp - xexp);
118 add_split (&hi, &lo, hi, lo);
119 store_ext_val (x, hi);
120 if (hi != 0)
121 x->exp += xexp;
122 store_ext_val (y, lo);
123 if (lo != 0)
124 y->exp += xexp;
125 }
126
127 long double
__fmal(long double x,long double y,long double z)128 __fmal (long double x, long double y, long double z)
129 {
130 double xhi, xlo, yhi, ylo, zhi, zlo;
131 int64_t hx, hy, hz;
132 int xexp, yexp, zexp;
133 double scale_val;
134 int scale_exp;
135 ldbl_unpack (x, &xhi, &xlo);
136 EXTRACT_WORDS64 (hx, xhi);
137 xexp = (hx & 0x7ff0000000000000LL) >> 52;
138 ldbl_unpack (y, &yhi, &ylo);
139 EXTRACT_WORDS64 (hy, yhi);
140 yexp = (hy & 0x7ff0000000000000LL) >> 52;
141 ldbl_unpack (z, &zhi, &zlo);
142 EXTRACT_WORDS64 (hz, zhi);
143 zexp = (hz & 0x7ff0000000000000LL) >> 52;
144
145 /* If z is Inf or NaN, but x and y are finite, avoid any exceptions
146 from computing x * y. */
147 if (zexp == 0x7ff && xexp != 0x7ff && yexp != 0x7ff)
148 return (z + x) + y;
149
150 /* If z is zero and x are y are nonzero, compute the result as x * y
151 to avoid the wrong sign of a zero result if x * y underflows to
152 0. */
153 if (z == 0 && x != 0 && y != 0)
154 return x * y;
155
156 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as x * y
157 + z. */
158 if (xexp == 0x7ff || yexp == 0x7ff || zexp == 0x7ff
159 || x == 0 || y == 0)
160 return (x * y) + z;
161
162 {
163 SET_RESTORE_ROUND (FE_TONEAREST);
164
165 ext_val vals[10];
166 store_ext_val (&vals[0], zhi);
167 store_ext_val (&vals[1], zlo);
168 mul_ext_val (&vals[2], &vals[3], xhi, yhi);
169 mul_ext_val (&vals[4], &vals[5], xhi, ylo);
170 mul_ext_val (&vals[6], &vals[7], xlo, yhi);
171 mul_ext_val (&vals[8], &vals[9], xlo, ylo);
172 qsort (vals, 10, sizeof (ext_val), compare);
173 /* Add up the values so that each element of VALS has absolute
174 value at most equal to the last set bit of the next nonzero
175 element. */
176 for (size_t i = 0; i <= 8; i++)
177 {
178 add_split_ext (&vals[i + 1], &vals[i]);
179 qsort (vals + i + 1, 9 - i, sizeof (ext_val), compare);
180 }
181 /* Add up the values in the other direction, so that each element
182 of VALS has absolute value less than 5ulp of the next
183 value. */
184 size_t dstpos = 9;
185 for (size_t i = 1; i <= 9; i++)
186 {
187 if (vals[dstpos].val == 0)
188 {
189 vals[dstpos] = vals[9 - i];
190 vals[9 - i].val = 0;
191 vals[9 - i].exp = 0;
192 }
193 else
194 {
195 add_split_ext (&vals[dstpos], &vals[9 - i]);
196 if (vals[9 - i].val != 0)
197 {
198 if (9 - i < dstpos - 1)
199 {
200 vals[dstpos - 1] = vals[9 - i];
201 vals[9 - i].val = 0;
202 vals[9 - i].exp = 0;
203 }
204 dstpos--;
205 }
206 }
207 }
208 /* If the result is an exact zero, it results from adding two
209 values with opposite signs; recompute in the original rounding
210 mode. */
211 if (vals[9].val == 0)
212 goto zero_out;
213 /* Adding the top three values will now give a result as accurate
214 as the underlying long double arithmetic. */
215 add_split_ext (&vals[9], &vals[8]);
216 if (compare (&vals[8], &vals[7]) < 0)
217 {
218 ext_val tmp = vals[7];
219 vals[7] = vals[8];
220 vals[8] = tmp;
221 }
222 add_split_ext (&vals[8], &vals[7]);
223 add_split_ext (&vals[9], &vals[8]);
224 if (vals[9].exp > DBL_MAX_EXP || vals[9].exp < DBL_MIN_EXP)
225 {
226 /* Overflow or underflow, with the result depending on the
227 original rounding mode, but not on the low part computed
228 here. */
229 scale_val = vals[9].val;
230 scale_exp = vals[9].exp;
231 goto scale_out;
232 }
233 double hi = __scalbn (vals[9].val, vals[9].exp);
234 double lo = __scalbn (vals[8].val, vals[8].exp);
235 /* It is possible that the low part became subnormal and was
236 rounded so that the result is no longer canonical. */
237 ldbl_canonicalize (&hi, &lo);
238 long double ret = ldbl_pack (hi, lo);
239 math_check_force_underflow (ret);
240 return ret;
241 }
242
243 scale_out:
244 scale_val = math_opt_barrier (scale_val);
245 scale_val = __scalbn (scale_val, scale_exp);
246 if (fabs (scale_val) == DBL_MAX)
247 return copysignl (LDBL_MAX, scale_val);
248 math_check_force_underflow (scale_val);
249 return scale_val;
250
251 zero_out:;
252 double zero = 0.0;
253 zero = math_opt_barrier (zero);
254 return zero - zero;
255 }
256 #if IS_IN (libm)
257 long_double_symbol (libm, __fmal, fmal);
258 #else
259 long_double_symbol (libc, __fmal, fmal);
260 #endif
261