1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2010-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #define NO_MATH_REDIRECT
20 #include <float.h>
21 #define f64xfmaf128 __hide_f64xfmaf128
22 #include <math.h>
23 #undef f64xfmaf128
24 #include <fenv.h>
25 #include <ieee754.h>
26 #include <math-barriers.h>
27 #include <math_private.h>
28 #include <libm-alias-ldouble.h>
29 #include <math-narrow-alias.h>
30 #include <tininess.h>
31 #include <math-use-builtins.h>
32
33 /* This implementation uses rounding to odd to avoid problems with
34 double rounding. See a paper by Boldo and Melquiond:
35 http://www.lri.fr/~melquion/doc/08-tc.pdf */
36
37 _Float128
__fmal(_Float128 x,_Float128 y,_Float128 z)38 __fmal (_Float128 x, _Float128 y, _Float128 z)
39 {
40 #if USE_FMAL_BUILTIN
41 return __builtin_fmal (x, y, z);
42 #else
43 union ieee854_long_double u, v, w;
44 int adjust = 0;
45 u.d = x;
46 v.d = y;
47 w.d = z;
48 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
49 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
50 - LDBL_MANT_DIG, 0)
51 || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
52 || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
53 || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
54 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
55 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
56 {
57 /* If z is Inf, but x and y are finite, the result should be
58 z rather than NaN. */
59 if (w.ieee.exponent == 0x7fff
60 && u.ieee.exponent != 0x7fff
61 && v.ieee.exponent != 0x7fff)
62 return (z + x) + y;
63 /* If z is zero and x are y are nonzero, compute the result
64 as x * y to avoid the wrong sign of a zero result if x * y
65 underflows to 0. */
66 if (z == 0 && x != 0 && y != 0)
67 return x * y;
68 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
69 x * y + z. */
70 if (u.ieee.exponent == 0x7fff
71 || v.ieee.exponent == 0x7fff
72 || w.ieee.exponent == 0x7fff
73 || x == 0
74 || y == 0)
75 return x * y + z;
76 /* If fma will certainly overflow, compute as x * y. */
77 if (u.ieee.exponent + v.ieee.exponent
78 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
79 return x * y;
80 /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
81 result nor whether there is underflow depends on its exact
82 value, only on its sign. */
83 if (u.ieee.exponent + v.ieee.exponent
84 < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
85 {
86 int neg = u.ieee.negative ^ v.ieee.negative;
87 _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
88 if (w.ieee.exponent >= 3)
89 return tiny + z;
90 /* Scaling up, adding TINY and scaling down produces the
91 correct result, because in round-to-nearest mode adding
92 TINY has no effect and in other modes double rounding is
93 harmless. But it may not produce required underflow
94 exceptions. */
95 v.d = z * L(0x1p114) + tiny;
96 if (TININESS_AFTER_ROUNDING
97 ? v.ieee.exponent < 115
98 : (w.ieee.exponent == 0
99 || (w.ieee.exponent == 1
100 && w.ieee.negative != neg
101 && w.ieee.mantissa3 == 0
102 && w.ieee.mantissa2 == 0
103 && w.ieee.mantissa1 == 0
104 && w.ieee.mantissa0 == 0)))
105 {
106 _Float128 force_underflow = x * y;
107 math_force_eval (force_underflow);
108 }
109 return v.d * L(0x1p-114);
110 }
111 if (u.ieee.exponent + v.ieee.exponent
112 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
113 {
114 /* Compute 1p-113 times smaller result and multiply
115 at the end. */
116 if (u.ieee.exponent > v.ieee.exponent)
117 u.ieee.exponent -= LDBL_MANT_DIG;
118 else
119 v.ieee.exponent -= LDBL_MANT_DIG;
120 /* If x + y exponent is very large and z exponent is very small,
121 it doesn't matter if we don't adjust it. */
122 if (w.ieee.exponent > LDBL_MANT_DIG)
123 w.ieee.exponent -= LDBL_MANT_DIG;
124 adjust = 1;
125 }
126 else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
127 {
128 /* Similarly.
129 If z exponent is very large and x and y exponents are
130 very small, adjust them up to avoid spurious underflows,
131 rather than down. */
132 if (u.ieee.exponent + v.ieee.exponent
133 <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
134 {
135 if (u.ieee.exponent > v.ieee.exponent)
136 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
137 else
138 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
139 }
140 else if (u.ieee.exponent > v.ieee.exponent)
141 {
142 if (u.ieee.exponent > LDBL_MANT_DIG)
143 u.ieee.exponent -= LDBL_MANT_DIG;
144 }
145 else if (v.ieee.exponent > LDBL_MANT_DIG)
146 v.ieee.exponent -= LDBL_MANT_DIG;
147 w.ieee.exponent -= LDBL_MANT_DIG;
148 adjust = 1;
149 }
150 else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
151 {
152 u.ieee.exponent -= LDBL_MANT_DIG;
153 if (v.ieee.exponent)
154 v.ieee.exponent += LDBL_MANT_DIG;
155 else
156 v.d *= L(0x1p113);
157 }
158 else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
159 {
160 v.ieee.exponent -= LDBL_MANT_DIG;
161 if (u.ieee.exponent)
162 u.ieee.exponent += LDBL_MANT_DIG;
163 else
164 u.d *= L(0x1p113);
165 }
166 else /* if (u.ieee.exponent + v.ieee.exponent
167 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
168 {
169 if (u.ieee.exponent > v.ieee.exponent)
170 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
171 else
172 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
173 if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
174 {
175 if (w.ieee.exponent)
176 w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
177 else
178 w.d *= L(0x1p228);
179 adjust = -1;
180 }
181 /* Otherwise x * y should just affect inexact
182 and nothing else. */
183 }
184 x = u.d;
185 y = v.d;
186 z = w.d;
187 }
188
189 /* Ensure correct sign of exact 0 + 0. */
190 if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
191 {
192 x = math_opt_barrier (x);
193 return x * y + z;
194 }
195
196 fenv_t env;
197 feholdexcept (&env);
198 fesetround (FE_TONEAREST);
199
200 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
201 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
202 _Float128 x1 = x * C;
203 _Float128 y1 = y * C;
204 _Float128 m1 = x * y;
205 x1 = (x - x1) + x1;
206 y1 = (y - y1) + y1;
207 _Float128 x2 = x - x1;
208 _Float128 y2 = y - y1;
209 _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
210
211 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
212 _Float128 a1 = z + m1;
213 _Float128 t1 = a1 - z;
214 _Float128 t2 = a1 - t1;
215 t1 = m1 - t1;
216 t2 = z - t2;
217 _Float128 a2 = t1 + t2;
218 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
219 math_force_eval (m2);
220 math_force_eval (a2);
221 feclearexcept (FE_INEXACT);
222
223 /* If the result is an exact zero, ensure it has the correct sign. */
224 if (a1 == 0 && m2 == 0)
225 {
226 feupdateenv (&env);
227 /* Ensure that round-to-nearest value of z + m1 is not reused. */
228 z = math_opt_barrier (z);
229 return z + m1;
230 }
231
232 fesetround (FE_TOWARDZERO);
233 /* Perform m2 + a2 addition with round to odd. */
234 u.d = a2 + m2;
235
236 if (__glibc_likely (adjust == 0))
237 {
238 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
239 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
240 feupdateenv (&env);
241 /* Result is a1 + u.d. */
242 return a1 + u.d;
243 }
244 else if (__glibc_likely (adjust > 0))
245 {
246 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
247 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
248 feupdateenv (&env);
249 /* Result is a1 + u.d, scaled up. */
250 return (a1 + u.d) * L(0x1p113);
251 }
252 else
253 {
254 if ((u.ieee.mantissa3 & 1) == 0)
255 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
256 v.d = a1 + u.d;
257 /* Ensure the addition is not scheduled after fetestexcept call. */
258 math_force_eval (v.d);
259 int j = fetestexcept (FE_INEXACT) != 0;
260 feupdateenv (&env);
261 /* Ensure the following computations are performed in default rounding
262 mode instead of just reusing the round to zero computation. */
263 asm volatile ("" : "=m" (u) : "m" (u));
264 /* If a1 + u.d is exact, the only rounding happens during
265 scaling down. */
266 if (j == 0)
267 return v.d * L(0x1p-228);
268 /* If result rounded to zero is not subnormal, no double
269 rounding will occur. */
270 if (v.ieee.exponent > 228)
271 return (a1 + u.d) * L(0x1p-228);
272 /* If v.d * 0x1p-228L with round to zero is a subnormal above
273 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
274 down just by 1 bit, which means v.ieee.mantissa3 |= j would
275 change the round bit, not sticky or guard bit.
276 v.d * 0x1p-228L never normalizes by shifting up,
277 so round bit plus sticky bit should be already enough
278 for proper rounding. */
279 if (v.ieee.exponent == 228)
280 {
281 /* If the exponent would be in the normal range when
282 rounding to normal precision with unbounded exponent
283 range, the exact result is known and spurious underflows
284 must be avoided on systems detecting tininess after
285 rounding. */
286 if (TININESS_AFTER_ROUNDING)
287 {
288 w.d = a1 + u.d;
289 if (w.ieee.exponent == 229)
290 return w.d * L(0x1p-228);
291 }
292 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
293 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
294 bit. */
295 w.d = 0;
296 w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
297 w.ieee.negative = v.ieee.negative;
298 v.ieee.mantissa3 &= ~3U;
299 v.d *= L(0x1p-228);
300 w.d *= L(0x1p-2);
301 return v.d + w.d;
302 }
303 v.ieee.mantissa3 |= j;
304 return v.d * L(0x1p-228);
305 }
306 #endif /* ! USE_FMAL_BUILTIN */
307 }
308 libm_alias_ldouble (__fma, fma)
309 libm_alias_ldouble_narrow (__fma, fma)
310