1 /* Compute x * y + z as ternary operation.
2    Copyright (C) 2010-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define NO_MATH_REDIRECT
20 #include <float.h>
21 #define f64xfmaf128 __hide_f64xfmaf128
22 #include <math.h>
23 #undef f64xfmaf128
24 #include <fenv.h>
25 #include <ieee754.h>
26 #include <math-barriers.h>
27 #include <math_private.h>
28 #include <libm-alias-ldouble.h>
29 #include <math-narrow-alias.h>
30 #include <tininess.h>
31 #include <math-use-builtins.h>
32 
33 /* This implementation uses rounding to odd to avoid problems with
34    double rounding.  See a paper by Boldo and Melquiond:
35    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
36 
37 _Float128
__fmal(_Float128 x,_Float128 y,_Float128 z)38 __fmal (_Float128 x, _Float128 y, _Float128 z)
39 {
40 #if USE_FMAL_BUILTIN
41   return __builtin_fmal (x, y, z);
42 #else
43   union ieee854_long_double u, v, w;
44   int adjust = 0;
45   u.d = x;
46   v.d = y;
47   w.d = z;
48   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
49 			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
50 			   - LDBL_MANT_DIG, 0)
51       || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
52       || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
53       || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
54       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
55 			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
56     {
57       /* If z is Inf, but x and y are finite, the result should be
58 	 z rather than NaN.  */
59       if (w.ieee.exponent == 0x7fff
60 	  && u.ieee.exponent != 0x7fff
61           && v.ieee.exponent != 0x7fff)
62 	return (z + x) + y;
63       /* If z is zero and x are y are nonzero, compute the result
64 	 as x * y to avoid the wrong sign of a zero result if x * y
65 	 underflows to 0.  */
66       if (z == 0 && x != 0 && y != 0)
67 	return x * y;
68       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
69 	 x * y + z.  */
70       if (u.ieee.exponent == 0x7fff
71 	  || v.ieee.exponent == 0x7fff
72 	  || w.ieee.exponent == 0x7fff
73 	  || x == 0
74 	  || y == 0)
75 	return x * y + z;
76       /* If fma will certainly overflow, compute as x * y.  */
77       if (u.ieee.exponent + v.ieee.exponent
78 	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
79 	return x * y;
80       /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
81 	 result nor whether there is underflow depends on its exact
82 	 value, only on its sign.  */
83       if (u.ieee.exponent + v.ieee.exponent
84 	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
85 	{
86 	  int neg = u.ieee.negative ^ v.ieee.negative;
87 	  _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
88 	  if (w.ieee.exponent >= 3)
89 	    return tiny + z;
90 	  /* Scaling up, adding TINY and scaling down produces the
91 	     correct result, because in round-to-nearest mode adding
92 	     TINY has no effect and in other modes double rounding is
93 	     harmless.  But it may not produce required underflow
94 	     exceptions.  */
95 	  v.d = z * L(0x1p114) + tiny;
96 	  if (TININESS_AFTER_ROUNDING
97 	      ? v.ieee.exponent < 115
98 	      : (w.ieee.exponent == 0
99 		 || (w.ieee.exponent == 1
100 		     && w.ieee.negative != neg
101 		     && w.ieee.mantissa3 == 0
102 		     && w.ieee.mantissa2 == 0
103 		     && w.ieee.mantissa1 == 0
104 		     && w.ieee.mantissa0 == 0)))
105 	    {
106 	      _Float128 force_underflow = x * y;
107 	      math_force_eval (force_underflow);
108 	    }
109 	  return v.d * L(0x1p-114);
110 	}
111       if (u.ieee.exponent + v.ieee.exponent
112 	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
113 	{
114 	  /* Compute 1p-113 times smaller result and multiply
115 	     at the end.  */
116 	  if (u.ieee.exponent > v.ieee.exponent)
117 	    u.ieee.exponent -= LDBL_MANT_DIG;
118 	  else
119 	    v.ieee.exponent -= LDBL_MANT_DIG;
120 	  /* If x + y exponent is very large and z exponent is very small,
121 	     it doesn't matter if we don't adjust it.  */
122 	  if (w.ieee.exponent > LDBL_MANT_DIG)
123 	    w.ieee.exponent -= LDBL_MANT_DIG;
124 	  adjust = 1;
125 	}
126       else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
127 	{
128 	  /* Similarly.
129 	     If z exponent is very large and x and y exponents are
130 	     very small, adjust them up to avoid spurious underflows,
131 	     rather than down.  */
132 	  if (u.ieee.exponent + v.ieee.exponent
133 	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
134 	    {
135 	      if (u.ieee.exponent > v.ieee.exponent)
136 		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
137 	      else
138 		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
139 	    }
140 	  else if (u.ieee.exponent > v.ieee.exponent)
141 	    {
142 	      if (u.ieee.exponent > LDBL_MANT_DIG)
143 		u.ieee.exponent -= LDBL_MANT_DIG;
144 	    }
145 	  else if (v.ieee.exponent > LDBL_MANT_DIG)
146 	    v.ieee.exponent -= LDBL_MANT_DIG;
147 	  w.ieee.exponent -= LDBL_MANT_DIG;
148 	  adjust = 1;
149 	}
150       else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
151 	{
152 	  u.ieee.exponent -= LDBL_MANT_DIG;
153 	  if (v.ieee.exponent)
154 	    v.ieee.exponent += LDBL_MANT_DIG;
155 	  else
156 	    v.d *= L(0x1p113);
157 	}
158       else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
159 	{
160 	  v.ieee.exponent -= LDBL_MANT_DIG;
161 	  if (u.ieee.exponent)
162 	    u.ieee.exponent += LDBL_MANT_DIG;
163 	  else
164 	    u.d *= L(0x1p113);
165 	}
166       else /* if (u.ieee.exponent + v.ieee.exponent
167 		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
168 	{
169 	  if (u.ieee.exponent > v.ieee.exponent)
170 	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
171 	  else
172 	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
173 	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
174 	    {
175 	      if (w.ieee.exponent)
176 		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
177 	      else
178 		w.d *= L(0x1p228);
179 	      adjust = -1;
180 	    }
181 	  /* Otherwise x * y should just affect inexact
182 	     and nothing else.  */
183 	}
184       x = u.d;
185       y = v.d;
186       z = w.d;
187     }
188 
189   /* Ensure correct sign of exact 0 + 0.  */
190   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
191     {
192       x = math_opt_barrier (x);
193       return x * y + z;
194     }
195 
196   fenv_t env;
197   feholdexcept (&env);
198   fesetround (FE_TONEAREST);
199 
200   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
201 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
202   _Float128 x1 = x * C;
203   _Float128 y1 = y * C;
204   _Float128 m1 = x * y;
205   x1 = (x - x1) + x1;
206   y1 = (y - y1) + y1;
207   _Float128 x2 = x - x1;
208   _Float128 y2 = y - y1;
209   _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
210 
211   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
212   _Float128 a1 = z + m1;
213   _Float128 t1 = a1 - z;
214   _Float128 t2 = a1 - t1;
215   t1 = m1 - t1;
216   t2 = z - t2;
217   _Float128 a2 = t1 + t2;
218   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
219   math_force_eval (m2);
220   math_force_eval (a2);
221   feclearexcept (FE_INEXACT);
222 
223   /* If the result is an exact zero, ensure it has the correct sign.  */
224   if (a1 == 0 && m2 == 0)
225     {
226       feupdateenv (&env);
227       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
228       z = math_opt_barrier (z);
229       return z + m1;
230     }
231 
232   fesetround (FE_TOWARDZERO);
233   /* Perform m2 + a2 addition with round to odd.  */
234   u.d = a2 + m2;
235 
236   if (__glibc_likely (adjust == 0))
237     {
238       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
239 	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
240       feupdateenv (&env);
241       /* Result is a1 + u.d.  */
242       return a1 + u.d;
243     }
244   else if (__glibc_likely (adjust > 0))
245     {
246       if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
247 	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
248       feupdateenv (&env);
249       /* Result is a1 + u.d, scaled up.  */
250       return (a1 + u.d) * L(0x1p113);
251     }
252   else
253     {
254       if ((u.ieee.mantissa3 & 1) == 0)
255 	u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
256       v.d = a1 + u.d;
257       /* Ensure the addition is not scheduled after fetestexcept call.  */
258       math_force_eval (v.d);
259       int j = fetestexcept (FE_INEXACT) != 0;
260       feupdateenv (&env);
261       /* Ensure the following computations are performed in default rounding
262 	 mode instead of just reusing the round to zero computation.  */
263       asm volatile ("" : "=m" (u) : "m" (u));
264       /* If a1 + u.d is exact, the only rounding happens during
265 	 scaling down.  */
266       if (j == 0)
267 	return v.d * L(0x1p-228);
268       /* If result rounded to zero is not subnormal, no double
269 	 rounding will occur.  */
270       if (v.ieee.exponent > 228)
271 	return (a1 + u.d) * L(0x1p-228);
272       /* If v.d * 0x1p-228L with round to zero is a subnormal above
273 	 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
274 	 down just by 1 bit, which means v.ieee.mantissa3 |= j would
275 	 change the round bit, not sticky or guard bit.
276 	 v.d * 0x1p-228L never normalizes by shifting up,
277 	 so round bit plus sticky bit should be already enough
278 	 for proper rounding.  */
279       if (v.ieee.exponent == 228)
280 	{
281 	  /* If the exponent would be in the normal range when
282 	     rounding to normal precision with unbounded exponent
283 	     range, the exact result is known and spurious underflows
284 	     must be avoided on systems detecting tininess after
285 	     rounding.  */
286 	  if (TININESS_AFTER_ROUNDING)
287 	    {
288 	      w.d = a1 + u.d;
289 	      if (w.ieee.exponent == 229)
290 		return w.d * L(0x1p-228);
291 	    }
292 	  /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
293 	     v.ieee.mantissa3 & 1 is the round bit and j is our sticky
294 	     bit.  */
295 	  w.d = 0;
296 	  w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
297 	  w.ieee.negative = v.ieee.negative;
298 	  v.ieee.mantissa3 &= ~3U;
299 	  v.d *= L(0x1p-228);
300 	  w.d *= L(0x1p-2);
301 	  return v.d + w.d;
302 	}
303       v.ieee.mantissa3 |= j;
304       return v.d * L(0x1p-228);
305     }
306 #endif /* ! USE_FMAL_BUILTIN  */
307 }
308 libm_alias_ldouble (__fma, fma)
309 libm_alias_ldouble_narrow (__fma, fma)
310