1 /* Compute x * y + z as ternary operation.
2    Copyright (C) 2010-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define NO_MATH_REDIRECT
20 #include <float.h>
21 #define dfmal __hide_dfmal
22 #define f32xfmaf64 __hide_f32xfmaf64
23 #include <math.h>
24 #undef dfmal
25 #undef f32xfmaf64
26 #include <fenv.h>
27 #include <ieee754.h>
28 #include <math-barriers.h>
29 #include <fenv_private.h>
30 #include <libm-alias-double.h>
31 #include <math-narrow-alias.h>
32 #include <tininess.h>
33 #include <math-use-builtins.h>
34 
35 /* This implementation uses rounding to odd to avoid problems with
36    double rounding.  See a paper by Boldo and Melquiond:
37    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
38 
39 double
__fma(double x,double y,double z)40 __fma (double x, double y, double z)
41 {
42 #if USE_FMA_BUILTIN
43   return __builtin_fma (x, y, z);
44 #else
45   /* Use generic implementation.  */
46   union ieee754_double u, v, w;
47   int adjust = 0;
48   u.d = x;
49   v.d = y;
50   w.d = z;
51   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
52 			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
53       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
54       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
55       || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
56       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
57 			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
58     {
59       /* If z is Inf, but x and y are finite, the result should be
60 	 z rather than NaN.  */
61       if (w.ieee.exponent == 0x7ff
62 	  && u.ieee.exponent != 0x7ff
63 	  && v.ieee.exponent != 0x7ff)
64 	return (z + x) + y;
65       /* If z is zero and x are y are nonzero, compute the result
66 	 as x * y to avoid the wrong sign of a zero result if x * y
67 	 underflows to 0.  */
68       if (z == 0 && x != 0 && y != 0)
69 	return x * y;
70       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
71 	 x * y + z.  */
72       if (u.ieee.exponent == 0x7ff
73 	  || v.ieee.exponent == 0x7ff
74 	  || w.ieee.exponent == 0x7ff
75 	  || x == 0
76 	  || y == 0)
77 	return x * y + z;
78       /* If fma will certainly overflow, compute as x * y.  */
79       if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS)
80 	return x * y;
81       /* If x * y is less than 1/4 of DBL_TRUE_MIN, neither the
82 	 result nor whether there is underflow depends on its exact
83 	 value, only on its sign.  */
84       if (u.ieee.exponent + v.ieee.exponent
85 	  < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
86 	{
87 	  int neg = u.ieee.negative ^ v.ieee.negative;
88 	  double tiny = neg ? -0x1p-1074 : 0x1p-1074;
89 	  if (w.ieee.exponent >= 3)
90 	    return tiny + z;
91 	  /* Scaling up, adding TINY and scaling down produces the
92 	     correct result, because in round-to-nearest mode adding
93 	     TINY has no effect and in other modes double rounding is
94 	     harmless.  But it may not produce required underflow
95 	     exceptions.  */
96 	  v.d = z * 0x1p54 + tiny;
97 	  if (TININESS_AFTER_ROUNDING
98 	      ? v.ieee.exponent < 55
99 	      : (w.ieee.exponent == 0
100 		 || (w.ieee.exponent == 1
101 		     && w.ieee.negative != neg
102 		     && w.ieee.mantissa1 == 0
103 		     && w.ieee.mantissa0 == 0)))
104 	    {
105 	      double force_underflow = x * y;
106 	      math_force_eval (force_underflow);
107 	    }
108 	  return v.d * 0x1p-54;
109 	}
110       if (u.ieee.exponent + v.ieee.exponent
111 	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
112 	{
113 	  /* Compute 1p-53 times smaller result and multiply
114 	     at the end.  */
115 	  if (u.ieee.exponent > v.ieee.exponent)
116 	    u.ieee.exponent -= DBL_MANT_DIG;
117 	  else
118 	    v.ieee.exponent -= DBL_MANT_DIG;
119 	  /* If x + y exponent is very large and z exponent is very small,
120 	     it doesn't matter if we don't adjust it.  */
121 	  if (w.ieee.exponent > DBL_MANT_DIG)
122 	    w.ieee.exponent -= DBL_MANT_DIG;
123 	  adjust = 1;
124 	}
125       else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
126 	{
127 	  /* Similarly.
128 	     If z exponent is very large and x and y exponents are
129 	     very small, adjust them up to avoid spurious underflows,
130 	     rather than down.  */
131 	  if (u.ieee.exponent + v.ieee.exponent
132 	      <= IEEE754_DOUBLE_BIAS + 2 * DBL_MANT_DIG)
133 	    {
134 	      if (u.ieee.exponent > v.ieee.exponent)
135 		u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
136 	      else
137 		v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
138 	    }
139 	  else if (u.ieee.exponent > v.ieee.exponent)
140 	    {
141 	      if (u.ieee.exponent > DBL_MANT_DIG)
142 		u.ieee.exponent -= DBL_MANT_DIG;
143 	    }
144 	  else if (v.ieee.exponent > DBL_MANT_DIG)
145 	    v.ieee.exponent -= DBL_MANT_DIG;
146 	  w.ieee.exponent -= DBL_MANT_DIG;
147 	  adjust = 1;
148 	}
149       else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
150 	{
151 	  u.ieee.exponent -= DBL_MANT_DIG;
152 	  if (v.ieee.exponent)
153 	    v.ieee.exponent += DBL_MANT_DIG;
154 	  else
155 	    v.d *= 0x1p53;
156 	}
157       else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
158 	{
159 	  v.ieee.exponent -= DBL_MANT_DIG;
160 	  if (u.ieee.exponent)
161 	    u.ieee.exponent += DBL_MANT_DIG;
162 	  else
163 	    u.d *= 0x1p53;
164 	}
165       else /* if (u.ieee.exponent + v.ieee.exponent
166 		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
167 	{
168 	  if (u.ieee.exponent > v.ieee.exponent)
169 	    u.ieee.exponent += 2 * DBL_MANT_DIG + 2;
170 	  else
171 	    v.ieee.exponent += 2 * DBL_MANT_DIG + 2;
172 	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6)
173 	    {
174 	      if (w.ieee.exponent)
175 		w.ieee.exponent += 2 * DBL_MANT_DIG + 2;
176 	      else
177 		w.d *= 0x1p108;
178 	      adjust = -1;
179 	    }
180 	  /* Otherwise x * y should just affect inexact
181 	     and nothing else.  */
182 	}
183       x = u.d;
184       y = v.d;
185       z = w.d;
186     }
187 
188   /* Ensure correct sign of exact 0 + 0.  */
189   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
190     {
191       x = math_opt_barrier (x);
192       return x * y + z;
193     }
194 
195   fenv_t env;
196   libc_feholdexcept_setround (&env, FE_TONEAREST);
197 
198   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
199 #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
200   double x1 = x * C;
201   double y1 = y * C;
202   double m1 = x * y;
203   x1 = (x - x1) + x1;
204   y1 = (y - y1) + y1;
205   double x2 = x - x1;
206   double y2 = y - y1;
207   double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
208 
209   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
210   double a1 = z + m1;
211   double t1 = a1 - z;
212   double t2 = a1 - t1;
213   t1 = m1 - t1;
214   t2 = z - t2;
215   double a2 = t1 + t2;
216   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
217   math_force_eval (m2);
218   math_force_eval (a2);
219   feclearexcept (FE_INEXACT);
220 
221   /* If the result is an exact zero, ensure it has the correct sign.  */
222   if (a1 == 0 && m2 == 0)
223     {
224       libc_feupdateenv (&env);
225       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
226       z = math_opt_barrier (z);
227       return z + m1;
228     }
229 
230   libc_fesetround (FE_TOWARDZERO);
231 
232   /* Perform m2 + a2 addition with round to odd.  */
233   u.d = a2 + m2;
234 
235   if (__glibc_unlikely (adjust < 0))
236     {
237       if ((u.ieee.mantissa1 & 1) == 0)
238 	u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
239       v.d = a1 + u.d;
240       /* Ensure the addition is not scheduled after fetestexcept call.  */
241       math_force_eval (v.d);
242     }
243 
244   /* Reset rounding mode and test for inexact simultaneously.  */
245   int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
246 
247   if (__glibc_likely (adjust == 0))
248     {
249       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
250 	u.ieee.mantissa1 |= j;
251       /* Result is a1 + u.d.  */
252       return a1 + u.d;
253     }
254   else if (__glibc_likely (adjust > 0))
255     {
256       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
257 	u.ieee.mantissa1 |= j;
258       /* Result is a1 + u.d, scaled up.  */
259       return (a1 + u.d) * 0x1p53;
260     }
261   else
262     {
263       /* If a1 + u.d is exact, the only rounding happens during
264 	 scaling down.  */
265       if (j == 0)
266 	return v.d * 0x1p-108;
267       /* If result rounded to zero is not subnormal, no double
268 	 rounding will occur.  */
269       if (v.ieee.exponent > 108)
270 	return (a1 + u.d) * 0x1p-108;
271       /* If v.d * 0x1p-108 with round to zero is a subnormal above
272 	 or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa
273 	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
274 	 change the round bit, not sticky or guard bit.
275 	 v.d * 0x1p-108 never normalizes by shifting up,
276 	 so round bit plus sticky bit should be already enough
277 	 for proper rounding.  */
278       if (v.ieee.exponent == 108)
279 	{
280 	  /* If the exponent would be in the normal range when
281 	     rounding to normal precision with unbounded exponent
282 	     range, the exact result is known and spurious underflows
283 	     must be avoided on systems detecting tininess after
284 	     rounding.  */
285 	  if (TININESS_AFTER_ROUNDING)
286 	    {
287 	      w.d = a1 + u.d;
288 	      if (w.ieee.exponent == 109)
289 		return w.d * 0x1p-108;
290 	    }
291 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
292 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
293 	     bit.  */
294 	  w.d = 0.0;
295 	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
296 	  w.ieee.negative = v.ieee.negative;
297 	  v.ieee.mantissa1 &= ~3U;
298 	  v.d *= 0x1p-108;
299 	  w.d *= 0x1p-2;
300 	  return v.d + w.d;
301 	}
302       v.ieee.mantissa1 |= j;
303       return v.d * 0x1p-108;
304     }
305 #endif /* ! USE_FMA_BUILTIN  */
306 }
307 #ifndef __fma
308 libm_alias_double (__fma, fma)
309 libm_alias_double_narrow (__fma, fma)
310 #endif
311