1 /* Compute x * y + z as ternary operation.
2    Copyright (C) 2010-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #define NO_MATH_REDIRECT
20 #include <float.h>
21 #include <math.h>
22 #include <fenv.h>
23 #include <ieee754.h>
24 #include <math-barriers.h>
25 #include <libm-alias-ldouble.h>
26 #include <tininess.h>
27 
28 /* This implementation uses rounding to odd to avoid problems with
29    double rounding.  See a paper by Boldo and Melquiond:
30    http://www.lri.fr/~melquion/doc/08-tc.pdf  */
31 
32 long double
__fmal(long double x,long double y,long double z)33 __fmal (long double x, long double y, long double z)
34 {
35   union ieee854_long_double u, v, w;
36   int adjust = 0;
37   u.d = x;
38   v.d = y;
39   w.d = z;
40   if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
41 			>= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
42 			   - LDBL_MANT_DIG, 0)
43       || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
44       || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
45       || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
46       || __builtin_expect (u.ieee.exponent + v.ieee.exponent
47 			   <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
48     {
49       /* If z is Inf, but x and y are finite, the result should be
50 	 z rather than NaN.  */
51       if (w.ieee.exponent == 0x7fff
52 	  && u.ieee.exponent != 0x7fff
53           && v.ieee.exponent != 0x7fff)
54 	return (z + x) + y;
55       /* If z is zero and x are y are nonzero, compute the result
56 	 as x * y to avoid the wrong sign of a zero result if x * y
57 	 underflows to 0.  */
58       if (z == 0 && x != 0 && y != 0)
59 	return x * y;
60       /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
61 	 x * y + z.  */
62       if (u.ieee.exponent == 0x7fff
63 	  || v.ieee.exponent == 0x7fff
64 	  || w.ieee.exponent == 0x7fff
65 	  || x == 0
66 	  || y == 0)
67 	return x * y + z;
68       /* If fma will certainly overflow, compute as x * y.  */
69       if (u.ieee.exponent + v.ieee.exponent
70 	  > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
71 	return x * y;
72       /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
73 	 result nor whether there is underflow depends on its exact
74 	 value, only on its sign.  */
75       if (u.ieee.exponent + v.ieee.exponent
76 	  < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
77 	{
78 	  int neg = u.ieee.negative ^ v.ieee.negative;
79 	  long double tiny = neg ? -0x1p-16445L : 0x1p-16445L;
80 	  if (w.ieee.exponent >= 3)
81 	    return tiny + z;
82 	  /* Scaling up, adding TINY and scaling down produces the
83 	     correct result, because in round-to-nearest mode adding
84 	     TINY has no effect and in other modes double rounding is
85 	     harmless.  But it may not produce required underflow
86 	     exceptions.  */
87 	  v.d = z * 0x1p65L + tiny;
88 	  if (TININESS_AFTER_ROUNDING
89 	      ? v.ieee.exponent < 66
90 	      : (w.ieee.exponent == 0
91 		 || (w.ieee.exponent == 1
92 		     && w.ieee.negative != neg
93 		     && w.ieee.mantissa1 == 0
94 		     && w.ieee.mantissa0 == 0x80000000)))
95 	    {
96 	      long double force_underflow = x * y;
97 	      math_force_eval (force_underflow);
98 	    }
99 	  return v.d * 0x1p-65L;
100 	}
101       if (u.ieee.exponent + v.ieee.exponent
102 	  >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
103 	{
104 	  /* Compute 1p-64 times smaller result and multiply
105 	     at the end.  */
106 	  if (u.ieee.exponent > v.ieee.exponent)
107 	    u.ieee.exponent -= LDBL_MANT_DIG;
108 	  else
109 	    v.ieee.exponent -= LDBL_MANT_DIG;
110 	  /* If x + y exponent is very large and z exponent is very small,
111 	     it doesn't matter if we don't adjust it.  */
112 	  if (w.ieee.exponent > LDBL_MANT_DIG)
113 	    w.ieee.exponent -= LDBL_MANT_DIG;
114 	  adjust = 1;
115 	}
116       else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
117 	{
118 	  /* Similarly.
119 	     If z exponent is very large and x and y exponents are
120 	     very small, adjust them up to avoid spurious underflows,
121 	     rather than down.  */
122 	  if (u.ieee.exponent + v.ieee.exponent
123 	      <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
124 	    {
125 	      if (u.ieee.exponent > v.ieee.exponent)
126 		u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
127 	      else
128 		v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
129 	    }
130 	  else if (u.ieee.exponent > v.ieee.exponent)
131 	    {
132 	      if (u.ieee.exponent > LDBL_MANT_DIG)
133 		u.ieee.exponent -= LDBL_MANT_DIG;
134 	    }
135 	  else if (v.ieee.exponent > LDBL_MANT_DIG)
136 	    v.ieee.exponent -= LDBL_MANT_DIG;
137 	  w.ieee.exponent -= LDBL_MANT_DIG;
138 	  adjust = 1;
139 	}
140       else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
141 	{
142 	  u.ieee.exponent -= LDBL_MANT_DIG;
143 	  if (v.ieee.exponent)
144 	    v.ieee.exponent += LDBL_MANT_DIG;
145 	  else
146 	    v.d *= 0x1p64L;
147 	}
148       else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
149 	{
150 	  v.ieee.exponent -= LDBL_MANT_DIG;
151 	  if (u.ieee.exponent)
152 	    u.ieee.exponent += LDBL_MANT_DIG;
153 	  else
154 	    u.d *= 0x1p64L;
155 	}
156       else /* if (u.ieee.exponent + v.ieee.exponent
157 		  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
158 	{
159 	  if (u.ieee.exponent > v.ieee.exponent)
160 	    u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
161 	  else
162 	    v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
163 	  if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
164 	    {
165 	      if (w.ieee.exponent)
166 		w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
167 	      else
168 		w.d *= 0x1p130L;
169 	      adjust = -1;
170 	    }
171 	  /* Otherwise x * y should just affect inexact
172 	     and nothing else.  */
173 	}
174       x = u.d;
175       y = v.d;
176       z = w.d;
177     }
178 
179   /* Ensure correct sign of exact 0 + 0.  */
180   if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
181     {
182       x = math_opt_barrier (x);
183       return x * y + z;
184     }
185 
186   fenv_t env;
187   feholdexcept (&env);
188   fesetround (FE_TONEAREST);
189 
190   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
191 #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
192   long double x1 = x * C;
193   long double y1 = y * C;
194   long double m1 = x * y;
195   x1 = (x - x1) + x1;
196   y1 = (y - y1) + y1;
197   long double x2 = x - x1;
198   long double y2 = y - y1;
199   long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
200 
201   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
202   long double a1 = z + m1;
203   long double t1 = a1 - z;
204   long double t2 = a1 - t1;
205   t1 = m1 - t1;
206   t2 = z - t2;
207   long double a2 = t1 + t2;
208   /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
209   math_force_eval (m2);
210   math_force_eval (a2);
211   feclearexcept (FE_INEXACT);
212 
213   /* If the result is an exact zero, ensure it has the correct sign.  */
214   if (a1 == 0 && m2 == 0)
215     {
216       feupdateenv (&env);
217       /* Ensure that round-to-nearest value of z + m1 is not reused.  */
218       z = math_opt_barrier (z);
219       return z + m1;
220     }
221 
222   fesetround (FE_TOWARDZERO);
223   /* Perform m2 + a2 addition with round to odd.  */
224   u.d = a2 + m2;
225 
226   if (__glibc_likely (adjust == 0))
227     {
228       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
229 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
230       feupdateenv (&env);
231       /* Result is a1 + u.d.  */
232       return a1 + u.d;
233     }
234   else if (__glibc_likely (adjust > 0))
235     {
236       if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
237 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
238       feupdateenv (&env);
239       /* Result is a1 + u.d, scaled up.  */
240       return (a1 + u.d) * 0x1p64L;
241     }
242   else
243     {
244       if ((u.ieee.mantissa1 & 1) == 0)
245 	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
246       v.d = a1 + u.d;
247       /* Ensure the addition is not scheduled after fetestexcept call.  */
248       math_force_eval (v.d);
249       int j = fetestexcept (FE_INEXACT) != 0;
250       feupdateenv (&env);
251       /* Ensure the following computations are performed in default rounding
252 	 mode instead of just reusing the round to zero computation.  */
253       asm volatile ("" : "=m" (u) : "m" (u));
254       /* If a1 + u.d is exact, the only rounding happens during
255 	 scaling down.  */
256       if (j == 0)
257 	return v.d * 0x1p-130L;
258       /* If result rounded to zero is not subnormal, no double
259 	 rounding will occur.  */
260       if (v.ieee.exponent > 130)
261 	return (a1 + u.d) * 0x1p-130L;
262       /* If v.d * 0x1p-130L with round to zero is a subnormal above
263 	 or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa
264 	 down just by 1 bit, which means v.ieee.mantissa1 |= j would
265 	 change the round bit, not sticky or guard bit.
266 	 v.d * 0x1p-130L never normalizes by shifting up,
267 	 so round bit plus sticky bit should be already enough
268 	 for proper rounding.  */
269       if (v.ieee.exponent == 130)
270 	{
271 	  /* If the exponent would be in the normal range when
272 	     rounding to normal precision with unbounded exponent
273 	     range, the exact result is known and spurious underflows
274 	     must be avoided on systems detecting tininess after
275 	     rounding.  */
276 	  if (TININESS_AFTER_ROUNDING)
277 	    {
278 	      w.d = a1 + u.d;
279 	      if (w.ieee.exponent == 131)
280 		return w.d * 0x1p-130L;
281 	    }
282 	  /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
283 	     v.ieee.mantissa1 & 1 is the round bit and j is our sticky
284 	     bit.  */
285 	  w.d = 0.0L;
286 	  w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
287 	  w.ieee.negative = v.ieee.negative;
288 	  v.ieee.mantissa1 &= ~3U;
289 	  v.d *= 0x1p-130L;
290 	  w.d *= 0x1p-2L;
291 	  return v.d + w.d;
292 	}
293       v.ieee.mantissa1 |= j;
294       return v.d * 0x1p-130L;
295     }
296 }
297 libm_alias_ldouble (__fma, fma)
298