1 /* Software floating-point emulation.
2    Definitions for IEEE Extended Precision.
3    Copyright (C) 1999-2022 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    In addition to the permissions in the GNU Lesser General Public
12    License, the Free Software Foundation gives you unlimited
13    permission to link the compiled version of this file into
14    combinations with other programs, and to distribute those
15    combinations without any restriction coming from the use of this
16    file.  (The Lesser General Public License restrictions do apply in
17    other respects; for example, they cover modification of the file,
18    and distribution when not linked into a combine executable.)
19 
20    The GNU C Library is distributed in the hope that it will be useful,
21    but WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    Lesser General Public License for more details.
24 
25    You should have received a copy of the GNU Lesser General Public
26    License along with the GNU C Library; if not, see
27    <https://www.gnu.org/licenses/>.  */
28 
29 #ifndef SOFT_FP_EXTENDED_H
30 #define SOFT_FP_EXTENDED_H	1
31 
32 #if _FP_W_TYPE_SIZE < 32
33 # error "Here's a nickel, kid. Go buy yourself a real computer."
34 #endif
35 
36 #if _FP_W_TYPE_SIZE < 64
37 # define _FP_FRACTBITS_E	(4*_FP_W_TYPE_SIZE)
38 # define _FP_FRACTBITS_DW_E	(8*_FP_W_TYPE_SIZE)
39 #else
40 # define _FP_FRACTBITS_E	(2*_FP_W_TYPE_SIZE)
41 # define _FP_FRACTBITS_DW_E	(4*_FP_W_TYPE_SIZE)
42 #endif
43 
44 #define _FP_FRACBITS_E		64
45 #define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
46 #define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
47 #define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
48 #define _FP_EXPBITS_E		15
49 #define _FP_EXPBIAS_E		16383
50 #define _FP_EXPMAX_E		32767
51 
52 #define _FP_QNANBIT_E		\
53 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
54 #define _FP_QNANBIT_SH_E		\
55 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
56 #define _FP_IMPLBIT_E		\
57 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
58 #define _FP_IMPLBIT_SH_E		\
59 	((_FP_W_TYPE) 1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
60 #define _FP_OVERFLOW_E		\
61 	((_FP_W_TYPE) 1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
62 
63 #define _FP_WFRACBITS_DW_E	(2 * _FP_WFRACBITS_E)
64 #define _FP_WFRACXBITS_DW_E	(_FP_FRACTBITS_DW_E - _FP_WFRACBITS_DW_E)
65 #define _FP_HIGHBIT_DW_E	\
66   ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_E - 1) % _FP_W_TYPE_SIZE)
67 
68 typedef float XFtype __attribute__ ((mode (XF)));
69 
70 #if _FP_W_TYPE_SIZE < 64
71 
72 union _FP_UNION_E
73 {
74   XFtype flt;
75   struct _FP_STRUCT_LAYOUT
76   {
77 # if __BYTE_ORDER == __BIG_ENDIAN
78     unsigned long pad1 : _FP_W_TYPE_SIZE;
79     unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
80     unsigned long sign : 1;
81     unsigned long exp : _FP_EXPBITS_E;
82     unsigned long frac1 : _FP_W_TYPE_SIZE;
83     unsigned long frac0 : _FP_W_TYPE_SIZE;
84 # else
85     unsigned long frac0 : _FP_W_TYPE_SIZE;
86     unsigned long frac1 : _FP_W_TYPE_SIZE;
87     unsigned exp : _FP_EXPBITS_E;
88     unsigned sign : 1;
89 # endif /* not bigendian */
90   } bits;
91 };
92 
93 
94 # define FP_DECL_E(X)		_FP_DECL (4, X)
95 
96 # define FP_UNPACK_RAW_E(X, val)			\
97   do							\
98     {							\
99       union _FP_UNION_E FP_UNPACK_RAW_E_flo;		\
100       FP_UNPACK_RAW_E_flo.flt = (val);			\
101 							\
102       X##_f[2] = 0;					\
103       X##_f[3] = 0;					\
104       X##_f[0] = FP_UNPACK_RAW_E_flo.bits.frac0;	\
105       X##_f[1] = FP_UNPACK_RAW_E_flo.bits.frac1;	\
106       X##_f[1] &= ~_FP_IMPLBIT_E;			\
107       X##_e  = FP_UNPACK_RAW_E_flo.bits.exp;		\
108       X##_s  = FP_UNPACK_RAW_E_flo.bits.sign;		\
109     }							\
110   while (0)
111 
112 # define FP_UNPACK_RAW_EP(X, val)			\
113   do							\
114     {							\
115       union _FP_UNION_E *FP_UNPACK_RAW_EP_flo		\
116 	= (union _FP_UNION_E *) (val);			\
117 							\
118       X##_f[2] = 0;					\
119       X##_f[3] = 0;					\
120       X##_f[0] = FP_UNPACK_RAW_EP_flo->bits.frac0;	\
121       X##_f[1] = FP_UNPACK_RAW_EP_flo->bits.frac1;	\
122       X##_f[1] &= ~_FP_IMPLBIT_E;			\
123       X##_e  = FP_UNPACK_RAW_EP_flo->bits.exp;		\
124       X##_s  = FP_UNPACK_RAW_EP_flo->bits.sign;		\
125     }							\
126   while (0)
127 
128 # define FP_PACK_RAW_E(val, X)			\
129   do						\
130     {						\
131       union _FP_UNION_E FP_PACK_RAW_E_flo;	\
132 						\
133       if (X##_e)				\
134 	X##_f[1] |= _FP_IMPLBIT_E;		\
135       else					\
136 	X##_f[1] &= ~(_FP_IMPLBIT_E);		\
137       FP_PACK_RAW_E_flo.bits.frac0 = X##_f[0];	\
138       FP_PACK_RAW_E_flo.bits.frac1 = X##_f[1];	\
139       FP_PACK_RAW_E_flo.bits.exp   = X##_e;	\
140       FP_PACK_RAW_E_flo.bits.sign  = X##_s;	\
141 						\
142       (val) = FP_PACK_RAW_E_flo.flt;		\
143     }						\
144   while (0)
145 
146 # define FP_PACK_RAW_EP(val, X)				\
147   do							\
148     {							\
149       if (!FP_INHIBIT_RESULTS)				\
150 	{						\
151 	  union _FP_UNION_E *FP_PACK_RAW_EP_flo		\
152 	    = (union _FP_UNION_E *) (val);		\
153 							\
154 	  if (X##_e)					\
155 	    X##_f[1] |= _FP_IMPLBIT_E;			\
156 	  else						\
157 	    X##_f[1] &= ~(_FP_IMPLBIT_E);		\
158 	  FP_PACK_RAW_EP_flo->bits.frac0 = X##_f[0];	\
159 	  FP_PACK_RAW_EP_flo->bits.frac1 = X##_f[1];	\
160 	  FP_PACK_RAW_EP_flo->bits.exp   = X##_e;	\
161 	  FP_PACK_RAW_EP_flo->bits.sign  = X##_s;	\
162 	}						\
163     }							\
164   while (0)
165 
166 # define FP_UNPACK_E(X, val)			\
167   do						\
168     {						\
169       FP_UNPACK_RAW_E (X, (val));		\
170       _FP_UNPACK_CANONICAL (E, 4, X);		\
171     }						\
172   while (0)
173 
174 # define FP_UNPACK_EP(X, val)			\
175   do						\
176     {						\
177       FP_UNPACK_RAW_EP (X, (val));		\
178       _FP_UNPACK_CANONICAL (E, 4, X);		\
179     }						\
180   while (0)
181 
182 # define FP_UNPACK_SEMIRAW_E(X, val)		\
183   do						\
184     {						\
185       FP_UNPACK_RAW_E (X, (val));		\
186       _FP_UNPACK_SEMIRAW (E, 4, X);		\
187     }						\
188   while (0)
189 
190 # define FP_UNPACK_SEMIRAW_EP(X, val)		\
191   do						\
192     {						\
193       FP_UNPACK_RAW_EP (X, (val));		\
194       _FP_UNPACK_SEMIRAW (E, 4, X);		\
195     }						\
196   while (0)
197 
198 # define FP_PACK_E(val, X)			\
199   do						\
200     {						\
201       _FP_PACK_CANONICAL (E, 4, X);		\
202       FP_PACK_RAW_E ((val), X);			\
203     }						\
204   while (0)
205 
206 # define FP_PACK_EP(val, X)			\
207   do						\
208     {						\
209       _FP_PACK_CANONICAL (E, 4, X);		\
210       FP_PACK_RAW_EP ((val), X);		\
211     }						\
212   while (0)
213 
214 # define FP_PACK_SEMIRAW_E(val, X)		\
215   do						\
216     {						\
217       _FP_PACK_SEMIRAW (E, 4, X);		\
218       FP_PACK_RAW_E ((val), X);			\
219     }						\
220   while (0)
221 
222 # define FP_PACK_SEMIRAW_EP(val, X)		\
223   do						\
224     {						\
225       _FP_PACK_SEMIRAW (E, 4, X);		\
226       FP_PACK_RAW_EP ((val), X);		\
227     }						\
228   while (0)
229 
230 # define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 4, X)
231 # define FP_NEG_E(R, X)		_FP_NEG (E, 4, R, X)
232 # define FP_ADD_E(R, X, Y)	_FP_ADD (E, 4, R, X, Y)
233 # define FP_SUB_E(R, X, Y)	_FP_SUB (E, 4, R, X, Y)
234 # define FP_MUL_E(R, X, Y)	_FP_MUL (E, 4, R, X, Y)
235 # define FP_DIV_E(R, X, Y)	_FP_DIV (E, 4, R, X, Y)
236 # define FP_SQRT_E(R, X)	_FP_SQRT (E, 4, R, X)
237 # define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 4, 8, R, X, Y, Z)
238 
239 /* Square root algorithms:
240    We have just one right now, maybe Newton approximation
241    should be added for those machines where division is fast.
242    This has special _E version because standard _4 square
243    root would not work (it has to start normally with the
244    second word and not the first), but as we have to do it
245    anyway, we optimize it by doing most of the calculations
246    in two UWtype registers instead of four.  */
247 
248 # define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
249   do							\
250     {							\
251       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
252       _FP_FRAC_SRL_4 (X, (_FP_WORKBITS));		\
253       while (q)						\
254 	{						\
255 	  T##_f[1] = S##_f[1] + (q);			\
256 	  if (T##_f[1] <= X##_f[1])			\
257 	    {						\
258 	      S##_f[1] = T##_f[1] + (q);		\
259 	      X##_f[1] -= T##_f[1];			\
260 	      R##_f[1] += (q);				\
261 	    }						\
262 	  _FP_FRAC_SLL_2 (X, 1);			\
263 	  (q) >>= 1;					\
264 	}						\
265       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
266       while (q)						\
267 	{						\
268 	  T##_f[0] = S##_f[0] + (q);			\
269 	  T##_f[1] = S##_f[1];				\
270 	  if (T##_f[1] < X##_f[1]			\
271 	      || (T##_f[1] == X##_f[1]			\
272 		  && T##_f[0] <= X##_f[0]))		\
273 	    {						\
274 	      S##_f[0] = T##_f[0] + (q);		\
275 	      S##_f[1] += (T##_f[0] > S##_f[0]);	\
276 	      _FP_FRAC_DEC_2 (X, T);			\
277 	      R##_f[0] += (q);				\
278 	    }						\
279 	  _FP_FRAC_SLL_2 (X, 1);			\
280 	  (q) >>= 1;					\
281 	}						\
282       _FP_FRAC_SLL_4 (R, (_FP_WORKBITS));		\
283       if (X##_f[0] | X##_f[1])				\
284 	{						\
285 	  if (S##_f[1] < X##_f[1]			\
286 	      || (S##_f[1] == X##_f[1]			\
287 		  && S##_f[0] < X##_f[0]))		\
288 	    R##_f[0] |= _FP_WORK_ROUND;			\
289 	  R##_f[0] |= _FP_WORK_STICKY;			\
290 	}						\
291     }							\
292   while (0)
293 
294 # define FP_CMP_E(r, X, Y, un, ex)	_FP_CMP (E, 4, (r), X, Y, (un), (ex))
295 # define FP_CMP_EQ_E(r, X, Y, ex)	_FP_CMP_EQ (E, 4, (r), X, Y, (ex))
296 # define FP_CMP_UNORD_E(r, X, Y, ex)	_FP_CMP_UNORD (E, 4, (r), X, Y, (ex))
297 
298 # define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 4, (r), X, (rsz), (rsg))
299 # define FP_TO_INT_ROUND_E(r, X, rsz, rsg)	\
300   _FP_TO_INT_ROUND (E, 4, (r), X, (rsz), (rsg))
301 # define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 4, X, (r), (rs), rt)
302 
303 # define _FP_FRAC_HIGH_E(X)	(X##_f[2])
304 # define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
305 
306 # define _FP_FRAC_HIGH_DW_E(X)	(X##_f[4])
307 
308 #else   /* not _FP_W_TYPE_SIZE < 64 */
309 union _FP_UNION_E
310 {
311   XFtype flt;
312   struct _FP_STRUCT_LAYOUT
313   {
314 # if __BYTE_ORDER == __BIG_ENDIAN
315     _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
316     unsigned sign   : 1;
317     unsigned exp    : _FP_EXPBITS_E;
318     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
319 # else
320     _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
321     unsigned exp    : _FP_EXPBITS_E;
322     unsigned sign   : 1;
323 # endif
324   } bits;
325 };
326 
327 # define FP_DECL_E(X)		_FP_DECL (2, X)
328 
329 # define FP_UNPACK_RAW_E(X, val)		\
330   do						\
331     {						\
332       union _FP_UNION_E FP_UNPACK_RAW_E_flo;	\
333       FP_UNPACK_RAW_E_flo.flt = (val);		\
334 						\
335       X##_f0 = FP_UNPACK_RAW_E_flo.bits.frac;	\
336       X##_f0 &= ~_FP_IMPLBIT_E;			\
337       X##_f1 = 0;				\
338       X##_e = FP_UNPACK_RAW_E_flo.bits.exp;	\
339       X##_s = FP_UNPACK_RAW_E_flo.bits.sign;	\
340     }						\
341   while (0)
342 
343 # define FP_UNPACK_RAW_EP(X, val)		\
344   do						\
345     {						\
346       union _FP_UNION_E *FP_UNPACK_RAW_EP_flo	\
347 	= (union _FP_UNION_E *) (val);		\
348 						\
349       X##_f0 = FP_UNPACK_RAW_EP_flo->bits.frac;	\
350       X##_f0 &= ~_FP_IMPLBIT_E;			\
351       X##_f1 = 0;				\
352       X##_e = FP_UNPACK_RAW_EP_flo->bits.exp;	\
353       X##_s = FP_UNPACK_RAW_EP_flo->bits.sign;	\
354     }						\
355   while (0)
356 
357 # define FP_PACK_RAW_E(val, X)			\
358   do						\
359     {						\
360       union _FP_UNION_E FP_PACK_RAW_E_flo;	\
361 						\
362       if (X##_e)				\
363 	X##_f0 |= _FP_IMPLBIT_E;		\
364       else					\
365 	X##_f0 &= ~(_FP_IMPLBIT_E);		\
366       FP_PACK_RAW_E_flo.bits.frac = X##_f0;	\
367       FP_PACK_RAW_E_flo.bits.exp  = X##_e;	\
368       FP_PACK_RAW_E_flo.bits.sign = X##_s;	\
369 						\
370       (val) = FP_PACK_RAW_E_flo.flt;		\
371     }						\
372   while (0)
373 
374 # define FP_PACK_RAW_EP(fs, val, X)			\
375   do							\
376     {							\
377       if (!FP_INHIBIT_RESULTS)				\
378 	{						\
379 	  union _FP_UNION_E *FP_PACK_RAW_EP_flo		\
380 	    = (union _FP_UNION_E *) (val);		\
381 							\
382 	  if (X##_e)					\
383 	    X##_f0 |= _FP_IMPLBIT_E;			\
384 	  else						\
385 	    X##_f0 &= ~(_FP_IMPLBIT_E);			\
386 	  FP_PACK_RAW_EP_flo->bits.frac = X##_f0;	\
387 	  FP_PACK_RAW_EP_flo->bits.exp  = X##_e;	\
388 	  FP_PACK_RAW_EP_flo->bits.sign = X##_s;	\
389 	}						\
390     }							\
391   while (0)
392 
393 
394 # define FP_UNPACK_E(X, val)			\
395   do						\
396     {						\
397       FP_UNPACK_RAW_E (X, (val));		\
398       _FP_UNPACK_CANONICAL (E, 2, X);		\
399     }						\
400   while (0)
401 
402 # define FP_UNPACK_EP(X, val)			\
403   do						\
404     {						\
405       FP_UNPACK_RAW_EP (X, (val));		\
406       _FP_UNPACK_CANONICAL (E, 2, X);		\
407     }						\
408   while (0)
409 
410 # define FP_UNPACK_SEMIRAW_E(X, val)		\
411   do						\
412     {						\
413       FP_UNPACK_RAW_E (X, (val));		\
414       _FP_UNPACK_SEMIRAW (E, 2, X);		\
415     }						\
416   while (0)
417 
418 # define FP_UNPACK_SEMIRAW_EP(X, val)		\
419   do						\
420     {						\
421       FP_UNPACK_RAW_EP (X, (val));		\
422       _FP_UNPACK_SEMIRAW (E, 2, X);		\
423     }						\
424   while (0)
425 
426 # define FP_PACK_E(val, X)			\
427   do						\
428     {						\
429       _FP_PACK_CANONICAL (E, 2, X);		\
430       FP_PACK_RAW_E ((val), X);			\
431     }						\
432   while (0)
433 
434 # define FP_PACK_EP(val, X)			\
435   do						\
436     {						\
437       _FP_PACK_CANONICAL (E, 2, X);		\
438       FP_PACK_RAW_EP ((val), X);		\
439     }						\
440   while (0)
441 
442 # define FP_PACK_SEMIRAW_E(val, X)		\
443   do						\
444     {						\
445       _FP_PACK_SEMIRAW (E, 2, X);		\
446       FP_PACK_RAW_E ((val), X);			\
447     }						\
448   while (0)
449 
450 # define FP_PACK_SEMIRAW_EP(val, X)		\
451   do						\
452     {						\
453       _FP_PACK_SEMIRAW (E, 2, X);		\
454       FP_PACK_RAW_EP ((val), X);		\
455     }						\
456   while (0)
457 
458 # define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN (E, 2, X)
459 # define FP_NEG_E(R, X)		_FP_NEG (E, 2, R, X)
460 # define FP_ADD_E(R, X, Y)	_FP_ADD (E, 2, R, X, Y)
461 # define FP_SUB_E(R, X, Y)	_FP_SUB (E, 2, R, X, Y)
462 # define FP_MUL_E(R, X, Y)	_FP_MUL (E, 2, R, X, Y)
463 # define FP_DIV_E(R, X, Y)	_FP_DIV (E, 2, R, X, Y)
464 # define FP_SQRT_E(R, X)	_FP_SQRT (E, 2, R, X)
465 # define FP_FMA_E(R, X, Y, Z)	_FP_FMA (E, 2, 4, R, X, Y, Z)
466 
467 /* Square root algorithms:
468    We have just one right now, maybe Newton approximation
469    should be added for those machines where division is fast.
470    We optimize it by doing most of the calculations
471    in one UWtype registers instead of two, although we don't
472    have to.  */
473 # define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
474   do							\
475     {							\
476       (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);	\
477       _FP_FRAC_SRL_2 (X, (_FP_WORKBITS));		\
478       while (q)						\
479 	{						\
480 	  T##_f0 = S##_f0 + (q);			\
481 	  if (T##_f0 <= X##_f0)				\
482 	    {						\
483 	      S##_f0 = T##_f0 + (q);			\
484 	      X##_f0 -= T##_f0;				\
485 	      R##_f0 += (q);				\
486 	    }						\
487 	  _FP_FRAC_SLL_1 (X, 1);			\
488 	  (q) >>= 1;					\
489 	}						\
490       _FP_FRAC_SLL_2 (R, (_FP_WORKBITS));		\
491       if (X##_f0)					\
492 	{						\
493 	  if (S##_f0 < X##_f0)				\
494 	    R##_f0 |= _FP_WORK_ROUND;			\
495 	  R##_f0 |= _FP_WORK_STICKY;			\
496 	}						\
497     }							\
498   while (0)
499 
500 # define FP_CMP_E(r, X, Y, un, ex)	_FP_CMP (E, 2, (r), X, Y, (un), (ex))
501 # define FP_CMP_EQ_E(r, X, Y, ex)	_FP_CMP_EQ (E, 2, (r), X, Y, (ex))
502 # define FP_CMP_UNORD_E(r, X, Y, ex)	_FP_CMP_UNORD (E, 2, (r), X, Y, (ex))
503 
504 # define FP_TO_INT_E(r, X, rsz, rsg)	_FP_TO_INT (E, 2, (r), X, (rsz), (rsg))
505 # define FP_TO_INT_ROUND_E(r, X, rsz, rsg)	\
506   _FP_TO_INT_ROUND (E, 2, (r), X, (rsz), (rsg))
507 # define FP_FROM_INT_E(X, r, rs, rt)	_FP_FROM_INT (E, 2, X, (r), (rs), rt)
508 
509 # define _FP_FRAC_HIGH_E(X)	(X##_f1)
510 # define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
511 
512 # define _FP_FRAC_HIGH_DW_E(X)	(X##_f[2])
513 
514 #endif /* not _FP_W_TYPE_SIZE < 64 */
515 
516 #endif /* !SOFT_FP_EXTENDED_H */
517