1 /*
2  * Basic two-word fraction declaration and manipulation.
3  */
4 
5 #define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0, X##_f1
6 #define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
7 #define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
8 #define _FP_FRAC_HIGH_2(X)	(X##_f1)
9 #define _FP_FRAC_LOW_2(X)	(X##_f0)
10 #define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
11 
12 #define _FP_FRAC_SLL_2(X,N)						\
13   do {									\
14     if ((N) < _FP_W_TYPE_SIZE)						\
15       {									\
16         if (__builtin_constant_p(N) && (N) == 1) 			\
17           {								\
18             X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);	\
19             X##_f0 += X##_f0;						\
20           }								\
21         else								\
22           {								\
23 	    X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N));	\
24 	    X##_f0 <<= (N);						\
25 	  }								\
26       }									\
27     else								\
28       {									\
29 	X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);			\
30 	X##_f0 = 0;							\
31       }									\
32   } while (0)
33 
34 #define _FP_FRAC_SRL_2(X,N)						\
35   do {									\
36     if ((N) < _FP_W_TYPE_SIZE)						\
37       {									\
38 	X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));	\
39 	X##_f1 >>= (N);							\
40       }									\
41     else								\
42       {									\
43 	X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);			\
44 	X##_f1 = 0;							\
45       }									\
46   } while (0)
47 
48 /* Right shift with sticky-lsb.  */
49 #define _FP_FRAC_SRS_2(X,N,sz)						\
50   do {									\
51     if ((N) < _FP_W_TYPE_SIZE)						\
52       {									\
53 	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |	\
54 		  (__builtin_constant_p(N) && (N) == 1			\
55 		   ? X##_f0 & 1						\
56 		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));	\
57 	X##_f1 >>= (N);							\
58       }									\
59     else								\
60       {									\
61 	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |			\
62 	          (((X##_f1 << (sz - (N))) | X##_f0) != 0));		\
63 	X##_f1 = 0;							\
64       }									\
65   } while (0)
66 
67 #define _FP_FRAC_ADDI_2(X,I) \
68   __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
69 
70 #define _FP_FRAC_ADD_2(R,X,Y) \
71   __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
72 
73 #define _FP_FRAC_SUB_2(R,X,Y) \
74   __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
75 
76 #define _FP_FRAC_CLZ_2(R,X)	\
77   do {				\
78     if (X##_f1)			\
79       __FP_CLZ(R,X##_f1);	\
80     else 			\
81     {				\
82       __FP_CLZ(R,X##_f0);	\
83       R += _FP_W_TYPE_SIZE;	\
84     }				\
85   } while(0)
86 
87 /* Predicates */
88 #define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
89 #define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
90 #define _FP_FRAC_OVERP_2(fs,X)	(X##_f1 & _FP_OVERFLOW_##fs)
91 #define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
92 #define _FP_FRAC_GT_2(X, Y)	\
93   ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
94 #define _FP_FRAC_GE_2(X, Y)	\
95   ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
96 
97 #define _FP_ZEROFRAC_2		0, 0
98 #define _FP_MINFRAC_2		0, 1
99 
100 /*
101  * Internals
102  */
103 
104 #define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
105 
106 #define __FP_CLZ_2(R, xh, xl)	\
107   do {				\
108     if (xh)			\
109       __FP_CLZ(R,xl);		\
110     else 			\
111     {				\
112       __FP_CLZ(R,xl);		\
113       R += _FP_W_TYPE_SIZE;	\
114     }				\
115   } while(0)
116 
117 #if 0
118 
119 #ifndef __FP_FRAC_ADDI_2
120 #define __FP_FRAC_ADDI_2(xh, xl, i) \
121   (xh += ((xl += i) < i))
122 #endif
123 #ifndef __FP_FRAC_ADD_2
124 #define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
125   (rh = xh + yh + ((rl = xl + yl) < xl))
126 #endif
127 #ifndef __FP_FRAC_SUB_2
128 #define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
129   (rh = xh - yh - ((rl = xl - yl) > xl))
130 #endif
131 
132 #else
133 
134 #undef __FP_FRAC_ADDI_2
135 #define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
136 #undef __FP_FRAC_ADD_2
137 #define __FP_FRAC_ADD_2			add_ssaaaa
138 #undef __FP_FRAC_SUB_2
139 #define __FP_FRAC_SUB_2			sub_ddmmss
140 
141 #endif
142 
143 /*
144  * Unpack the raw bits of a native fp value.  Do not classify or
145  * normalize the data.
146  */
147 
148 #define _FP_UNPACK_RAW_2(fs, X, val)			\
149   do {							\
150     union _FP_UNION_##fs _flo; _flo.flt = (val);	\
151 							\
152     X##_f0 = _flo.bits.frac0;				\
153     X##_f1 = _flo.bits.frac1;				\
154     X##_e  = _flo.bits.exp;				\
155     X##_s  = _flo.bits.sign;				\
156   } while (0)
157 
158 
159 /*
160  * Repack the raw bits of a native fp value.
161  */
162 
163 #define _FP_PACK_RAW_2(fs, val, X)			\
164   do {							\
165     union _FP_UNION_##fs _flo;				\
166 							\
167     _flo.bits.frac0 = X##_f0;				\
168     _flo.bits.frac1 = X##_f1;				\
169     _flo.bits.exp   = X##_e;				\
170     _flo.bits.sign  = X##_s;				\
171 							\
172     (val) = _flo.flt;					\
173   } while (0)
174 
175 
176 /*
177  * Multiplication algorithms:
178  */
179 
180 /* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
181 
182 #define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit)				\
183   do {									\
184     _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
185 									\
186     doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
187     doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
188     doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
189     doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
190 									\
191     __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
192 		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),	\
193 		    0, _b_f1, _b_f0, 0,					\
194 		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
195 		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));	\
196     __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
197 		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),	\
198 		    0, _c_f1, _c_f0, 0,					\
199 		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
200 		    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));	\
201 									\
202     /* Normalize since we know where the msb of the multiplicands	\
203        were (bit B), we know that the msb of the of the product is	\
204        at either 2B or 2B-1.  */					\
205     _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);	\
206     R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
207     R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
208   } while (0)
209 
210 /* This next macro appears to be totally broken. Fortunately nowhere
211  * seems to use it :-> The problem is that we define _z[4] but
212  * then use it in _FP_FRAC_SRS_4, which will attempt to access
213  * _z_f[n] which will cause an error. The fix probably involves
214  * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998
215  */
216 #define _FP_MUL_MEAT_2_gmp(fs, R, X, Y)					\
217   do {									\
218     _FP_W_TYPE _x[2], _y[2], _z[4];					\
219     _x[0] = X##_f0; _x[1] = X##_f1;					\
220     _y[0] = Y##_f0; _y[1] = Y##_f1;					\
221 									\
222     mpn_mul_n(_z, _x, _y, 2);						\
223 									\
224     /* Normalize since we know where the msb of the multiplicands	\
225        were (bit B), we know that the msb of the of the product is	\
226        at either 2B or 2B-1.  */					\
227     _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs);	\
228     R##_f0 = _z[0];							\
229     R##_f1 = _z[1];							\
230   } while (0)
231 
232 
233 /*
234  * Division algorithms:
235  * This seems to be giving me difficulties -- PMM
236  * Look, NetBSD seems to be able to comment algorithms. Can't you?
237  * I've thrown printks at the problem.
238  * This now appears to work, but I still don't really know why.
239  * Also, I don't think the result is properly normalised...
240  */
241 
242 #define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y)				\
243   do {									\
244     extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],	\
245 			       _FP_W_TYPE n1, _FP_W_TYPE n0,		\
246 			       _FP_W_TYPE d1, _FP_W_TYPE d0);		\
247     _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0;		\
248     _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0;				\
249     _FP_W_TYPE _rmem[2], _qmem[2];					\
250     /* I think this check is to ensure that the result is normalised.   \
251      * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in         \
252      * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y.      \
253      * In this case we tweak things. (this is based on comments in      \
254      * the NetBSD FPU emulation code. )                                 \
255      * We know X,Y are normalised because we ensure this as part of     \
256      * the unpacking process. -- PMM                                    \
257      */									\
258     if (_FP_FRAC_GT_2(X, Y))						\
259       {									\
260 /*	R##_e++; */							\
261 	_n_f3 = X##_f1 >> 1;						\
262 	_n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
263 	_n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
264 	_n_f0 = 0;							\
265       }									\
266     else								\
267       {									\
268 	R##_e--;							\
269 	_n_f3 = X##_f1;							\
270 	_n_f2 = X##_f0;							\
271 	_n_f1 = _n_f0 = 0;						\
272       }									\
273 									\
274     /* Normalize, i.e. make the most significant bit of the 		\
275        denominator set.  CHANGED: - 1 to nothing -- PMM */		\
276     _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */);			\
277 									\
278     /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 	\
279        primitive snagged from libgcc2.c.  */				\
280 									\
281     _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1);		\
282     _q_f1 = _qmem[0];							\
283     umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0);				\
284     _r_f1 = _rmem[0];							\
285     _r_f0 = _n_f1;							\
286     if (_FP_FRAC_GT_2(_m, _r))						\
287       {									\
288 	_q_f1--;							\
289 	_FP_FRAC_ADD_2(_r, _r, Y);					\
290 	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
291 	  {								\
292 	    _q_f1--;							\
293 	    _FP_FRAC_ADD_2(_r, _r, Y);					\
294 	  }								\
295       }									\
296     _FP_FRAC_SUB_2(_r, _r, _m);						\
297 									\
298     _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1);		\
299     _q_f0 = _qmem[0];							\
300     umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0);				\
301     _r_f1 = _rmem[0];							\
302     _r_f0 = _n_f0;							\
303     if (_FP_FRAC_GT_2(_m, _r))						\
304       {									\
305 	_q_f0--;							\
306 	_FP_FRAC_ADD_2(_r, _r, Y);					\
307 	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
308 	  {								\
309 	    _q_f0--;							\
310 	    _FP_FRAC_ADD_2(_r, _r, Y);					\
311 	  }								\
312       }									\
313     _FP_FRAC_SUB_2(_r, _r, _m);						\
314 									\
315     R##_f1 = _q_f1;							\
316     R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0);				\
317     /* adjust so answer is normalized again. I'm not sure what the 	\
318      * final sz param should be. In practice it's never used since      \
319      * N is 1 which is always going to be < _FP_W_TYPE_SIZE...		\
320      */									\
321     /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs);	*/			\
322   } while (0)
323 
324 
325 #define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
326   do {									\
327     _FP_W_TYPE _x[4], _y[2], _z[4];					\
328     _y[0] = Y##_f0; _y[1] = Y##_f1;					\
329     _x[0] = _x[3] = 0;							\
330     if (_FP_FRAC_GT_2(X, Y))						\
331       {									\
332 	R##_e++;							\
333 	_x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) |	\
334 		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
335 			    (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE)));	\
336 	_x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE);		\
337       }									\
338     else								\
339       {									\
340 	_x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) |		\
341 		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
342 			    (_FP_WFRACBITS - _FP_W_TYPE_SIZE)));	\
343 	_x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE);		\
344       }									\
345 									\
346     (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
347     R##_f1 = _z[1];							\
348     R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
349   } while (0)
350 
351 
352 /*
353  * Square root algorithms:
354  * We have just one right now, maybe Newton approximation
355  * should be added for those machines where division is fast.
356  */
357 
358 #define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
359   do {							\
360     while (q)						\
361       {							\
362         T##_f1 = S##_f1 + q;				\
363         if (T##_f1 <= X##_f1)				\
364           {						\
365             S##_f1 = T##_f1 + q;			\
366             X##_f1 -= T##_f1;				\
367             R##_f1 += q;				\
368           }						\
369         _FP_FRAC_SLL_2(X, 1);				\
370         q >>= 1;					\
371       }							\
372     q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
373     while (q)						\
374       {							\
375         T##_f0 = S##_f0 + q;				\
376         T##_f1 = S##_f1;				\
377         if (T##_f1 < X##_f1 || 				\
378             (T##_f1 == X##_f1 && T##_f0 < X##_f0))	\
379           {						\
380             S##_f0 = T##_f0 + q;			\
381             if (((_FP_WS_TYPE)T##_f0) < 0 &&		\
382                 ((_FP_WS_TYPE)S##_f0) >= 0)		\
383               S##_f1++;					\
384             _FP_FRAC_SUB_2(X, X, T);			\
385             R##_f0 += q;				\
386           }						\
387         _FP_FRAC_SLL_2(X, 1);				\
388         q >>= 1;					\
389       }							\
390   } while (0)
391 
392 
393 /*
394  * Assembly/disassembly for converting to/from integral types.
395  * No shifting or overflow handled here.
396  */
397 
398 #define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
399   do {						\
400     if (rsize <= _FP_W_TYPE_SIZE)		\
401       r = X##_f0;				\
402     else					\
403       {						\
404 	r = X##_f1;				\
405 	r <<= _FP_W_TYPE_SIZE;			\
406 	r += X##_f0;				\
407       }						\
408   } while (0)
409 
410 #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
411   do {									\
412     X##_f0 = r;								\
413     X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
414   } while (0)
415 
416 /*
417  * Convert FP values between word sizes
418  */
419 
420 #define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
421   do {									\
422     _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
423 		   _FP_WFRACBITS_##sfs);				\
424     D##_f = S##_f0;							\
425   } while (0)
426 
427 #define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
428   do {									\
429     D##_f0 = S##_f;							\
430     D##_f1 = 0;								\
431     _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
432   } while (0)
433 
434