1 /* 2 * Basic one-word fraction declaration and manipulation. 3 */ 4 5 #define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f 6 #define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f) 7 #define _FP_FRAC_SET_1(X,I) (X##_f = I) 8 #define _FP_FRAC_HIGH_1(X) (X##_f) 9 #define _FP_FRAC_LOW_1(X) (X##_f) 10 #define _FP_FRAC_WORD_1(X,w) (X##_f) 11 12 #define _FP_FRAC_ADDI_1(X,I) (X##_f += I) 13 #define _FP_FRAC_SLL_1(X,N) \ 14 do { \ 15 if (__builtin_constant_p(N) && (N) == 1) \ 16 X##_f += X##_f; \ 17 else \ 18 X##_f <<= (N); \ 19 } while (0) 20 #define _FP_FRAC_SRL_1(X,N) (X##_f >>= N) 21 22 /* Right shift with sticky-lsb. */ 23 #define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz) 24 25 #define __FP_FRAC_SRS_1(X,N,sz) \ 26 (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \ 27 ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0))) 28 29 #define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f) 30 #define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f) 31 #define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f) 32 33 /* Predicates */ 34 #define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0) 35 #define _FP_FRAC_ZEROP_1(X) (X##_f == 0) 36 #define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs) 37 #define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f) 38 #define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f) 39 #define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f) 40 41 #define _FP_ZEROFRAC_1 0 42 #define _FP_MINFRAC_1 1 43 44 /* 45 * Unpack the raw bits of a native fp value. Do not classify or 46 * normalize the data. 47 */ 48 49 #define _FP_UNPACK_RAW_1(fs, X, val) \ 50 do { \ 51 union _FP_UNION_##fs _flo; _flo.flt = (val); \ 52 \ 53 X##_f = _flo.bits.frac; \ 54 X##_e = _flo.bits.exp; \ 55 X##_s = _flo.bits.sign; \ 56 } while (0) 57 58 59 /* 60 * Repack the raw bits of a native fp value. 61 */ 62 63 #define _FP_PACK_RAW_1(fs, val, X) \ 64 do { \ 65 union _FP_UNION_##fs _flo; \ 66 \ 67 _flo.bits.frac = X##_f; \ 68 _flo.bits.exp = X##_e; \ 69 _flo.bits.sign = X##_s; \ 70 \ 71 (val) = _flo.flt; \ 72 } while (0) 73 74 75 /* 76 * Multiplication algorithms: 77 */ 78 79 /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the 80 multiplication immediately. */ 81 82 #define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \ 83 do { \ 84 R##_f = X##_f * Y##_f; \ 85 /* Normalize since we know where the msb of the multiplicands \ 86 were (bit B), we know that the msb of the of the product is \ 87 at either 2B or 2B-1. */ \ 88 _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ 89 } while (0) 90 91 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ 92 93 #define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \ 94 do { \ 95 _FP_W_TYPE _Z_f0, _Z_f1; \ 96 doit(_Z_f1, _Z_f0, X##_f, Y##_f); \ 97 /* Normalize since we know where the msb of the multiplicands \ 98 were (bit B), we know that the msb of the of the product is \ 99 at either 2B or 2B-1. */ \ 100 _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ 101 R##_f = _Z_f0; \ 102 } while (0) 103 104 /* Finally, a simple widening multiply algorithm. What fun! */ 105 106 #define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \ 107 do { \ 108 _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \ 109 \ 110 /* split the words in half */ \ 111 _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ 112 _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ 113 _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ 114 _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ 115 \ 116 /* multiply the pieces */ \ 117 _z_f0 = _xl * _yl; \ 118 _a_f0 = _xh * _yl; \ 119 _a_f1 = _xl * _yh; \ 120 _z_f1 = _xh * _yh; \ 121 \ 122 /* reassemble into two full words */ \ 123 if ((_a_f0 += _a_f1) < _a_f1) \ 124 _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \ 125 _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ 126 _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ 127 _FP_FRAC_ADD_2(_z, _z, _a); \ 128 \ 129 /* normalize */ \ 130 _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \ 131 R##_f = _z_f0; \ 132 } while (0) 133 134 135 /* 136 * Division algorithms: 137 */ 138 139 /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the 140 division immediately. Give this macro either _FP_DIV_HELP_imm for 141 C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you 142 choose will depend on what the compiler does with divrem4. */ 143 144 #define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ 145 do { \ 146 _FP_W_TYPE _q, _r; \ 147 X##_f <<= (X##_f < Y##_f \ 148 ? R##_e--, _FP_WFRACBITS_##fs \ 149 : _FP_WFRACBITS_##fs - 1); \ 150 doit(_q, _r, X##_f, Y##_f); \ 151 R##_f = _q | (_r != 0); \ 152 } while (0) 153 154 /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd 155 that may be useful in this situation. This first is for a primitive 156 that requires normalization, the second for one that does not. Look 157 for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */ 158 159 #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ 160 do { \ 161 _FP_W_TYPE _nh, _nl, _q, _r; \ 162 \ 163 /* Normalize Y -- i.e. make the most significant bit set. */ \ 164 Y##_f <<= _FP_WFRACXBITS_##fs - 1; \ 165 \ 166 /* Shift X op correspondingly high, that is, up one full word. */ \ 167 if (X##_f <= Y##_f) \ 168 { \ 169 _nl = 0; \ 170 _nh = X##_f; \ 171 } \ 172 else \ 173 { \ 174 R##_e++; \ 175 _nl = X##_f << (_FP_W_TYPE_SIZE-1); \ 176 _nh = X##_f >> 1; \ 177 } \ 178 \ 179 udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ 180 R##_f = _q | (_r != 0); \ 181 } while (0) 182 183 #define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ 184 do { \ 185 _FP_W_TYPE _nh, _nl, _q, _r; \ 186 if (X##_f < Y##_f) \ 187 { \ 188 R##_e--; \ 189 _nl = X##_f << _FP_WFRACBITS_##fs; \ 190 _nh = X##_f >> _FP_WFRACXBITS_##fs; \ 191 } \ 192 else \ 193 { \ 194 _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ 195 _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ 196 } \ 197 udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ 198 R##_f = _q | (_r != 0); \ 199 } while (0) 200 201 202 /* 203 * Square root algorithms: 204 * We have just one right now, maybe Newton approximation 205 * should be added for those machines where division is fast. 206 */ 207 208 #define _FP_SQRT_MEAT_1(R, S, T, X, q) \ 209 do { \ 210 while (q) \ 211 { \ 212 T##_f = S##_f + q; \ 213 if (T##_f <= X##_f) \ 214 { \ 215 S##_f = T##_f + q; \ 216 X##_f -= T##_f; \ 217 R##_f += q; \ 218 } \ 219 _FP_FRAC_SLL_1(X, 1); \ 220 q >>= 1; \ 221 } \ 222 } while (0) 223 224 /* 225 * Assembly/disassembly for converting to/from integral types. 226 * No shifting or overflow handled here. 227 */ 228 229 #define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f) 230 #define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r) 231 232 233 /* 234 * Convert FP values between word sizes 235 */ 236 237 #define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \ 238 do { \ 239 D##_f = S##_f; \ 240 if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \ 241 _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \ 242 _FP_WFRACBITS_##sfs); \ 243 else \ 244 D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \ 245 } while (0) 246