/* Software floating-point emulation.
   Basic two-word fraction declaration and manipulation.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef SOFT_FP_OP_2_H
#define SOFT_FP_OP_2_H 1

/* A two-word fraction named X is stored in the pair of variables
   X_f1 (high word) and X_f0 (low word), each of type _FP_W_TYPE.
   Throughout this file "W" means _FP_W_TYPE_SIZE, the width of one
   word in bits.  */

/* Declare the two words of fraction X, initialized with
   _FP_ZERO_INIT.  */
#define _FP_FRAC_DECL_2(X) \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
/* Copy fraction S into fraction D.  */
#define _FP_FRAC_COPY_2(D, S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
/* Set X from I, where I expands to a "high, low" pair of word values
   such as _FP_ZEROFRAC_2.  */
#define _FP_FRAC_SET_2(X, I) __FP_FRAC_SET_2 (X, I)
/* Accessors for the high word, the low word and word number W.  */
#define _FP_FRAC_HIGH_2(X) (X##_f1)
#define _FP_FRAC_LOW_2(X) (X##_f0)
#define _FP_FRAC_WORD_2(X, w) (X##_f##w)

/* Shift X left by N bits.  The expressions below shift by N,
   W - N or N - W, so N is expected to satisfy 0 < N < 2*W; any other
   count would yield an out-of-range shift.  A constant N == 1 is
   done with additions, carrying the top bit of the low word into the
   high word.  */
#define _FP_FRAC_SLL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      if (__builtin_constant_p (N) && (N) == 1) \
		{ \
		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
		  X##_f0 += X##_f0; \
		} \
	      else \
		{ \
		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
		  X##_f0 <<= (N); \
		} \
	      0; \
	    }) \
	  : ({ \
	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
	      X##_f0 = 0; \
	  }))


/* Shift X right by N bits, discarding the bits shifted out.  The same
   0 < N < 2*W expectation as for _FP_FRAC_SLL_2 applies.  */
#define _FP_FRAC_SRL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
	      X##_f1 = 0; \
	    }))

/* Right shift with sticky-lsb.  */

/* Shift X right by N bits and store in S whether any of the bits
   shifted out was nonzero.  The SZ argument is not used by this
   two-word implementation.  */
#define _FP_FRAC_SRST_2(X, S, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      S = (__builtin_constant_p (N) && (N) == 1 \
		   ? X##_f0 & 1 \
		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      S = ((((N) == _FP_W_TYPE_SIZE \
		     ? 0 \
		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
		    | X##_f0) != 0); \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
	      X##_f1 = 0; \
	    }))

/* Shift X right by N bits and OR the "any nonzero bit was shifted out"
   flag into the least significant bit of the result.  The SZ argument
   is not used by this two-word implementation.  */
#define _FP_FRAC_SRS_2(X, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
	  ? ({ \
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
			| (__builtin_constant_p (N) && (N) == 1 \
			   ? X##_f0 & 1 \
			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
	      X##_f1 >>= (N); \
	    }) \
	  : ({ \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
			| ((((N) == _FP_W_TYPE_SIZE \
			     ? 0 \
			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
			    | X##_f0) != 0)); \
	      X##_f1 = 0; \
	    }))

/* Two-word arithmetic, forwarded to the word-level internals defined
   further down: X += I, R = X + Y, R = X - Y and X -= Y.  */
#define _FP_FRAC_ADDI_2(X, I) \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

#define _FP_FRAC_ADD_2(R, X, Y) \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

#define _FP_FRAC_SUB_2(R, X, Y) \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

#define _FP_FRAC_DEC_2(X, Y) \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)

/* Store in R the number of leading zero bits of X.  When the high
   word is zero the count is taken from the low word and W is added.
   __FP_CLZ is applied to the low word even if it is zero too.  */
#define _FP_FRAC_CLZ_2(R, X) \
  do \
    { \
      if (X##_f1) \
	__FP_CLZ ((R), X##_f1); \
      else \
	{ \
	  __FP_CLZ ((R), X##_f0); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
    } \
  while (0)

/* Predicates.  */
#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))

/* "high, low" word pairs for use with _FP_FRAC_SET_2.  */
#define _FP_ZEROFRAC_2 0, 0
#define _FP_MINFRAC_2 0, 1
#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)

/* Internals.  */

/* Assign the high word I1 and the low word I0 to X.  */
#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = (I0), X##_f1 = (I1))

/* Word-level form of _FP_FRAC_CLZ_2: count the leading zeros of the
   two-word value XH:XL into R.  */
#define __FP_CLZ_2(R, xh, xl) \
  do \
    { \
      if (xh) \
	__FP_CLZ ((R), xh); \
      else \
	{ \
	  __FP_CLZ ((R), xl); \
	  (R) += _FP_W_TYPE_SIZE; \
	} \
    } \
  while (0)

/* The first branch holds plain C versions of the two-word add and
   subtract helpers and is disabled.  The active branch maps them onto
   add_ssaaaa and sub_ddmmss, which must be supplied by whoever
   includes this header.  */
#if 0

# ifndef __FP_FRAC_ADDI_2
#  define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
# endif
# ifndef __FP_FRAC_ADD_2
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
# ifndef __FP_FRAC_SUB_2
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
# ifndef __FP_FRAC_DEC_2
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  do \
    { \
      UWtype __FP_FRAC_DEC_2_t = xl; \
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
    } \
  while (0)
# endif

#else

# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa (xh, xl, xh, xl, 0, i)
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2 add_ssaaaa
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2 sub_ddmmss
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif

/* Unpack the raw bits of a native fp value.  Do not classify or
   normalize the data.  */

/* VAL is the value itself; it is copied through the flt member of
   union _FP_UNION_<fs> and the bit-fields are read into X_f0, X_f1,
   X_e and X_s.  */
#define _FP_UNPACK_RAW_2(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
      _FP_UNPACK_RAW_2_flo.flt = (val); \
 \
      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
    } \
  while (0)

/* As _FP_UNPACK_RAW_2, but VAL is a pointer that is cast to
   union _FP_UNION_<fs> * and read in place.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
    } \
  while (0)


/* Repack the raw bits of a native fp value.  */

/* Store X_f0, X_f1, X_e and X_s into the bit-fields of a temporary
   union and assign its flt member to VAL.  */
#define _FP_PACK_RAW_2(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
 \
      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
 \
      (val) = _FP_PACK_RAW_2_flo.flt; \
    } \
  while (0)

/* As _FP_PACK_RAW_2, but VAL is a pointer that is cast to
   union _FP_UNION_<fs> * and written in place.  */
#define _FP_PACK_RAW_2_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
	= (union _FP_UNION_##fs *) (val); \
 \
      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
    } \
  while (0)


/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */

/* DOIT (hi, lo, a, b) is that primitive.  The four partial products
   of X and Y are formed and summed into the four-word fraction R;
   no normalization is done.  WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
 \
      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
	    X##_f0, Y##_f0); \
      doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
	    X##_f0, Y##_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
	    X##_f1, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
	    X##_f1, Y##_f1); \
 \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_b_f1, \
		       _FP_MUL_MEAT_DW_2_wide_b_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_c_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1)); \
    } \
  while (0)

/* Multiply X by Y with _FP_MUL_MEAT_DW_2_wide, shift the four-word
   product right by WFRACBITS - 1 with sticky-lsb and store its two
   low words in R.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
 \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
			      X, Y, doit); \
 \
      /* Normalize.  Since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
    } \
  while (0)

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four.  This one is for machines
   where multiplication is much more expensive than subtraction.  */

/* The three products are X_f0 * Y_f0, X_f1 * Y_f1 and
   (X_f0 + X_f1) * (Y_f0 + Y_f1); the carries out of the two word sums
   are kept in the c1 and c2 locals and compensated for afterwards.
   WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
 \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
	    X##_f0, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
	    _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
 \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), \
		       (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
			& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_d, \
		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
			_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), \
		       0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
		       _FP_FRAC_WORD_4 (R, 0)); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_FRAC_WORD_4 (R, 1), 0, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
    } \
  while (0)

/* Multiply X by Y with _FP_MUL_MEAT_DW_2_wide_3mul, shift the
   four-word product right by WFRACBITS - 1 with sticky-lsb and store
   its two low words in R.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
 \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
				   _FP_MUL_MEAT_2_wide_3mul_z, \
				   X, Y, doit); \
 \
      /* Normalize.  Since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
		      (wfracbits)-1, 2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
    } \
  while (0)

/* Multiply X by Y by copying each into a two-element word array and
   calling mpn_mul_n, which writes the product to the array R_f.
   WFRACBITS is not used here.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
 \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
		 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
    } \
  while (0)

/* Multiply X by Y with _FP_MUL_MEAT_DW_2_gmp, shift the four-word
   product right by WFRACBITS - 1 with sticky-lsb and store its two
   low words in R.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
 \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
 \
      /* Normalize.  Since we know where the msb of the multiplicands \
	 were (bit B), we know that the msb of the product is \
	 at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
		      2*(wfracbits)); \
      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
    } \
  while (0)

/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only.
   Caller guarantees that X and Y have bit (wfracbits - 1) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
	{ \
	  /* 2^-24 */ 5.9604644775390625e-08, \
	  /* 2^-48 */ 3.5527136788005009e-15, \
	  /* 2^-72 */ 2.1175823681357508e-22, \
	  /* 2^-96 */ 1.2621774483536189e-29, \
	  /* 2^28 */ 2.68435456e+08, \
	  /* 2^4 */ 1.600000e+01, \
	  /* 2^-20 */ 9.5367431640625e-07, \
	  /* 2^-44 */ 5.6843418860808015e-14, \
	  /* 2^-68 */ 3.3881317890172014e-21, \
	  /* 2^-92 */ 2.0194839173657902e-28, \
	  /* 2^-116 */ 1.2037062152420224e-35 \
	}; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
	     _g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
				     _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
 \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
			 "wfracbits out of range"); \
 \
      setfetz; \
 \
      /* Split X into the 24-bit limbs _e240 (lowest) .. _b240 plus \
	 the remaining top bits _a240, and Y likewise into \
	 _j240 .. _f240.  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Any residue left in the four low partial sums becomes the \
	 sticky bit _y240.  */ \
      if (_s240.d != 0.0) \
	_y240 = 1; \
      if (_r240.d != 0.0) \
	_y240 = 1; \
      if (_q240.d != 0.0) \
	_y240 = 1; \
      if (_p240.d != 0.0) \
	_y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      R##_f1 = ((_t240 << (128 - ((wfracbits) - 1))) \
		| ((_u240 & 0xffffff) >> (((wfracbits) - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - ((wfracbits) - 1))) \
		| ((_v240 & 0xffffff) << (144 - ((wfracbits) - 1))) \
		| ((_w240 & 0xffffff) << (120 - ((wfracbits) - 1))) \
		| ((_x240 & 0xffffff) >> (((wfracbits) - 1) - 96)) \
		| _y240); \
      resetfe; \
    } \
  while (0)

/* Division algorithms: */

/* Divide X by Y into R using udiv_qrnnd and umul_ppmm, one quotient
   word at a time.  Y is modified (shifted left by _FP_WFRACXBITS_<fs>)
   and R_e is decremented when X < Y.  */
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
	{ \
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
	  _FP_DIV_MEAT_2_udiv_n_f1 \
	    = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
	  _FP_DIV_MEAT_2_udiv_n_f0 \
	    = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
	} \
      else \
	{ \
	  R##_e--; \
	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
	  _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
	  _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
	} \
 \
      /* Normalize, i.e. make the most significant bit of the \
	 denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
 \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
		  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
		  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
		 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
	{ \
	  R##_f1--; \
	  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			  _FP_DIV_MEAT_2_udiv_r); \
	  if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
	      && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
				_FP_DIV_MEAT_2_udiv_r)) \
	    { \
	      R##_f1--; \
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			      _FP_DIV_MEAT_2_udiv_r); \
	    } \
	} \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
 \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
	{ \
	  /* This is a special case, not an optimization \
	     (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
	     As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
	     R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
	     know what kind of bits it is (sticky, guard, round), \
	     we don't care.  We also don't care what the remainder is, \
	     because the guard bit will be set anyway.  -jj */ \
	  R##_f0 = -1; \
	} \
      else \
	{ \
	  udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
		      _FP_DIV_MEAT_2_udiv_r_f1, \
		      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
	  umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
		     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
	  _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
	  if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
			     _FP_DIV_MEAT_2_udiv_r)) \
	    { \
	      R##_f0--; \
	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
			      _FP_DIV_MEAT_2_udiv_r); \
	      if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
		  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
				    _FP_DIV_MEAT_2_udiv_r)) \
		{ \
		  R##_f0--; \
		  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
				  _FP_DIV_MEAT_2_udiv_r); \
		} \
	    } \
	  /* A nonzero final remainder makes the result inexact.  */ \
	  if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
			      _FP_DIV_MEAT_2_udiv_m)) \
	    R##_f0 |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)


/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root of X accumulated into R, with S and T as
   scratch fractions.  Q is the starting bit mask for the high word and
   is modified; the first loop produces the high word of R, the second
   the low word down to _FP_WORK_ROUND.  X is consumed.  If a remainder
   is left, the round and/or sticky bits are set in R_f0.  */
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
	{ \
	  T##_f1 = S##_f1 + (q); \
	  if (T##_f1 <= X##_f1) \
	    { \
	      S##_f1 = T##_f1 + (q); \
	      X##_f1 -= T##_f1; \
	      R##_f1 += (q); \
	    } \
	  _FP_FRAC_SLL_2 (X, 1); \
	  (q) >>= 1; \
	} \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
	{ \
	  T##_f0 = S##_f0 + (q); \
	  T##_f1 = S##_f1; \
	  if (T##_f1 < X##_f1 \
	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
	    { \
	      S##_f0 = T##_f0 + (q); \
	      /* Carry out of the low word of S.  */ \
	      S##_f1 += (T##_f0 > S##_f0); \
	      _FP_FRAC_DEC_2 (X, T); \
	      R##_f0 += (q); \
	    } \
	  _FP_FRAC_SLL_2 (X, 1); \
	  (q) >>= 1; \
	} \
      if (X##_f0 | X##_f1) \
	{ \
	  if (S##_f1 < X##_f1 \
	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
	    R##_f0 |= _FP_WORK_ROUND; \
	  R##_f0 |= _FP_WORK_STICKY; \
	} \
    } \
  while (0)


/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Store X into the integer R of RSIZE bits: only the low word when
   RSIZE <= W, otherwise high word << W plus low word.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
	  ? ({ (r) = X##_f0; }) \
	  : ({ \
	      (r) = X##_f1; \
	      (r) <<= _FP_W_TYPE_SIZE; \
	      (r) += X##_f0; \
	    }))

/* Split the integer R of RSIZE bits into X: the low word receives R
   and the high word receives R >> W, or 0 when RSIZE <= W.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
		? 0 \
		: (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)

/* Convert FP values between word sizes.  */

/* Two-word to one-word: keep only the low word.  */
#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)

/* One-word to two-word: the high word becomes zero.  */
#define _FP_FRAC_COPY_2_1(D, S) ((D##_f0 = S##_f), (D##_f1 = 0))

#define _FP_FRAC_COPY_2_2(D, S) _FP_FRAC_COPY_2 (D, S)

#endif /* !SOFT_FP_OP_2_H */