1.file "exp.s" 2 3 4// Copyright (c) 2000 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 2/02/00 Initial version 42// 3/07/00 exp(inf) = inf but now does NOT call error support 43// exp(-inf) = 0 but now does NOT call error support 44// 4/04/00 Unwind support added 45// 8/15/00 Bundle added after call to __libm_error_support to properly 46// set [the previously overwritten] GR_Parameter_RESULT. 47// 11/30/00 Reworked to shorten main path, widen main path to include all 48// args in normal range, and add quick exit for 0, nan, inf. 49// 12/05/00 Loaded constants earlier with setf to save 2 cycles. 50// 02/05/02 Corrected uninitialize predicate in POSSIBLE_UNDERFLOW path 51// 05/20/02 Cleaned up namespace and sf0 syntax 52// 09/07/02 Force inexact flag 53// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path 54// 05/30/03 Set inexact flag on unmasked overflow/underflow 55// 03/31/05 Reformatted delimiters between data tables 56 57// API 58//============================================================== 59// double exp(double) 60 61// Overview of operation 62//============================================================== 63// Take the input x. w is "how many log2/128 in x?" 64// w = x * 128/log2 65// n = int(w) 66// x = n log2/128 + r + delta 67 68// n = 128M + index_1 + 2^4 index_2 69// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta 70 71// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta) 72// Construct 2^M 73// Get 2^(index_1/128) from table_1; 74// Get 2^(index_2/8) from table_2; 75// Calculate exp(r) by 5th order polynomial 76// r = x - n (log2/128)_high 77// delta = - n (log2/128)_low 78// Calculate exp(delta) as 1 + delta 79 80 81// Special values 82//============================================================== 83// exp(+0) = 1.0 84// exp(-0) = 1.0 85 86// exp(+qnan) = +qnan 87// exp(-qnan) = -qnan 88// exp(+snan) = +qnan 89// exp(-snan) = -qnan 90 91// exp(-inf) = +0 92// exp(+inf) = +inf 93 94// Overflow and Underflow 95//======================= 96// exp(x) = largest double normal when 97// x = 709.7827 = 0x40862e42fefa39ef 98 99// exp(x) = smallest double normal when 100// x = -708.396 = 0xc086232bdd7abcd2 101 102// exp(x) = largest round-to-nearest single zero when 103// x = -745.1332 = 0xc0874910d52d3052 104 105 106// Registers used 107//============================================================== 108// Floating Point registers used: 109// f8, input, output 110// f6 -> f15, f32 -> f49 111 112// General registers used: 113// r14 -> r40 114 115// Predicate registers used: 116// p6 -> p15 117 118// Assembly macros 119//============================================================== 120 121rRshf = r14 122rAD_TB1 = r15 123rAD_T1 = r15 124rAD_TB2 = r16 125rAD_T2 = r16 126rAD_P = r17 127rN = r18 128rIndex_1 = r19 129rIndex_2_16 = r20 130rM = r21 131rBiased_M = r21 132rIndex_1_16 = r21 133rSig_inv_ln2 = r22 134rExp_bias = r23 135rExp_mask = r24 136rTmp = r25 137rRshf_2to56 = r26 138rGt_ln = r27 139rExp_2tom56 = r28 140 141 142GR_SAVE_B0 = r33 143GR_SAVE_PFS = r34 144GR_SAVE_GP = r35 145GR_SAVE_SP = r36 146 147GR_Parameter_X = r37 148GR_Parameter_Y = r38 149GR_Parameter_RESULT = r39 150GR_Parameter_TAG = r40 151 152 153FR_X = f10 154FR_Y = f1 155FR_RESULT = f8 156 157fRSHF_2TO56 = f6 158fINV_LN2_2TO63 = f7 159fW_2TO56_RSH = f9 160f2TOM56 = f11 161fP5 = f12 162fP54 = f12 163fP5432 = f12 164fP4 = f13 165fP3 = f14 166fP32 = f14 167fP2 = f15 168fP = f15 169 170fLn2_by_128_hi = f33 171fLn2_by_128_lo = f34 172 173fRSHF = f35 174fNfloat = f36 175fNormX = f37 176fR = f38 177fF = f39 178 179fRsq = f40 180f2M = f41 181fS1 = f42 182fT1 = f42 183fS2 = f43 184fT2 = f43 185fS = f43 186fWre_urm_f8 = f44 187fFtz_urm_f8 = f44 188 189fMIN_DBL_OFLOW_ARG = f45 190fMAX_DBL_ZERO_ARG = f46 191fMAX_DBL_NORM_ARG = f47 192fMIN_DBL_NORM_ARG = f48 193fGt_pln = f49 194fTmp = f49 195 196 197// Data tables 198//============================================================== 199 200RODATA 201.align 16 202 203// ************* DO NOT CHANGE ORDER OF THESE TABLES ******************** 204 205// double-extended 1/ln(2) 206// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88 207// 3fff b8aa 3b29 5c17 f0bc 208// For speed the significand will be loaded directly with a movl and setf.sig 209// and the exponent will be bias+63 instead of bias+0. Thus subsequent 210// computations need to scale appropriately. 211// The constant 128/ln(2) is needed for the computation of w. This is also 212// obtained by scaling the computations. 213// 214// Two shifting constants are loaded directly with movl and setf.d. 215// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7) 216// This constant is added to x*1/ln2 to shift the integer part of 217// x*128/ln2 into the rightmost bits of the significand. 218// The result of this fma is fW_2TO56_RSH. 219// 2. fRSHF = 1.1000..00 * 2^(63) 220// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give 221// the integer part of w, n, as a floating-point number. 222// The result of this fms is fNfloat. 223 224 225LOCAL_OBJECT_START(exp_table_1) 226data8 0x40862e42fefa39f0 // smallest dbl overflow arg, +709.7827 227data8 0xc0874910d52d3052 // largest arg for rnd-to-nearest 0 result, -745.133 228data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result, +709.7827 229data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result, -708.396 230data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi 231data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo 232// 233// Table 1 is 2^(index_1/128) where 234// index_1 goes from 0 to 15 235// 236data8 0x8000000000000000 , 0x00003FFF 237data8 0x80B1ED4FD999AB6C , 0x00003FFF 238data8 0x8164D1F3BC030773 , 0x00003FFF 239data8 0x8218AF4373FC25EC , 0x00003FFF 240data8 0x82CD8698AC2BA1D7 , 0x00003FFF 241data8 0x8383594EEFB6EE37 , 0x00003FFF 242data8 0x843A28C3ACDE4046 , 0x00003FFF 243data8 0x84F1F656379C1A29 , 0x00003FFF 244data8 0x85AAC367CC487B15 , 0x00003FFF 245data8 0x8664915B923FBA04 , 0x00003FFF 246data8 0x871F61969E8D1010 , 0x00003FFF 247data8 0x87DB357FF698D792 , 0x00003FFF 248data8 0x88980E8092DA8527 , 0x00003FFF 249data8 0x8955EE03618E5FDD , 0x00003FFF 250data8 0x8A14D575496EFD9A , 0x00003FFF 251data8 0x8AD4C6452C728924 , 0x00003FFF 252LOCAL_OBJECT_END(exp_table_1) 253 254// Table 2 is 2^(index_1/8) where 255// index_2 goes from 0 to 7 256LOCAL_OBJECT_START(exp_table_2) 257data8 0x8000000000000000 , 0x00003FFF 258data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF 259data8 0x9837F0518DB8A96F , 0x00003FFF 260data8 0xA5FED6A9B15138EA , 0x00003FFF 261data8 0xB504F333F9DE6484 , 0x00003FFF 262data8 0xC5672A115506DADD , 0x00003FFF 263data8 0xD744FCCAD69D6AF4 , 0x00003FFF 264data8 0xEAC0C6E7DD24392F , 0x00003FFF 265LOCAL_OBJECT_END(exp_table_2) 266 267 268LOCAL_OBJECT_START(exp_p_table) 269data8 0x3f8111116da21757 //P5 270data8 0x3fa55555d787761c //P4 271data8 0x3fc5555555555414 //P3 272data8 0x3fdffffffffffd6a //P2 273LOCAL_OBJECT_END(exp_p_table) 274 275 276.section .text 277GLOBAL_IEEE754_ENTRY(exp) 278 279{ .mlx 280 nop.m 0 281 movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 282} 283{ .mlx 284 addl rAD_TB1 = @ltoff(exp_table_1), gp 285 movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56) 286} 287;; 288 289{ .mfi 290 ld8 rAD_TB1 = [rAD_TB1] 291 fclass.m p8,p0 = f8,0x07 // Test for x=0 292 mov rExp_mask = 0x1ffff 293} 294{ .mfi 295 mov rExp_bias = 0xffff 296 fnorm.s1 fNormX = f8 297 mov rExp_2tom56 = 0xffff-56 298} 299;; 300 301// Form two constants we need 302// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 303// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand 304 305{ .mfi 306 setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63 307 fclass.m p9,p0 = f8,0x22 // Test for x=-inf 308 nop.i 0 309} 310{ .mlx 311 setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56) 312 movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift 313} 314;; 315 316{ .mfi 317 ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_ZERO_ARG = [rAD_TB1],16 318 fclass.m p10,p0 = f8,0x1e1 // Test for x=+inf, nan, NaT 319 nop.i 0 320} 321{ .mfb 322 setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat 323(p9) fma.d.s0 f8 = f0,f0,f0 // quick exit for x=-inf 324(p9) br.ret.spnt b0 325} 326;; 327 328{ .mfi 329 ldfpd fMAX_DBL_NORM_ARG, fMIN_DBL_NORM_ARG = [rAD_TB1],16 330 nop.f 0 331 nop.i 0 332} 333{ .mfb 334 setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63 335(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0 336(p8) br.ret.spnt b0 337} 338;; 339 340{ .mfb 341 ldfe fLn2_by_128_hi = [rAD_TB1],16 342(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=+inf, nan, NaT 343(p10) br.ret.spnt b0 // quick exit for x=+inf, nan, NaT 344} 345;; 346 347{ .mfi 348 ldfe fLn2_by_128_lo = [rAD_TB1],16 349 fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D 350 nop.i 0 351} 352;; 353 354// After that last load, rAD_TB1 points to the beginning of table 1 355 356// W = X * Inv_log2_by_128 357// By adding 1.10...0*2^63 we shift and get round_int(W) in significand. 358// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing. 359 360{ .mfi 361 nop.m 0 362 fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56 363 nop.i 0 364} 365;; 366 367// Divide arguments into the following categories: 368// Certain Underflow p11 - -inf < x <= MAX_DBL_ZERO_ARG 369// Possible Underflow p13 - MAX_DBL_ZERO_ARG < x < MIN_DBL_NORM_ARG 370// Certain Safe - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG 371// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG 372// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf 373// 374// If the input is really a double arg, then there will never be 375// "Possible Overflow" arguments. 376// 377 378{ .mfi 379 add rAD_TB2 = 0x100, rAD_TB1 380 fcmp.ge.s1 p15,p0 = fNormX,fMIN_DBL_OFLOW_ARG 381 nop.i 0 382} 383;; 384 385{ .mfi 386 add rAD_P = 0x80, rAD_TB2 387 fcmp.le.s1 p11,p0 = fNormX,fMAX_DBL_ZERO_ARG 388 nop.i 0 389} 390;; 391 392{ .mfb 393 ldfpd fP5, fP4 = [rAD_P] ,16 394 fcmp.gt.s1 p14,p0 = fNormX,fMAX_DBL_NORM_ARG 395(p15) br.cond.spnt EXP_CERTAIN_OVERFLOW 396} 397;; 398 399// Nfloat = round_int(W) 400// The signficand of fW_2TO56_RSH contains the rounded integer part of W, 401// as a twos complement number in the lower bits (that is, it may be negative). 402// That twos complement number (called N) is put into rN. 403 404// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56 405// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat. 406// Thus, fNfloat contains the floating point version of N 407 408{ .mfb 409 ldfpd fP3, fP2 = [rAD_P] 410 fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF 411(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW 412} 413;; 414 415{ .mfi 416 getf.sig rN = fW_2TO56_RSH 417 nop.f 0 418 nop.i 0 419} 420;; 421 422// rIndex_1 has index_1 423// rIndex_2_16 has index_2 * 16 424// rBiased_M has M 425// rIndex_1_16 has index_1 * 16 426 427// rM has true M 428// r = x - Nfloat * ln2_by_128_hi 429// f = 1 - Nfloat * ln2_by_128_lo 430{ .mfi 431 and rIndex_1 = 0x0f, rN 432 fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX 433 shr rM = rN, 0x7 434} 435{ .mfi 436 and rIndex_2_16 = 0x70, rN 437 fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1 438 nop.i 0 439} 440;; 441 442// rAD_T1 has address of T1 443// rAD_T2 has address if T2 444 445{ .mmi 446 add rBiased_M = rExp_bias, rM 447 add rAD_T2 = rAD_TB2, rIndex_2_16 448 shladd rAD_T1 = rIndex_1, 4, rAD_TB1 449} 450;; 451 452// Create Scale = 2^M 453{ .mmi 454 setf.exp f2M = rBiased_M 455 ldfe fT2 = [rAD_T2] 456 nop.i 0 457} 458;; 459 460// Load T1 and T2 461{ .mfi 462 ldfe fT1 = [rAD_T1] 463 fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact 464 nop.i 0 465} 466;; 467 468{ .mfi 469 nop.m 0 470 fma.s1 fRsq = fR, fR, f0 471 nop.i 0 472} 473{ .mfi 474 nop.m 0 475 fma.s1 fP54 = fR, fP5, fP4 476 nop.i 0 477} 478;; 479 480{ .mfi 481 nop.m 0 482 fcmp.lt.s1 p13,p0 = fNormX,fMIN_DBL_NORM_ARG 483 nop.i 0 484} 485{ .mfi 486 nop.m 0 487 fma.s1 fP32 = fR, fP3, fP2 488 nop.i 0 489} 490;; 491 492{ .mfi 493 nop.m 0 494 fma.s1 fP5432 = fRsq, fP54, fP32 495 nop.i 0 496} 497;; 498 499{ .mfi 500 nop.m 0 501 fma.s1 fS1 = f2M,fT1,f0 502 nop.i 0 503} 504{ .mfi 505 nop.m 0 506 fma.s1 fS2 = fF,fT2,f0 507 nop.i 0 508} 509;; 510 511{ .mfi 512 nop.m 0 513 fma.s1 fP = fRsq, fP5432, fR 514 nop.i 0 515} 516{ .mfi 517 nop.m 0 518 fma.s1 fS = fS1,fS2,f0 519 nop.i 0 520} 521;; 522 523{ .mbb 524 nop.m 0 525(p13) br.cond.spnt EXP_POSSIBLE_UNDERFLOW 526(p14) br.cond.spnt EXP_POSSIBLE_OVERFLOW 527} 528;; 529 530{ .mfb 531 nop.m 0 532 fma.d.s0 f8 = fS, fP, fS 533 br.ret.sptk b0 // Normal path exit 534} 535;; 536 537 538EXP_POSSIBLE_OVERFLOW: 539 540// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG 541// This cannot happen if input is a double, only if input higher precision. 542// Overflow is a possibility, not a certainty. 543 544// Recompute result using status field 2 with user's rounding mode, 545// and wre set. If result is larger than largest double, then we have 546// overflow 547 548{ .mfi 549 mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp 550 fsetc.s2 0x7F,0x42 // Get user's round mode, set wre 551 nop.i 0 552} 553;; 554 555{ .mfi 556 setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp 557 fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set 558 nop.i 0 559} 560;; 561 562{ .mfi 563 nop.m 0 564 fsetc.s2 0x7F,0x40 // Turn off wre in sf2 565 nop.i 0 566} 567;; 568 569{ .mfi 570 nop.m 0 571 fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow 572 nop.i 0 573} 574;; 575 576{ .mfb 577 nop.m 0 578 nop.f 0 579(p6) br.cond.spnt EXP_CERTAIN_OVERFLOW // Branch if overflow 580} 581;; 582 583{ .mfb 584 nop.m 0 585 fma.d.s0 f8 = fS, fP, fS 586 br.ret.sptk b0 // Exit if really no overflow 587} 588;; 589 590EXP_CERTAIN_OVERFLOW: 591{ .mmi 592 sub rTmp = rExp_mask, r0, 1 593;; 594 setf.exp fTmp = rTmp 595 nop.i 0 596} 597;; 598 599{ .mfi 600 alloc r32=ar.pfs,1,4,4,0 601 fmerge.s FR_X = f8,f8 602 nop.i 0 603} 604{ .mfb 605 mov GR_Parameter_TAG = 14 606 fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result 607 br.cond.sptk __libm_error_region 608} 609;; 610 611EXP_POSSIBLE_UNDERFLOW: 612 613// Here if fMAX_DBL_ZERO_ARG < x < fMIN_DBL_NORM_ARG 614// Underflow is a possibility, not a certainty 615 616// We define an underflow when the answer with 617// ftz set 618// is zero (tiny numbers become zero) 619 620// Notice (from below) that if we have an unlimited exponent range, 621// then there is an extra machine number E between the largest denormal and 622// the smallest normal. 623 624// So if with unbounded exponent we round to E or below, then we are 625// tiny and underflow has occurred. 626 627// But notice that you can be in a situation where we are tiny, namely 628// rounded to E, but when the exponent is bounded we round to smallest 629// normal. So the answer can be the smallest normal with underflow. 630 631// E 632// -----+--------------------+--------------------+----- 633// | | | 634// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe 635// 0.1...11 2^-3ffe (biased, 1) 636// largest dn smallest normal 637 638{ .mfi 639 nop.m 0 640 fsetc.s2 0x7F,0x41 // Get user's round mode, set ftz 641 nop.i 0 642} 643;; 644 645{ .mfi 646 nop.m 0 647 fma.d.s2 fFtz_urm_f8 = fS, fP, fS // Result with ftz set 648 nop.i 0 649} 650;; 651 652{ .mfi 653 nop.m 0 654 fsetc.s2 0x7F,0x40 // Turn off ftz in sf2 655 nop.i 0 656} 657;; 658 659{ .mfi 660 nop.m 0 661 fcmp.eq.s1 p6, p7 = fFtz_urm_f8, f0 // Test for underflow 662 nop.i 0 663} 664{ .mfi 665 nop.m 0 666 fma.d.s0 f8 = fS, fP, fS // Compute result, set I, maybe U 667 nop.i 0 668} 669;; 670 671{ .mbb 672 nop.m 0 673(p6) br.cond.spnt EXP_UNDERFLOW_COMMON // Branch if really underflow 674(p7) br.ret.sptk b0 // Exit if really no underflow 675} 676;; 677 678EXP_CERTAIN_UNDERFLOW: 679// Here if x < fMAX_DBL_ZERO_ARG 680// Result will be zero (or smallest denorm if round to +inf) with I, U set 681{ .mmi 682 mov rTmp = 1 683;; 684 setf.exp fTmp = rTmp // Form small normal 685 nop.i 0 686} 687;; 688 689{ .mfi 690 nop.m 0 691 fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1 692 nop.i 0 693} 694;; 695 696{ .mfb 697 nop.m 0 698 fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result 699 br.cond.sptk EXP_UNDERFLOW_COMMON 700} 701;; 702 703EXP_UNDERFLOW_COMMON: 704// Determine if underflow result is zero or nonzero 705{ .mfi 706 alloc r32=ar.pfs,1,4,4,0 707 fcmp.eq.s1 p6, p0 = f8, f0 708 nop.i 0 709} 710;; 711 712{ .mfb 713 nop.m 0 714 fmerge.s FR_X = fNormX,fNormX 715(p6) br.cond.spnt EXP_UNDERFLOW_ZERO 716} 717;; 718 719EXP_UNDERFLOW_NONZERO: 720// Here if x < fMIN_DBL_NORM_ARG and result nonzero; 721// I, U are set 722{ .mfb 723 mov GR_Parameter_TAG = 15 724 nop.f 0 // FR_RESULT already set 725 br.cond.sptk __libm_error_region 726} 727;; 728 729EXP_UNDERFLOW_ZERO: 730// Here if x < fMIN_DBL_NORM_ARG and result zero; 731// I, U are set 732{ .mfb 733 mov GR_Parameter_TAG = 15 734 nop.f 0 // FR_RESULT already set 735 br.cond.sptk __libm_error_region 736} 737;; 738 739GLOBAL_IEEE754_END(exp) 740libm_alias_double_other (__exp, exp) 741#ifdef SHARED 742.symver exp,exp@@GLIBC_2.29 743.weak __exp_compat 744.set __exp_compat,__exp 745.symver __exp_compat,exp@GLIBC_2.2 746#endif 747 748 749LOCAL_LIBM_ENTRY(__libm_error_region) 750.prologue 751{ .mfi 752 add GR_Parameter_Y=-32,sp // Parameter 2 value 753 nop.f 0 754.save ar.pfs,GR_SAVE_PFS 755 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 756} 757{ .mfi 758.fframe 64 759 add sp=-64,sp // Create new stack 760 nop.f 0 761 mov GR_SAVE_GP=gp // Save gp 762};; 763{ .mmi 764 stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack 765 add GR_Parameter_X = 16,sp // Parameter 1 address 766.save b0, GR_SAVE_B0 767 mov GR_SAVE_B0=b0 // Save b0 768};; 769.body 770{ .mib 771 stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack 772 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 773 nop.b 0 774} 775{ .mib 776 stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack 777 add GR_Parameter_Y = -16,GR_Parameter_Y 778 br.call.sptk b0=__libm_error_support# // Call error handling function 779};; 780{ .mmi 781 add GR_Parameter_RESULT = 48,sp 782 nop.m 0 783 nop.i 0 784};; 785{ .mmi 786 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack 787.restore sp 788 add sp = 64,sp // Restore stack pointer 789 mov b0 = GR_SAVE_B0 // Restore return address 790};; 791{ .mib 792 mov gp = GR_SAVE_GP // Restore gp 793 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 794 br.ret.sptk b0 // Return 795};; 796 797LOCAL_LIBM_END(__libm_error_region) 798.type __libm_error_support#,@function 799.global __libm_error_support# 800