1.file "cosh.s" 2 3 4// Copyright (c) 2000 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 02/02/00 Initial version 42// 04/04/00 Unwind support added 43// 08/15/00 Bundle added after call to __libm_error_support to properly 44// set [the previously overwritten] GR_Parameter_RESULT. 45// 05/07/01 Reworked to improve speed of all paths 46// 05/20/02 Cleaned up namespace and sf0 syntax 47// 11/15/02 Improved speed with new algorithm 48// 03/31/05 Reformatted delimiters between data tables 49 50// API 51//============================================================== 52// double cosh(double) 53 54// Overview of operation 55//============================================================== 56// Case 1: 0 < |x| < 0.25 57// Evaluate cosh(x) by a 12th order polynomial 58// Care is take for the order of multiplication; and A2 is not exactly 1/4!, 59// A3 is not exactly 1/6!, etc. 60// cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8 + A5*x^10 + A6*x^12) 61// 62// Case 2: 0.25 < |x| < 710.47586 63// Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2. 64// The algorithm for exp is described as below. There are a number of 65// economies from evaluating both exp(x) and exp(-x). Although we 66// are evaluating both quantities, only where the quantities diverge do we 67// duplicate the computations. The basic algorithm for exp(x) is described 68// below. 69// 70// Take the input x. w is "how many log2/128 in x?" 71// w = x * 128/log2 72// n = int(w) 73// x = n log2/128 + r + delta 74 75// n = 128M + index_1 + 2^4 index_2 76// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta 77 78// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta) 79// Construct 2^M 80// Get 2^(index_1/128) from table_1; 81// Get 2^(index_2/8) from table_2; 82// Calculate exp(r) by 5th order polynomial 83// r = x - n (log2/128)_high 84// delta = - n (log2/128)_low 85// Calculate exp(delta) as 1 + delta 86 87 88// Special values 89//============================================================== 90// cosh(+0) = 1.0 91// cosh(-0) = 1.0 92 93// cosh(+qnan) = +qnan 94// cosh(-qnan) = -qnan 95// cosh(+snan) = +qnan 96// cosh(-snan) = -qnan 97 98// cosh(-inf) = +inf 99// cosh(+inf) = +inf 100 101// Overflow and Underflow 102//======================= 103// cosh(x) = largest double normal when 104// x = 710.47586 = 0x408633ce8fb9f87d 105// 106// There is no underflow. 107 108// Registers used 109//============================================================== 110// Floating Point registers used: 111// f8, input, output 112// f6 -> f15, f32 -> f61 113 114// General registers used: 115// r14 -> r40 116 117// Predicate registers used: 118// p6 -> p15 119 120// Assembly macros 121//============================================================== 122 123rRshf = r14 124rN_neg = r14 125rAD_TB1 = r15 126rAD_TB2 = r16 127rAD_P = r17 128rN = r18 129rIndex_1 = r19 130rIndex_2_16 = r20 131rM = r21 132rBiased_M = r21 133rSig_inv_ln2 = r22 134rIndex_1_neg = r22 135rExp_bias = r23 136rExp_bias_minus_1 = r23 137rExp_mask = r24 138rTmp = r24 139rGt_ln = r24 140rIndex_2_16_neg = r24 141rM_neg = r25 142rBiased_M_neg = r25 143rRshf_2to56 = r26 144rAD_T1_neg = r26 145rExp_2tom56 = r28 146rAD_T2_neg = r28 147rAD_T1 = r29 148rAD_T2 = r30 149rSignexp_x = r31 150rExp_x = r31 151 152GR_SAVE_B0 = r33 153GR_SAVE_PFS = r34 154GR_SAVE_GP = r35 155GR_SAVE_SP = r36 156 157GR_Parameter_X = r37 158GR_Parameter_Y = r38 159GR_Parameter_RESULT = r39 160GR_Parameter_TAG = r40 161 162 163FR_X = f10 164FR_Y = f1 165FR_RESULT = f8 166 167fRSHF_2TO56 = f6 168fINV_LN2_2TO63 = f7 169fW_2TO56_RSH = f9 170f2TOM56 = f11 171fP5 = f12 172fP4 = f13 173fP3 = f14 174fP2 = f15 175 176fLn2_by_128_hi = f33 177fLn2_by_128_lo = f34 178 179fRSHF = f35 180fNfloat = f36 181fNormX = f37 182fR = f38 183fF = f39 184 185fRsq = f40 186f2M = f41 187fS1 = f42 188fT1 = f42 189fS2 = f43 190fT2 = f43 191fS = f43 192fWre_urm_f8 = f44 193fAbsX = f44 194 195fMIN_DBL_OFLOW_ARG = f45 196fMAX_DBL_NORM_ARG = f46 197fXsq = f47 198fX4 = f48 199fGt_pln = f49 200fTmp = f49 201 202fP54 = f50 203fP5432 = f50 204fP32 = f51 205fP = f52 206fP54_neg = f53 207fP5432_neg = f53 208fP32_neg = f54 209fP_neg = f55 210fF_neg = f56 211 212f2M_neg = f57 213fS1_neg = f58 214fT1_neg = f58 215fS2_neg = f59 216fT2_neg = f59 217fS_neg = f59 218fExp = f60 219fExp_neg = f61 220 221fA6 = f50 222fA65 = f50 223fA6543 = f50 224fA654321 = f50 225fA5 = f51 226fA4 = f52 227fA43 = f52 228fA3 = f53 229fA2 = f54 230fA21 = f54 231fA1 = f55 232 233// Data tables 234//============================================================== 235 236RODATA 237.align 16 238 239// ************* DO NOT CHANGE ORDER OF THESE TABLES ******************** 240 241// double-extended 1/ln(2) 242// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88 243// 3fff b8aa 3b29 5c17 f0bc 244// For speed the significand will be loaded directly with a movl and setf.sig 245// and the exponent will be bias+63 instead of bias+0. Thus subsequent 246// computations need to scale appropriately. 247// The constant 128/ln(2) is needed for the computation of w. This is also 248// obtained by scaling the computations. 249// 250// Two shifting constants are loaded directly with movl and setf.d. 251// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7) 252// This constant is added to x*1/ln2 to shift the integer part of 253// x*128/ln2 into the rightmost bits of the significand. 254// The result of this fma is fW_2TO56_RSH. 255// 2. fRSHF = 1.1000..00 * 2^(63) 256// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give 257// the integer part of w, n, as a floating-point number. 258// The result of this fms is fNfloat. 259 260 261LOCAL_OBJECT_START(exp_table_1) 262data8 0x408633ce8fb9f87e // smallest dbl overflow arg 263data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result 264data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi 265data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo 266// 267// Table 1 is 2^(index_1/128) where 268// index_1 goes from 0 to 15 269// 270data8 0x8000000000000000 , 0x00003FFF 271data8 0x80B1ED4FD999AB6C , 0x00003FFF 272data8 0x8164D1F3BC030773 , 0x00003FFF 273data8 0x8218AF4373FC25EC , 0x00003FFF 274data8 0x82CD8698AC2BA1D7 , 0x00003FFF 275data8 0x8383594EEFB6EE37 , 0x00003FFF 276data8 0x843A28C3ACDE4046 , 0x00003FFF 277data8 0x84F1F656379C1A29 , 0x00003FFF 278data8 0x85AAC367CC487B15 , 0x00003FFF 279data8 0x8664915B923FBA04 , 0x00003FFF 280data8 0x871F61969E8D1010 , 0x00003FFF 281data8 0x87DB357FF698D792 , 0x00003FFF 282data8 0x88980E8092DA8527 , 0x00003FFF 283data8 0x8955EE03618E5FDD , 0x00003FFF 284data8 0x8A14D575496EFD9A , 0x00003FFF 285data8 0x8AD4C6452C728924 , 0x00003FFF 286LOCAL_OBJECT_END(exp_table_1) 287 288// Table 2 is 2^(index_1/8) where 289// index_2 goes from 0 to 7 290LOCAL_OBJECT_START(exp_table_2) 291data8 0x8000000000000000 , 0x00003FFF 292data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF 293data8 0x9837F0518DB8A96F , 0x00003FFF 294data8 0xA5FED6A9B15138EA , 0x00003FFF 295data8 0xB504F333F9DE6484 , 0x00003FFF 296data8 0xC5672A115506DADD , 0x00003FFF 297data8 0xD744FCCAD69D6AF4 , 0x00003FFF 298data8 0xEAC0C6E7DD24392F , 0x00003FFF 299LOCAL_OBJECT_END(exp_table_2) 300 301LOCAL_OBJECT_START(exp_p_table) 302data8 0x3f8111116da21757 //P5 303data8 0x3fa55555d787761c //P4 304data8 0x3fc5555555555414 //P3 305data8 0x3fdffffffffffd6a //P2 306LOCAL_OBJECT_END(exp_p_table) 307 308LOCAL_OBJECT_START(cosh_p_table) 309data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // A6 310data8 0xD00D00D1021D7370, 0x00003FEF // A4 311data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // A2 312data8 0x93F27740C0C2F1CC, 0x00003FE9 // A5 313data8 0xB60B60B60B4FE884, 0x00003FF5 // A3 314data8 0x8000000000000000, 0x00003FFE // A1 315LOCAL_OBJECT_END(cosh_p_table) 316 317 318.section .text 319GLOBAL_IEEE754_ENTRY(cosh) 320 321{ .mlx 322 getf.exp rSignexp_x = f8 // Must recompute if x unorm 323 movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 324} 325{ .mlx 326 addl rAD_TB1 = @ltoff(exp_table_1), gp 327 movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56) 328} 329;; 330 331{ .mfi 332 ld8 rAD_TB1 = [rAD_TB1] 333 fclass.m p6,p0 = f8,0x0b // Test for x=unorm 334 mov rExp_mask = 0x1ffff 335} 336{ .mfi 337 mov rExp_bias = 0xffff 338 fnorm.s1 fNormX = f8 339 mov rExp_2tom56 = 0xffff-56 340} 341;; 342 343// Form two constants we need 344// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 345// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand 346 347{ .mfi 348 setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63 349 fclass.m p8,p0 = f8,0x07 // Test for x=0 350 nop.i 999 351} 352{ .mlx 353 setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56) 354 movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift 355} 356;; 357 358{ .mfi 359 ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16 360 fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT 361 nop.i 0 362} 363{ .mfb 364 setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat 365 nop.f 0 366(p6) br.cond.spnt COSH_UNORM // Branch if x=unorm 367} 368;; 369 370COSH_COMMON: 371{ .mfi 372 ldfe fLn2_by_128_hi = [rAD_TB1],16 373 nop.f 0 374 nop.i 0 375} 376{ .mfb 377 setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63 378(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0 379(p8) br.ret.spnt b0 380} 381;; 382 383{ .mfi 384 ldfe fLn2_by_128_lo = [rAD_TB1],16 385 nop.f 0 386 nop.i 0 387} 388{ .mfb 389 and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x 390(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=inf, nan, NaT 391(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT 392} 393;; 394 395// After that last load rAD_TB1 points to the beginning of table 1 396{ .mfi 397 nop.m 0 398 fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D 399 sub rExp_x = rExp_x, rExp_bias // True exponent of x 400} 401;; 402 403{ .mfi 404 nop.m 0 405 fmerge.s fAbsX = f0, fNormX // Form |x| 406 nop.i 0 407} 408{ .mfb 409 cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2) 410 fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path 411(p7) br.cond.spnt COSH_SMALL // Branch if 0 < |x| < 2^-2 412} 413;; 414 415// W = X * Inv_log2_by_128 416// By adding 1.10...0*2^63 we shift and get round_int(W) in significand. 417// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing. 418 419{ .mfi 420 add rAD_P = 0x180, rAD_TB1 421 fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56 422 add rAD_TB2 = 0x100, rAD_TB1 423} 424;; 425 426// Divide arguments into the following categories: 427// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG 428// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG 429// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf 430// 431// If the input is really a double arg, then there will never be 432// "Possible Overflow" arguments. 433// 434 435{ .mfi 436 ldfpd fP5, fP4 = [rAD_P] ,16 437 fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG 438 nop.i 0 439} 440;; 441 442// Nfloat = round_int(W) 443// The signficand of fW_2TO56_RSH contains the rounded integer part of W, 444// as a twos complement number in the lower bits (that is, it may be negative). 445// That twos complement number (called N) is put into rN. 446 447// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56 448// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat. 449// Thus, fNfloat contains the floating point version of N 450 451{ .mfi 452 ldfpd fP3, fP2 = [rAD_P] 453(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG 454 nop.i 0 455} 456{ .mfb 457 nop.m 0 458 fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF 459(p15) br.cond.spnt COSH_CERTAIN_OVERFLOW 460} 461;; 462 463{ .mfi 464 getf.sig rN = fW_2TO56_RSH 465 nop.f 0 466 mov rExp_bias_minus_1 = 0xfffe 467} 468;; 469 470// rIndex_1 has index_1 471// rIndex_2_16 has index_2 * 16 472// rBiased_M has M 473 474// rM has true M 475// r = x - Nfloat * ln2_by_128_hi 476// f = 1 - Nfloat * ln2_by_128_lo 477{ .mfi 478 and rIndex_1 = 0x0f, rN 479 fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX 480 shr rM = rN, 0x7 481} 482{ .mfi 483 and rIndex_2_16 = 0x70, rN 484 fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1 485 sub rN_neg = r0, rN 486} 487;; 488 489{ .mmi 490 and rIndex_1_neg = 0x0f, rN_neg 491 add rBiased_M = rExp_bias_minus_1, rM 492 shr rM_neg = rN_neg, 0x7 493} 494{ .mmi 495 and rIndex_2_16_neg = 0x70, rN_neg 496 add rAD_T2 = rAD_TB2, rIndex_2_16 497 shladd rAD_T1 = rIndex_1, 4, rAD_TB1 498} 499;; 500 501// rAD_T1 has address of T1 502// rAD_T2 has address if T2 503 504{ .mmi 505 setf.exp f2M = rBiased_M 506 ldfe fT2 = [rAD_T2] 507 nop.i 0 508} 509{ .mmi 510 add rBiased_M_neg = rExp_bias_minus_1, rM_neg 511 add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg 512 shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1 513} 514;; 515 516// Create Scale = 2^M 517// Load T1 and T2 518{ .mmi 519 ldfe fT1 = [rAD_T1] 520 nop.m 0 521 nop.i 0 522} 523{ .mmf 524 setf.exp f2M_neg = rBiased_M_neg 525 ldfe fT2_neg = [rAD_T2_neg] 526 fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1 527} 528;; 529 530{ .mfi 531 nop.m 0 532 fma.s1 fRsq = fR, fR, f0 533 nop.i 0 534} 535{ .mfi 536 ldfe fT1_neg = [rAD_T1_neg] 537 fma.s1 fP54 = fR, fP5, fP4 538 nop.i 0 539} 540;; 541 542{ .mfi 543 nop.m 0 544 fma.s1 fP32 = fR, fP3, fP2 545 nop.i 0 546} 547{ .mfi 548 nop.m 0 549 fnma.s1 fP54_neg = fR, fP5, fP4 550 nop.i 0 551} 552;; 553 554{ .mfi 555 nop.m 0 556 fnma.s1 fP32_neg = fR, fP3, fP2 557 nop.i 0 558} 559;; 560 561{ .mfi 562 nop.m 0 563 fma.s1 fP5432 = fRsq, fP54, fP32 564 nop.i 0 565} 566{ .mfi 567 nop.m 0 568 fma.s1 fS2 = fF,fT2,f0 569 nop.i 0 570} 571;; 572 573{ .mfi 574 nop.m 0 575 fma.s1 fS1 = f2M,fT1,f0 576 nop.i 0 577} 578{ .mfi 579 nop.m 0 580 fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg 581 nop.i 0 582} 583;; 584 585{ .mfi 586 nop.m 0 587 fma.s1 fS1_neg = f2M_neg,fT1_neg,f0 588 nop.i 0 589} 590{ .mfi 591 nop.m 0 592 fma.s1 fS2_neg = fF_neg,fT2_neg,f0 593 nop.i 0 594} 595;; 596 597{ .mfi 598 nop.m 0 599 fma.s1 fP = fRsq, fP5432, fR 600 nop.i 0 601} 602{ .mfi 603 nop.m 0 604 fma.s1 fS = fS1,fS2,f0 605 nop.i 0 606} 607;; 608 609{ .mfi 610 nop.m 0 611 fms.s1 fP_neg = fRsq, fP5432_neg, fR 612 nop.i 0 613} 614{ .mfi 615 nop.m 0 616 fma.s1 fS_neg = fS1_neg,fS2_neg,f0 617 nop.i 0 618} 619;; 620 621{ .mfb 622 nop.m 0 623 fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact 624(p14) br.cond.spnt COSH_POSSIBLE_OVERFLOW 625} 626;; 627 628{ .mfi 629 nop.m 0 630 fma.s1 fExp = fS, fP, fS 631 nop.i 0 632} 633{ .mfi 634 nop.m 0 635 fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg 636 nop.i 0 637} 638;; 639 640{ .mfb 641 nop.m 0 642 fma.d.s0 f8 = fExp, f1, fExp_neg 643 br.ret.sptk b0 // Normal path exit 644} 645;; 646 647// Here if 0 < |x| < 0.25 648COSH_SMALL: 649{ .mmf 650 add rAD_T1 = 0x1a0, rAD_TB1 651 add rAD_T2 = 0x1d0, rAD_TB1 652} 653;; 654 655{ .mmf 656 ldfe fA6 = [rAD_T1],16 657 ldfe fA5 = [rAD_T2],16 658 nop.f 0 659} 660;; 661 662{ .mmi 663 ldfe fA4 = [rAD_T1],16 664 ldfe fA3 = [rAD_T2],16 665 nop.i 0 666} 667;; 668 669{ .mmi 670 ldfe fA2 = [rAD_T1],16 671 ldfe fA1 = [rAD_T2],16 672 nop.i 0 673} 674;; 675 676{ .mfi 677 nop.m 0 678 fma.s1 fX4 = fXsq, fXsq, f0 679 nop.i 0 680} 681;; 682 683{ .mfi 684 nop.m 0 685 fma.s1 fA65 = fXsq, fA6, fA5 686 nop.i 0 687} 688{ .mfi 689 nop.m 0 690 fma.s1 fA43 = fXsq, fA4, fA3 691 nop.i 0 692} 693;; 694 695{ .mfi 696 nop.m 0 697 fma.s1 fA21 = fXsq, fA2, fA1 698 nop.i 0 699} 700;; 701 702{ .mfi 703 nop.m 0 704 fma.s1 fA6543 = fX4, fA65, fA43 705 nop.i 0 706} 707;; 708 709{ .mfi 710 nop.m 0 711 fma.s1 fA654321 = fX4, fA6543, fA21 712 nop.i 0 713} 714;; 715 716// Dummy multiply to generate inexact 717{ .mfi 718 nop.m 0 719 fmpy.s0 fTmp = fA6, fA6 720 nop.i 0 721} 722{ .mfb 723 nop.m 0 724 fma.d.s0 f8 = fA654321, fXsq, f1 725 br.ret.sptk b0 // Exit if 0 < |x| < 0.25 726} 727;; 728 729 730COSH_POSSIBLE_OVERFLOW: 731 732// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG 733// This cannot happen if input is a double, only if input higher precision. 734// Overflow is a possibility, not a certainty. 735 736// Recompute result using status field 2 with user's rounding mode, 737// and wre set. If result is larger than largest double, then we have 738// overflow 739 740{ .mfi 741 mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp 742 fsetc.s2 0x7F,0x42 // Get user's round mode, set wre 743 nop.i 0 744} 745;; 746 747{ .mfi 748 setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp 749 fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set 750 nop.i 0 751} 752;; 753 754{ .mfi 755 nop.m 0 756 fsetc.s2 0x7F,0x40 // Turn off wre in sf2 757 nop.i 0 758} 759;; 760 761{ .mfi 762 nop.m 0 763 fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow 764 nop.i 0 765} 766;; 767 768{ .mfb 769 nop.m 0 770 nop.f 0 771(p6) br.cond.spnt COSH_CERTAIN_OVERFLOW // Branch if overflow 772} 773;; 774 775{ .mfb 776 nop.m 0 777 fma.d.s0 f8 = fS, fP, fS 778 br.ret.sptk b0 // Exit if really no overflow 779} 780;; 781 782COSH_CERTAIN_OVERFLOW: 783{ .mmi 784 sub rTmp = rExp_mask, r0, 1 785;; 786 setf.exp fTmp = rTmp 787 nop.i 0 788} 789;; 790 791{ .mfi 792 alloc r32=ar.pfs,1,4,4,0 793 fmerge.s FR_X = f8,f8 794 nop.i 0 795} 796{ .mfb 797 mov GR_Parameter_TAG = 64 798 fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result 799 br.cond.sptk __libm_error_region 800} 801;; 802 803// Here if x unorm 804COSH_UNORM: 805{ .mfb 806 getf.exp rSignexp_x = fNormX // Must recompute if x unorm 807 fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag 808 br.cond.sptk COSH_COMMON 809} 810;; 811 812GLOBAL_IEEE754_END(cosh) 813libm_alias_double_other (__cosh, cosh) 814 815 816LOCAL_LIBM_ENTRY(__libm_error_region) 817.prologue 818{ .mfi 819 add GR_Parameter_Y=-32,sp // Parameter 2 value 820 nop.f 0 821.save ar.pfs,GR_SAVE_PFS 822 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 823} 824{ .mfi 825.fframe 64 826 add sp=-64,sp // Create new stack 827 nop.f 0 828 mov GR_SAVE_GP=gp // Save gp 829};; 830{ .mmi 831 stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack 832 add GR_Parameter_X = 16,sp // Parameter 1 address 833.save b0, GR_SAVE_B0 834 mov GR_SAVE_B0=b0 // Save b0 835};; 836.body 837{ .mib 838 stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack 839 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 840 nop.b 0 841} 842{ .mib 843 stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack 844 add GR_Parameter_Y = -16,GR_Parameter_Y 845 br.call.sptk b0=__libm_error_support# // Call error handling function 846};; 847{ .mmi 848 add GR_Parameter_RESULT = 48,sp 849 nop.m 0 850 nop.i 0 851};; 852{ .mmi 853 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack 854.restore sp 855 add sp = 64,sp // Restore stack pointer 856 mov b0 = GR_SAVE_B0 // Restore return address 857};; 858{ .mib 859 mov gp = GR_SAVE_GP // Restore gp 860 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 861 br.ret.sptk b0 // Return 862};; 863 864LOCAL_LIBM_END(__libm_error_region) 865.type __libm_error_support#,@function 866.global __libm_error_support# 867