.file "sinhf.s"


// Copyright (c) 2000 - 2005, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.

// History
//*********************************************************************
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
//          set [the previously overwritten] GR_Parameter_RESULT.
// 10/12/00 Update to set denormal operand and underflow flags
// 01/22/01 Fixed to set inexact flag for small args.
// 05/02/01 Reworked to improve speed of all paths
// 05/20/02 Cleaned up namespace and sf0 syntax
// 11/20/02 Improved algorithm based on expf
// 03/31/05 Reformatted delimiters between data tables
//
// API
//*********************************************************************
// float sinhf(float)
//
// Overview of operation
//*********************************************************************
// Case 1:  0 < |x| < 2^-60
//  Result = x, computed by x+sgn(x)*x^2 to handle flags and rounding
//
// Case 2:  2^-60 < |x| < 0.25
//  Evaluate sinh(x) by a 9th order polynomial
//  Care is taken with the order of multiplication; and A2 is not exactly
//  1/5!, A3 is not exactly 1/7!, etc.
//  sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9)
//
// Case 3:  0.25 < |x| < 89.41598
//  Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
//  The algorithm for exp is described as below.  There are a number of
//  economies from evaluating both exp(x) and exp(-x).  Although we
//  are evaluating both quantities, only where the quantities diverge do we
//  duplicate the computations.  The basic algorithm for exp(x) is described
//  below.
//
// Take the input x. w is "how many log2/64 in x?"
//  w = x * 64/log2
//  NJ = int(w)
//  x = NJ*log2/64 + R

//  NJ = 64*n + j
//  x = n*log2 + (log2/64)*j + R
//
//  So, exp(x) = 2^n * 2^(j/64)* exp(R)
//
//  T =  2^n * 2^(j/64)
//       Construct 2^n
//       Get 2^(j/64) table
//           actually all the entries of 2^(j/64) table are stored in DP and
//           with exponent bits set to 0 -> multiplication on 2^n can be
//           performed by doing logical "or" operation with bits
//           representing 2^n

//  exp(R) = 1 + (exp(R) - 1)
//  P = exp(R) - 1 approximated by Taylor series of 3rd degree
//      P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
//

//  The final result is reconstructed as follows
//  exp(x) = T + T*P

// Special values
//*********************************************************************
// sinhf(+0)    = +0
// sinhf(-0)    = -0

// sinhf(+qnan) = +qnan
// sinhf(-qnan) = -qnan
// sinhf(+snan) = +qnan
// sinhf(-snan) = -qnan

// sinhf(-inf)  = -inf
// sinhf(+inf)  = +inf

// Overflow and Underflow
//*********************************************************************
// sinhf(x) = largest single normal when
//     x = 89.41598 = 0x42b2d4fc
//
// Underflow is handled as described in case 1 above

// Registers used
//*********************************************************************
// Floating Point registers used:
// f8 input, output
// f6,f7, f9 -> f15,  f32 -> f45

// General registers used:
// r2, r3, r16 -> r38

// Predicate registers used:
// p6 -> p15

// Assembly macros
//*********************************************************************
// integer registers used
// scratch
// (several names below deliberately alias the same register; the aliased
//  values are used at different points in the code)
rNJ                   = r2
rNJ_neg               = r3

rJ_neg                = r16
rN_neg                = r17
rSignexp_x            = r18
rExp_x                = r18
rExp_mask             = r19
rExp_bias             = r20
rAd1                  = r21
rAd2                  = r22
rJ                    = r23
rN                    = r24
rTblAddr              = r25
rA3                   = r26
rExpHalf              = r27
rLn2Div64             = r28
rGt_ln                = r29
r17ones_m1            = r29
rRightShifter         = r30
rJ_mask               = r30
r64DivLn2             = r31
rN_mask               = r31
// stacked
GR_SAVE_PFS           = r32
GR_SAVE_B0            = r33
GR_SAVE_GP            = r34
GR_Parameter_X        = r35
GR_Parameter_Y        = r36
GR_Parameter_RESULT   = r37
GR_Parameter_TAG      = r38

// floating point registers used
FR_X                  = f10
FR_Y                  = f1
FR_RESULT             = f8
// scratch
fRightShifter         = f6
f64DivLn2             = f7
fNormX                = f9
fNint                 = f10
fN                    = f11
fR                    = f12
fLn2Div64             = f13
fA2                   = f14
fA3                   = f15
// stacked
fP                    = f32
fT                    = f33
fMIN_SGL_OFLOW_ARG    = f34
fMAX_SGL_NORM_ARG     = f35
fRSqr                 = f36
fA1                   = f37
fA21                  = f37
fA4                   = f38
fA43                  = f38
fA4321                = f38
fX4                   = f39
fTmp                  = f39
fGt_pln               = f39
fWre_urm_f8           = f40
fXsq                  = f40
fP_neg                = f41
fX3                   = f41
fT_neg                = f42
fExp                  = f43
fExp_neg              = f44
fAbsX                 = f45


RODATA
.align 16

// Layout note: the code addresses sinh_p_table as an offset (0x200/0x210)
// from the start of the 2^(j/64) entries, so sinh_p_table must stay
// immediately after _sinhf_table and the table must keep 64 8-byte entries.
LOCAL_OBJECT_START(_sinhf_table)
data4 0x42b2d4fd         // Smallest single arg to overflow single result
data4 0x42b2d4fc         // Largest single arg to give normal single result
data4 0x00000000         // pad
data4 0x00000000         // pad
//
// 2^(j/64) table, j goes from 0 to 63
// (DP significand bits only; exponent field is zero so 2^(n-1) can be or'ed in)
data8 0x0000000000000000 // 2^(0/64)
data8 0x00002C9A3E778061 // 2^(1/64)
data8 0x000059B0D3158574 // 2^(2/64)
data8 0x0000874518759BC8 // 2^(3/64)
data8 0x0000B5586CF9890F // 2^(4/64)
data8 0x0000E3EC32D3D1A2 // 2^(5/64)
data8 0x00011301D0125B51 // 2^(6/64)
data8 0x0001429AAEA92DE0 // 2^(7/64)
data8 0x000172B83C7D517B // 2^(8/64)
data8 0x0001A35BEB6FCB75 // 2^(9/64)
data8 0x0001D4873168B9AA // 2^(10/64)
data8 0x0002063B88628CD6 // 2^(11/64)
data8 0x0002387A6E756238 // 2^(12/64)
data8 0x00026B4565E27CDD // 2^(13/64)
data8 0x00029E9DF51FDEE1 // 2^(14/64)
data8 0x0002D285A6E4030B // 2^(15/64)
data8 0x000306FE0A31B715 // 2^(16/64)
data8 0x00033C08B26416FF // 2^(17/64)
data8 0x000371A7373AA9CB // 2^(18/64)
data8 0x0003A7DB34E59FF7 // 2^(19/64)
data8 0x0003DEA64C123422 // 2^(20/64)
data8 0x0004160A21F72E2A // 2^(21/64)
data8 0x00044E086061892D // 2^(22/64)
data8 0x000486A2B5C13CD0 // 2^(23/64)
data8 0x0004BFDAD5362A27 // 2^(24/64)
data8 0x0004F9B2769D2CA7 // 2^(25/64)
data8 0x0005342B569D4F82 // 2^(26/64)
data8 0x00056F4736B527DA // 2^(27/64)
data8 0x0005AB07DD485429 // 2^(28/64)
data8 0x0005E76F15AD2148 // 2^(29/64)
data8 0x0006247EB03A5585 // 2^(30/64)
data8 0x0006623882552225 // 2^(31/64)
data8 0x0006A09E667F3BCD // 2^(32/64)
data8 0x0006DFB23C651A2F // 2^(33/64)
data8 0x00071F75E8EC5F74 // 2^(34/64)
data8 0x00075FEB564267C9 // 2^(35/64)
data8 0x0007A11473EB0187 // 2^(36/64)
data8 0x0007E2F336CF4E62 // 2^(37/64)
data8 0x00082589994CCE13 // 2^(38/64)
data8 0x000868D99B4492ED // 2^(39/64)
data8 0x0008ACE5422AA0DB // 2^(40/64)
data8 0x0008F1AE99157736 // 2^(41/64)
data8 0x00093737B0CDC5E5 // 2^(42/64)
data8 0x00097D829FDE4E50 // 2^(43/64)
data8 0x0009C49182A3F090 // 2^(44/64)
data8 0x000A0C667B5DE565 // 2^(45/64)
data8 0x000A5503B23E255D // 2^(46/64)
data8 0x000A9E6B5579FDBF // 2^(47/64)
data8 0x000AE89F995AD3AD // 2^(48/64)
data8 0x000B33A2B84F15FB // 2^(49/64)
data8 0x000B7F76F2FB5E47 // 2^(50/64)
data8 0x000BCC1E904BC1D2 // 2^(51/64)
data8 0x000C199BDD85529C // 2^(52/64)
data8 0x000C67F12E57D14B // 2^(53/64)
data8 0x000CB720DCEF9069 // 2^(54/64)
data8 0x000D072D4A07897C // 2^(55/64)
data8 0x000D5818DCFBA487 // 2^(56/64)
data8 0x000DA9E603DB3285 // 2^(57/64)
data8 0x000DFC97337B9B5F // 2^(58/64)
data8 0x000E502EE78B3FF6 // 2^(59/64)
data8 0x000EA4AFA2A490DA // 2^(60/64)
data8 0x000EFA1BEE615A27 // 2^(61/64)
data8 0x000F50765B6E4540 // 2^(62/64)
data8 0x000FA7C1819E90D8 // 2^(63/64)
LOCAL_OBJECT_END(_sinhf_table)

// Double-precision coefficients for the small-argument polynomial (Case 2)
LOCAL_OBJECT_START(sinh_p_table)
data8 0x3ec749d84bc96d7d // A4
data8 0x3f2a0168d09557cf // A3
data8 0x3f811111326ed15a // A2
data8 0x3fc55555552ed1e2 // A1
LOCAL_OBJECT_END(sinh_p_table)


.section .text
GLOBAL_IEEE754_ENTRY(sinhf)

{ .mlx
      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
      movl            r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
}
{ .mlx
      addl            rTblAddr = @ltoff(_sinhf_table),gp
      movl            rRightShifter = 0x43E8000000000000 // DP Right Shifter
}
;;

{ .mfi
      // point to the beginning of the table
      ld8             rTblAddr = [rTblAddr]
      fclass.m        p6, p0 = f8, 0x0b   // Test for x=unorm
      addl            rA3 = 0x3E2AA, r0   // high bits of 1.0/6.0 rounded to SP
}
{ .mfi
      nop.m           0
      fnorm.s1        fNormX = f8 // normalized x
      addl            rExpHalf = 0xFFFE, r0 // exponent of 1/2
}
;;

{ .mfi
      setf.d          f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
      fclass.m        p15, p0 = f8, 0x1e3   // test for NaT,NaN,Inf
      nop.i           0
}
{ .mlx
      // load Right Shifter to FP reg
      setf.d          fRightShifter = rRightShifter
      movl            rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
}
;;

{ .mfi
      mov             rExp_mask = 0x1ffff
      fcmp.eq.s1      p13, p0 = f0, f8 // test for x = 0.0
      shl             rA3 = rA3, 12    // 0x3E2AA000, approx to 1.0/6.0 in SP
}
{ .mfb
      nop.m           0
      nop.f           0
(p6)  br.cond.spnt    SINH_UNORM            // Branch if x=unorm
}
;;

// SINH_UNORM rejoins here after refreshing rSignexp_x from fNormX
SINH_COMMON:
{ .mfi
      setf.exp        fA2 = rExpHalf        // load A2 to FP reg
      nop.f           0
      mov             rExp_bias = 0xffff
}
{ .mfb
      setf.d          fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
(p15) fma.s.s0        f8 = f8, f1, f0       // result if x = NaT,NaN,Inf
(p15) br.ret.spnt     b0                    // exit here if x = NaT,NaN,Inf
}
;;

{ .mfi
      // min overflow and max normal threshold
      // (post-increment leaves rTblAddr at table start + 8)
      ldfps           fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
      nop.f           0
      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
}
{ .mfb
      setf.s          fA3 = rA3                  // load A3 to FP reg
      nop.f           0
(p13) br.ret.spnt     b0                    // exit here if x=0.0, return x
}
;;

{ .mfi
      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
      fmerge.s        fAbsX = f0, fNormX         // Form |x|
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      // x*(64/ln(2)) + Right Shifter
      fma.s1          fNint = fNormX, f64DivLn2, fRightShifter
      add             rTblAddr = 8, rTblAddr     // skip pad: -> 2^(0/64) entry
}
{ .mfb
      cmp.gt          p7, p0 = -2, rExp_x        // Test |x| < 2^(-2)
      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
(p7)  br.cond.spnt    SINH_SMALL                 // Branch if 0 < |x| < 2^-2
}
;;

{ .mfi
      nop.m           0
      // check for overflow
      fcmp.ge.s1      p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
      mov             rJ_mask = 0x3f             // 6-bit mask for J
}
;;

{ .mfb
      nop.m           0
      fms.s1          fN = fNint, f1, fRightShifter // n in FP register
      // branch out if overflow
(p12) br.cond.spnt    SINH_CERTAIN_OVERFLOW
}
;;

{ .mfi
      getf.sig        rNJ = fNint                   // bits of n, j
      // check for possible overflow
      fcmp.gt.s1      p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
      nop.i           0
}
;;

{ .mfi
      addl            rN = 0xFFBF - 63, rNJ         // biased and shifted n-1,j
      fnma.s1         fR = fLn2Div64, fN, fNormX    // R = x - N*ln(2)/64
      and             rJ = rJ_mask, rNJ             // bits of j
}
{ .mfi
      sub             rNJ_neg = r0, rNJ             // bits of n, j for -x
      nop.f           0
      andcm           rN_mask = -1, rJ_mask         // 0xff...fc0 to mask N
}
;;

{ .mfi
      shladd          rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
      nop.f           0
      and             rN = rN_mask, rN              // biased, shifted n-1
}
{ .mfi
      addl            rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
      nop.f           0
      and             rJ_neg = rJ_mask, rNJ_neg     // bits of j for -x
}
;;

{ .mfi
      ld8             rJ = [rJ]                    // Table value
      nop.f           0
      shl             rN = rN, 46 // 2^(n-1) bits in DP format
}
{ .mfi
      shladd          rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
      nop.f           0
      and             rN_neg = rN_mask, rN_neg     // biased, shifted n-1 for -x
}
;;

{ .mfi
      ld8             rJ_neg = [rJ_neg]            // Table value for -x
      nop.f           0
      shl             rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
}
;;

{ .mfi
      or              rN = rN, rJ // bits of 2^(n-1) * 2^(j/64) in DP format
      nop.f           0
      nop.i           0
}
;;

{ .mmf
      setf.d          fT = rN              // 2^(n-1) * 2^(j/64)
      or              rN_neg = rN_neg, rJ_neg // -x bits of 2^(n-1) * 2^(j/64)
      fma.s1          fRSqr = fR, fR, f0   // R^2
}
;;

{ .mfi
      setf.d          fT_neg = rN_neg      // 2^(n-1) * 2^(j/64) for -x
      fma.s1          fP = fA3, fR, fA2    // A3*R + A2
      nop.i           0
}
{ .mfi
      nop.m           0
      fnma.s1         fP_neg = fA3, fR, fA2    // A3*(-R) + A2, for -x
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          fP = fP, fRSqr, fR   // P = (A3*R + A2)*R^2 + R
      nop.i           0
}
{ .mfi
      nop.m           0
      fms.s1          fP_neg = fP_neg, fRSqr, fR  // P for -x: (..)*R^2 - R
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fmpy.s0         fTmp = fLn2Div64, fLn2Div64       // Force inexact
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          fExp = fP, fT, fT                 // exp(x)/2
      nop.i           0
}
{ .mfb
      nop.m           0
      fma.s1          fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
      // branch out if possible overflow result
(p13) br.cond.spnt    SINH_POSSIBLE_OVERFLOW
}
;;

{ .mfb
      nop.m           0
      // final result in the absence of overflow
      fms.s.s0        f8 = fExp, f1, fExp_neg  // result = (exp(x)-exp(-x))/2
      // exit here in the absence of overflow
      br.ret.sptk     b0              // Exit main path, 0.25 <= |x| < 89.41598
}
;;

// Here if 0 < |x| < 0.25.  Evaluate 9th order polynomial.
// On entry rTblAddr points at the 2^(0/64) entry, so +0x200 (64 entries of
// 8 bytes) is the first byte past _sinhf_table, i.e. sinh_p_table.
SINH_SMALL:
{ .mfi
      add             rAd1 = 0x200, rTblAddr    // -> A4, A3
      fcmp.lt.s1      p7, p8 = fNormX, f0       // Test sign of x
      cmp.gt          p6, p0 = -60, rExp_x      // Test |x| < 2^(-60)
}
{ .mfi
      add             rAd2 = 0x210, rTblAddr    // -> A2, A1
      nop.f           0
      nop.i           0
}
;;

{ .mmb
      ldfpd           fA4, fA3 = [rAd1]
      ldfpd           fA2, fA1 = [rAd2]
(p6)  br.cond.spnt    SINH_VERY_SMALL           // Branch if |x| < 2^(-60)
}
;;

{ .mfi
      nop.m           0
      fma.s1          fX3 = fXsq, fNormX, f0    // x^3
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          fX4 = fXsq, fXsq, f0      // x^4
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          fA43 = fXsq, fA4, fA3     // A4*x^2 + A3
      nop.i           0
}
{ .mfi
      nop.m           0
      fma.s1          fA21 = fXsq, fA2, fA1     // A2*x^2 + A1
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fma.s1          fA4321 = fX4, fA43, fA21  // x^4*(A4*x^2+A3) + A2*x^2+A1
      nop.i           0
}
;;

// Dummy multiply to generate inexact
{ .mfi
      nop.m           0
      fmpy.s0         fTmp = fA4, fA4
      nop.i           0
}
{ .mfb
      nop.m           0
      fma.s.s0        f8 = fA4321, fX3, fNormX  // x + x^3*poly
      br.ret.sptk     b0                // Exit if 2^-60 < |x| < 0.25
}
;;

SINH_VERY_SMALL:
// Here if 0 < |x| < 2^-60
// Compute result by x + sgn(x)*x^2 to get properly rounded result
// p7/p8 were set from the sign test of x at SINH_SMALL
.pred.rel "mutex",p7,p8
{ .mfi
      nop.m           0
(p7)  fnma.s.s0       f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
      nop.i           0
}
{ .mfb
      nop.m           0
(p8)  fma.s.s0        f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
      br.ret.sptk     b0                          // Exit if |x| < 2^-60
}
;;

SINH_POSSIBLE_OVERFLOW:

// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
// This cannot happen if input is a single, only if input higher precision.
// Overflow is a possibility, not a certainty.

// Recompute result using status field 2 with user's rounding mode,
// and wre set.  If result is larger than largest single, then we have
// overflow
//
// NOTE(review): this path is entered on |x| (p13 is set from fAbsX) but the
// trial and final results below use only the exp(x) term (fP, fT), not
// fExp - fExp_neg.  Presumably acceptable because the path is unreachable
// for single-precision inputs -- confirm before relying on it for x < 0.

{ .mfi
      mov             rGt_ln  = 0x1007f // Exponent for largest single + 1 ulp
      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
      nop.i           0
}
;;

{ .mfi
      setf.exp        fGt_pln = rGt_ln  // Create largest single + 1 ulp
      fma.s.s2        fWre_urm_f8 = fP, fT, fT    // Result with wre set
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
      nop.i           0
}
;;

{ .mfi
      nop.m           0
      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
      nop.i           0
}
;;

{ .mfb
      nop.m           0
      nop.f           0
(p6)  br.cond.spnt    SINH_CERTAIN_OVERFLOW // Branch if overflow
}
;;

{ .mfb
      nop.m           0
      fma.s.s0        f8 = fP, fT, fT
      br.ret.sptk     b0                     // Exit if really no overflow
}
;;

// here if overflow
// Builds a signed overflowed result in FR_RESULT, copies the original
// argument to FR_X, sets the error tag and hands off to __libm_error_region.
SINH_CERTAIN_OVERFLOW:
{ .mfi
      addl            r17ones_m1 = 0x1FFFE, r0
      fcmp.lt.s1      p6, p7 = fNormX, f0       // Test for x < 0
      nop.i           0
}
;;

{ .mmf
      alloc           r32 = ar.pfs, 0, 3, 4, 0 // get some registers
      setf.exp        fTmp = r17ones_m1
      fmerge.s        FR_X = f8,f8
}
;;

{ .mfi
      mov             GR_Parameter_TAG = 128
(p6)  fnma.s.s0       FR_RESULT = fTmp, fTmp, f0    // Set I,O and -INF result
      nop.i           0
}
{ .mfb
      nop.m           0
(p7)  fma.s.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
      br.cond.sptk    __libm_error_region
}
;;

// Here if x unorm
SINH_UNORM:
{ .mfb
      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
      br.cond.sptk    SINH_COMMON            // Return to main path
}
;;

GLOBAL_IEEE754_END(sinhf)
libm_alias_float_other (__sinh, sinh)


// Error stub: creates a 64-byte frame, stores FR_Y, FR_X and FR_RESULT as
// singles on the stack, passes their addresses in GR_Parameter_Y/X/RESULT to
// __libm_error_support, then reloads f8 from the result slot and returns.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
      nop.f 0
.save   ar.pfs,GR_SAVE_PFS
      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
      add sp=-64,sp                           // Create new stack
      nop.f 0
      mov GR_SAVE_GP=gp                       // Save gp
};;
{ .mmi
      stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
      add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
      mov GR_SAVE_B0=b0                       // Save b0
};;
.body
{ .mfi
      stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
      nop.f 0
      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
}
{ .mib
      stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
      add   GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk b0=__libm_error_support#   // Call error handling function
};;

// Recompute the result address after the call (see History, 08/15/00)
{ .mmi
      add   GR_Parameter_RESULT = 48,sp
      nop.m 0
      nop.i 0
};;

{ .mmi
      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
.restore sp
      add   sp = 64,sp                       // Restore stack pointer
      mov   b0 = GR_SAVE_B0                  // Restore return address
};;
{ .mib
      mov   gp = GR_SAVE_GP                  // Restore gp
      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
      br.ret.sptk     b0                     // Return
};;

LOCAL_LIBM_END(__libm_error_region)


.type   __libm_error_support#,@function
.global __libm_error_support#