1.file "scalbl.s" 2 3 4// Copyright (c) 2000 - 2003, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 02/02/00 Initial version 42// 01/26/01 Scalb completely reworked and now standalone version 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 08/06/03 Improved performance 46// 47// API 48//============================================================== 49// long double = scalbl (long double x, long double n) 50// input floating point f8 and floating point f9 51// output floating point f8 52// 53// int_type = 0 if int is 32 bits 54// int_type = 1 if int is 64 bits 55// 56// Returns x* 2**n using an fma and detects overflow 57// and underflow. 58// 59// 60// Strategy: 61// Compute biased exponent of result exp_Result = N + exp_X 62// Break into ranges: 63// exp_Result > 0x13ffe -> Certain overflow 64// exp_Result = 0x13ffe -> Possible overflow 65// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path) 66// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow 67// exp_Result < 0x0c001 - 63 -> Certain underflow 68 69FR_Big = f6 70FR_NBig = f7 71FR_Floating_X = f8 72FR_Result = f8 73FR_Floating_N = f9 74FR_Result2 = f9 75FR_Result3 = f10 76FR_Norm_X = f11 77FR_Two_N = f12 78FR_N_float_int = f13 79FR_Norm_N = f14 80 81GR_neg_ov_limit= r14 82GR_big_exp = r14 83GR_N_Biased = r15 84GR_Big = r16 85GR_exp_Result = r18 86GR_pos_ov_limit= r19 87GR_exp_sure_ou = r19 88GR_Bias = r20 89GR_N_as_int = r21 90GR_signexp_X = r22 91GR_exp_X = r23 92GR_exp_mask = r24 93GR_max_exp = r25 94GR_min_exp = r26 95GR_min_den_exp = r27 96GR_Scratch = r28 97GR_signexp_N = r29 98GR_exp_N = r30 99 100GR_SAVE_B0 = r32 101GR_SAVE_GP = r33 102GR_SAVE_PFS = r34 103GR_Parameter_X = r35 104GR_Parameter_Y = r36 105GR_Parameter_RESULT = r37 106GR_Tag = r38 107 108.section .text 109GLOBAL_IEEE754_ENTRY(scalbl) 110 111// 112// Is x NAN, INF, ZERO, +-? 113// Build the exponent Bias 114// 115{ .mfi 116 getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n 117 fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero 118 mov GR_Bias = 0x0ffff 119} 120{ .mfi 121 mov GR_Big = 35000 // If N this big then certain overflow 122 fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand 123 nop.i 0 124} 125;; 126 127{ .mfi 128 getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x 129 fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm 130 nop.i 0 131} 132// 133// Normalize n 134// 135{ .mfi 136 mov GR_exp_mask = 0x1ffff // Exponent mask 137 fnorm.s1 FR_Norm_N = FR_Floating_N 138 nop.i 0 139} 140;; 141 142// 143// Is n NAN, INF, ZERO, +-? 144// 145{ .mfi 146 mov GR_big_exp = 0x1003e // Exponent at which n is integer 147 fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero 148 mov GR_max_exp = 0x13ffe // Exponent of maximum long double 149} 150// 151// Normalize x 152// 153{ .mfb 154 nop.m 0 155 fnorm.s1 FR_Norm_X = FR_Floating_X 156(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm 157} 158;; 159 160SCALBL_COMMON1: 161// Main path continues. Also return here from u=unorm path. 162// Handle special cases if x = Nan, Inf, Zero 163{ .mfb 164 nop.m 0 165 fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative 166(p6) br.cond.spnt SCALBL_NAN_INF_ZERO 167} 168;; 169 170// Handle special cases if n = Nan, Inf, Zero 171{ .mfi 172 getf.sig GR_N_as_int = FR_N_float_int // Get n from significand 173 fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm 174 mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under 175} 176{ .mfb 177 mov GR_min_exp = 0x0c001 // Exponent of minimum long double 178 fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer 179(p9) br.cond.spnt SCALBL_NAN_INF_ZERO 180} 181;; 182 183{ .mmi 184 and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N 185(p7) sub GR_Big = r0, GR_Big // Limit for N 186 nop.i 0 187} 188;; 189 190{ .mib 191 cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer? 192 cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under? 193(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm 194} 195;; 196 197SCALBL_COMMON2: 198// Main path continues. Also return here from x=unorm path. 199// Create biased exponent for 2**N 200{ .mmi 201(p6) mov GR_N_as_int = GR_Big // Limit N 202;; 203 add GR_N_Biased = GR_Bias,GR_N_as_int 204 nop.i 0 205} 206;; 207 208{ .mfi 209 setf.exp FR_Two_N = GR_N_Biased // Form 2**N 210(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer 211 and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X 212} 213;; 214 215// 216// Compute biased result exponent 217// Branch if N is not an integer 218// 219{ .mib 220 add GR_exp_Result = GR_exp_X, GR_N_as_int 221 mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble 222(p9) br.cond.spnt SCALBL_N_NOT_INT 223} 224;; 225 226// 227// Raise Denormal operand flag with compare 228// Do final operation 229// 230{ .mfi 231 cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow 232 fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm 233 cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow 234} 235{ .mfb 236 nop.m 0 237 fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 238(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow 239} 240;; 241 242{ .mib 243(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow 244(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow 245(p7) br.ret.sptk b0 // Return from main path 246} 247;; 248 249{ .bbb 250(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow 251(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow 252(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow 253} 254;; 255 256// Here if possible underflow. 257// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001 258SCALBL_POSSIBLE_UNDERFLOW: 259// 260// Here if possible overflow. 261// Resulting exponent: 0x13ffe = exp_Result 262SCALBL_POSSIBLE_OVERFLOW: 263 264// Set up necessary status fields 265// 266// S0 user supplied status 267// S2 user supplied status + WRE + TD (Overflows) 268// S3 user supplied status + FZ + TD (Underflows) 269// 270{ .mfi 271 mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow 272 fsetc.s3 0x7F,0x41 273 nop.i 0 274} 275{ .mfi 276 mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow 277 fsetc.s2 0x7F,0x42 278 nop.i 0 279} 280;; 281 282// 283// Do final operation with s2 and s3 284// 285{ .mfi 286 setf.exp FR_NBig = GR_neg_ov_limit 287 fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 288 nop.i 0 289} 290{ .mfi 291 setf.exp FR_Big = GR_pos_ov_limit 292 fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 293 nop.i 0 294} 295;; 296 297// Check for overflow or underflow. 298// Restore s3 299// Restore s2 300// 301{ .mfi 302 nop.m 0 303 fsetc.s3 0x7F,0x40 304 nop.i 0 305} 306{ .mfi 307 nop.m 0 308 fsetc.s2 0x7F,0x40 309 nop.i 0 310} 311;; 312 313// 314// Is the result zero? 315// 316{ .mfi 317 nop.m 0 318 fclass.m p6, p0 = FR_Result3, 0x007 319 nop.i 0 320} 321{ .mfi 322 nop.m 0 323 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big 324 nop.i 0 325} 326;; 327 328// 329// Detect masked underflow - Tiny + Inexact Only 330// 331{ .mfi 332 nop.m 0 333(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 334 nop.i 0 335} 336;; 337 338// 339// Is result bigger the allowed range? 340// Branch out for underflow 341// 342{ .mfb 343 nop.m 0 344(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig 345(p6) br.cond.spnt SCALBL_UNDERFLOW 346} 347;; 348 349// 350// Branch out for overflow 351// 352{ .bbb 353(p7) br.cond.spnt SCALBL_OVERFLOW 354(p9) br.cond.spnt SCALBL_OVERFLOW 355 br.ret.sptk b0 // Return from main path. 356} 357;; 358 359// Here if result overflows 360SCALBL_OVERFLOW: 361{ .mib 362 alloc r32=ar.pfs,3,0,4,0 363 addl GR_Tag = 51, r0 // Set error tag for overflow 364 br.cond.sptk __libm_error_region // Call error support for overflow 365} 366;; 367 368// Here if result underflows 369SCALBL_UNDERFLOW: 370{ .mib 371 alloc r32=ar.pfs,3,0,4,0 372 addl GR_Tag = 52, r0 // Set error tag for underflow 373 br.cond.sptk __libm_error_region // Call error support for underflow 374} 375;; 376 377SCALBL_NAN_INF_ZERO: 378 379// 380// Before entry, N has been converted to a fp integer in significand of 381// FR_N_float_int 382// 383// Convert N_float_int to floating point value 384// 385{ .mfi 386 getf.sig GR_N_as_int = FR_N_float_int 387 fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan 388 nop.i 0 389} 390{ .mfi 391 addl GR_Scratch = 1,r0 392 fcvt.xf FR_N_float_int = FR_N_float_int 393 nop.i 0 394} 395;; 396 397{ .mfi 398 nop.m 0 399 fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan 400 shl GR_Scratch = GR_Scratch,63 401} 402;; 403 404{ .mfi 405 nop.m 0 406 fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf 407 nop.i 0 408} 409{ .mfi 410 nop.m 0 411 fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf 412 nop.i 0 413} 414;; 415 416// 417// Either X or N is a Nan, return result and possible raise invalid. 418// 419{ .mfb 420 nop.m 0 421(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 422(p6) br.ret.spnt b0 423} 424;; 425 426{ .mfb 427 nop.m 0 428(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 429(p7) br.ret.spnt b0 430} 431;; 432 433// 434// If N + Inf do something special 435// For N = -Inf, create Int 436// 437{ .mfb 438 nop.m 0 439(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 440(p8) br.ret.spnt b0 441} 442{ .mfi 443 nop.m 0 444(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0 445 nop.i 0 446} 447;; 448 449// 450// If N==-Inf,return x/(-N) 451// 452{ .mfb 453 cmp.ne p7,p0 = GR_N_as_int,GR_Scratch 454(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N 455(p9) br.ret.spnt b0 456} 457;; 458 459// 460// Is N an integer. 461// 462{ .mfi 463 nop.m 0 464(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int 465 nop.i 0 466} 467;; 468 469// 470// If N not an int, return NaN and raise invalid. 471// 472{ .mfb 473 nop.m 0 474(p7) frcpa.s0 FR_Result,p0 = f0,f0 475(p7) br.ret.spnt b0 476} 477;; 478 479// 480// Always return x in other path. 481// 482{ .mfb 483 nop.m 0 484 fma.s0 FR_Result = FR_Floating_X,f1,f0 485 br.ret.sptk b0 486} 487;; 488 489// Here if n not int 490// Return NaN and raise invalid. 491SCALBL_N_NOT_INT: 492{ .mfb 493 nop.m 0 494 frcpa.s0 FR_Result,p0 = f0,f0 495 br.ret.sptk b0 496} 497;; 498 499// Here if n=unorm 500SCALBL_N_UNORM: 501{ .mfb 502 getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n 503 fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand 504 br.cond.sptk SCALBL_COMMON1 // Return to main path 505} 506;; 507 508// Here if x=unorm 509SCALBL_X_UNORM: 510{ .mib 511 getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x 512 nop.i 0 513 br.cond.sptk SCALBL_COMMON2 // Return to main path 514} 515;; 516 517GLOBAL_IEEE754_END(scalbl) 518LOCAL_LIBM_ENTRY(__libm_error_region) 519 520// 521// Get stack address of N 522// 523.prologue 524{ .mfi 525 add GR_Parameter_Y=-32,sp 526 nop.f 0 527.save ar.pfs,GR_SAVE_PFS 528 mov GR_SAVE_PFS=ar.pfs 529} 530// 531// Adjust sp 532// 533{ .mfi 534.fframe 64 535 add sp=-64,sp 536 nop.f 0 537 mov GR_SAVE_GP=gp 538};; 539 540// 541// Store N on stack in correct position 542// Locate the address of x on stack 543// 544{ .mmi 545 stfe [GR_Parameter_Y] = FR_Norm_N,16 546 add GR_Parameter_X = 16,sp 547.save b0, GR_SAVE_B0 548 mov GR_SAVE_B0=b0 549};; 550 551// 552// Store x on the stack. 553// Get address for result on stack. 554// 555.body 556{ .mib 557 stfe [GR_Parameter_X] = FR_Norm_X 558 add GR_Parameter_RESULT = 0,GR_Parameter_Y 559 nop.b 0 560} 561{ .mib 562 stfe [GR_Parameter_Y] = FR_Result 563 add GR_Parameter_Y = -16,GR_Parameter_Y 564 br.call.sptk b0=__libm_error_support# 565};; 566 567// 568// Get location of result on stack 569// 570{ .mmi 571 add GR_Parameter_RESULT = 48,sp 572 nop.m 0 573 nop.i 0 574};; 575 576// 577// Get the new result 578// 579{ .mmi 580 ldfe FR_Result = [GR_Parameter_RESULT] 581.restore sp 582 add sp = 64,sp 583 mov b0 = GR_SAVE_B0 584};; 585 586// 587// Restore gp, ar.pfs and return 588// 589{ .mib 590 mov gp = GR_SAVE_GP 591 mov ar.pfs = GR_SAVE_PFS 592 br.ret.sptk b0 593};; 594 595LOCAL_LIBM_END(__libm_error_region) 596 597.type __libm_error_support#,@function 598.global __libm_error_support# 599