.file "libm_scalblnf.s"


// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 08/03/01 Initial version
// 08/23/01 Corrected error tag number
// 02/06/02 Corrected to handle 32- or 64-bit integers
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 08/25/03 Improved performance
//
// API
//==============================================================
// float __libm_scalblnf  (float x, long int n, int long_int_type)
// input  floating point f8 and long int n (r33)
// input  long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
// output floating point f8
//
// Returns x* 2**n using an fma and detects overflow
// and underflow.
//
//
// Strategy:
//  Compute biased exponent of result exp_Result = N + exp_X
//  Break into ranges:
//   exp_Result > 0x1007e                 -> Certain overflow
//   exp_Result = 0x1007e                 -> Possible overflow
//   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
//   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
//   exp_Result < 0x0ff81 - 23            -> Certain underflow
//
// Register usage
//==============================================================
// FR_Floating_X and FR_Result are both f8: the result overwrites the input.
// r32-r34 are the incoming argument registers; r33 holds n and r34 holds
// long_int_type.  n is copied to GR_N_as_int before any of them is reused.
// On the error paths "alloc r32=ar.pfs,3,0,4,0" overwrites r32 and makes
// r35-r38 available as the four output registers; __libm_error_region then
// reuses r32-r34 as save slots for b0, gp and ar.pfs.

FR_Big         = f6
FR_NBig        = f7
FR_Floating_X  = f8
FR_Result      = f8
FR_Result2     = f9
FR_Result3     = f10
FR_Norm_X      = f11
FR_Two_N       = f12

GR_neg_ov_limit= r14
GR_N_Biased    = r15
GR_Big         = r16
GR_NBig        = r17
GR_exp_Result  = r18
GR_pos_ov_limit= r19
GR_Bias        = r20
GR_N_as_int    = r21
GR_signexp_X   = r22
GR_exp_X       = r23
GR_exp_mask    = r24
GR_max_exp     = r25
GR_min_exp     = r26
GR_min_den_exp = r27

GR_SAVE_B0          = r32
GR_SAVE_GP          = r33
GR_SAVE_PFS         = r34
GR_Parameter_X      = r35
GR_Parameter_Y      = r36
GR_Parameter_RESULT = r37
GR_Tag              = r38

.section .text
GLOBAL_LIBM_ENTRY(__libm_scalblnf)

//
//   Is x NAN, INF, ZERO, +-?
//   Build the exponent Bias
//
//   p6 is set here for the "return x unchanged" cases and is OR-ed with
//   the N == 0 test further down.
//
{    .mfi
     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
     mov           GR_Bias = 0x0ffff
}
//
//   Normalize x
//   Is long integer type 32 bits?
//   p8 = long_int_type is 0 (32-bit long), p9 = long_int_type is nonzero
//
{    .mfi
     mov           GR_Big = 35000      // If N this big then certain overflow
     fnorm.s1      FR_Norm_X = FR_Floating_X
     cmp.eq        p8,p9 = r34,r0
}
;;

//   Sign extend N if long int is 32 bits
//   p9 is consumed by the first slot and then redefined by fclass.m as
//   "x is unorm" for the branch in the next instruction group.
{    .mfi
(p9) mov           GR_N_as_int = r33     // Copy N if long int is 64 bits
     fclass.m      p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4          GR_N_as_int = r33     // Sign extend N if long int is 32 bits
}
{    .mfi
     mov           GR_NBig = -35000    // If N this small then certain underflow
     nop.f         0
     mov           GR_max_exp = 0x1007e      // Exponent of maximum float
}
;;

//   Create biased exponent for 2**N
{    .mfi
     add           GR_N_Biased = GR_Bias,GR_N_as_int
     nop.f         0
     cmp.ge        p7, p0 = GR_N_as_int, GR_Big  // Certain overflow?
}
{    .mib
     cmp.le        p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
     mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
(p9) br.cond.spnt  SCALBNF_UNORM              // Branch if x=unorm
}
;;

SCALBNF_COMMON:
// Main path continues.  Also return here from x=unorm path.
//   Create 2**N
//   N is clamped to [GR_NBig, GR_Big]; p7 ends up set whenever a clamp
//   was applied, so that 2**N is rebuilt from the clamped value below.
.pred.rel "mutex",p7,p8
{    .mfi
     setf.exp      FR_Two_N = GR_N_Biased
     nop.f         0
(p7) mov           GR_N_as_int = GR_Big      // Limit max N
}
{    .mfi
(p8) mov           GR_N_as_int = GR_NBig     // Limit min N
     nop.f         0
(p8) cmp.eq        p7,p0 = r0,r0             // Set p7 if |N| big
}
;;

//
//   Create biased exponent for 2**N for N big
//   Is N zero?
//
//   The zero test reads GR_N_as_int, the copy of N that was sign extended
//   when long int is 32 bits, rather than the raw argument register r33.
//   This keeps the test consistent with every other use of N in this
//   routine.  A clamped N is +-35000, so clamping never makes it zero.
//
{    .mfi
(p7) add           GR_N_Biased = GR_Bias,GR_N_as_int
     nop.f         0
     cmp.eq.or     p6,p0 = GR_N_as_int,r0
}
{    .mfi
     mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
     nop.f         0
     mov           GR_exp_mask = 0x1ffff     // Exponent mask
}
;;

//
//   Create 2**N for N big
//   Return x when N = 0 or X = Nan, Inf, Zero
//
{    .mfi
(p7) setf.exp      FR_Two_N = GR_N_Biased
     nop.f         0
     mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
}
{    .mfb
     and           GR_exp_X = GR_exp_mask, GR_signexp_X
(p6) fma.s.s0      FR_Result = FR_Floating_X, f1, f0
(p6) br.ret.spnt   b0
}
;;

//
//   Raise Denormal operand flag with compare
//   Compute biased result exponent
//
{    .mfi
     add           GR_exp_Result = GR_exp_X, GR_N_as_int
     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
     mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
}
;;

//
//   Do final operation
//
{    .mfi
     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
{    .mfb
     nop.m         0
     nop.f         0
(p9) br.cond.spnt  SCALBNF_UNDERFLOW           // Branch if certain underflow
}
;;

//   After this bundle: p6 = certain overflow, p8 = possible overflow,
//   p7 = result exponent in the safe range, p9 = possible underflow.
{    .mib
(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
(p7) br.ret.sptk   b0                   // Return from main path
}
;;

{    .bbb
(p6) br.cond.spnt  SCALBNF_OVERFLOW            // Branch if certain overflow
(p8) br.cond.spnt  SCALBNF_POSSIBLE_OVERFLOW   // Branch if possible overflow
(p9) br.cond.spnt  SCALBNF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
}
;;

// Here if possible underflow.
// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
SCALBNF_POSSIBLE_UNDERFLOW:
//
// Here if possible overflow.
// Resulting exponent: 0x1007e = exp_Result
SCALBNF_POSSIBLE_OVERFLOW:

//   Set up necessary status fields
//
//   S0 user supplied status
//   S2 user supplied status + WRE + TD  (Overflows)
//   S3 user supplied status + FZ + TD   (Underflows)
//
{    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x41
     nop.i         0
}
{    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x42
     nop.i         0
}
;;

//
//   Do final operation with s2 and s3
//   Also build the positive and negative overflow thresholds as FP values.
//
{    .mfi
     setf.exp      FR_NBig = GR_neg_ov_limit
     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
     nop.i         0
}
{    .mfi
     setf.exp      FR_Big = GR_pos_ov_limit
     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
     nop.i         0
}
;;

//   Check for overflow or underflow.
//   Restore s3
//   Restore s2
//
{    .mfi
     nop.m         0
     fsetc.s3      0x7F,0x40
     nop.i         0
}
{    .mfi
     nop.m         0
     fsetc.s2      0x7F,0x40
     nop.i         0
}
;;

//
//   Is the result zero?
//   p6 = s3 result is +-0, p7 = s2 result >= positive overflow threshold
//
{    .mfi
     nop.m         0
     fclass.m      p6, p0 =  FR_Result3, 0x007
     nop.i         0
}
{    .mfi
     nop.m         0
     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
     nop.i         0
}
;;

//
//   Detect masked underflow - Tiny + Inexact Only
//   p6 stays set only if the s0 result differs from the s2 result.
//
{    .mfi
     nop.m         0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
     nop.i         0
}
;;

//
//   Is the result beyond the allowed range on the negative side?
//   Branch out for underflow
//
{    .mfb
     nop.m          0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
(p6) br.cond.spnt   SCALBNF_UNDERFLOW
}
;;

//
//   Branch out for overflow
//
{    .bbb
(p7) br.cond.spnt   SCALBNF_OVERFLOW
(p9) br.cond.spnt   SCALBNF_OVERFLOW
     br.ret.sptk    b0             //   Return from main path.
}
;;

// Here if result overflows
SCALBNF_OVERFLOW:
{    .mib
     alloc         r32=ar.pfs,3,0,4,0
     addl          GR_Tag = 205, r0    // Set error tag for overflow
     br.cond.sptk  __libm_error_region // Call error support for overflow
}
;;

// Here if result underflows
SCALBNF_UNDERFLOW:
{    .mib
     alloc         r32=ar.pfs,3,0,4,0
     addl          GR_Tag = 206, r0    // Set error tag for underflow
     br.cond.sptk  __libm_error_region // Call error support for underflow
}
;;

// Here if x=unorm
// Re-read the exponent from the normalized copy of x, then rejoin the
// main path.
SCALBNF_UNORM:
{    .mib
     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
     nop.i         0
     br.cond.sptk  SCALBNF_COMMON            // Return to main path
}
;;


GLOBAL_LIBM_END(__libm_scalblnf)
LOCAL_LIBM_ENTRY(__libm_error_region)

// Error stub shared by the overflow and underflow exits above.
// On entry GR_Tag holds the error tag, FR_Norm_X the normalized x,
// GR_N_as_int the (possibly clamped) N and FR_Result the computed result.
//
// A 64-byte frame is created.  Relative to the new sp:
//   sp+16  x       (single precision)  <- GR_Parameter_X
//   sp+32  N       (8 bytes)           <- GR_Parameter_Y
//   sp+48  result  (single precision)  <- GR_Parameter_RESULT
// After __libm_error_support returns, the value at sp+48 is loaded back
// into FR_Result and returned to the caller.

//
// Get stack address of N
// (computed from the incoming sp, i.e. new sp + 32)
//
.prologue
{    .mfi
     add           GR_Parameter_Y=-32,sp
     nop.f         0
.save   ar.pfs,GR_SAVE_PFS
     mov           GR_SAVE_PFS=ar.pfs
}
//
// Adjust sp
//
{    .mfi
.fframe 64
     add           sp=-64,sp
     nop.f         0
     mov           GR_SAVE_GP=gp
};;

//
//  Store N on stack in correct position
//  Locate the address of x on stack
//  The post-increment leaves GR_Parameter_Y pointing at the result slot.
//
{    .mmi
     st8           [GR_Parameter_Y] =  GR_N_as_int,16
     add           GR_Parameter_X = 16,sp
.save   b0, GR_SAVE_B0
     mov           GR_SAVE_B0=b0
};;

//
// Store x on the stack.
// Get address for result on stack.
//
.body
{    .mib
     stfs          [GR_Parameter_X] = FR_Norm_X
     add           GR_Parameter_RESULT = 0,GR_Parameter_Y
     nop.b         0
}
//
// Store the computed result, point GR_Parameter_Y back at N, and call
// the error handler.
//
{    .mib
     stfs          [GR_Parameter_Y] = FR_Result
     add           GR_Parameter_Y = -16,GR_Parameter_Y
     br.call.sptk  b0=__libm_error_support#
};;

//
//  Get location of result on stack
//
{    .mmi
     add           GR_Parameter_RESULT = 48,sp
     nop.m         0
     nop.i         0
};;

//
//  Get the new result
//
{    .mmi
     ldfs          FR_Result = [GR_Parameter_RESULT]
.restore sp
     add           sp = 64,sp
     mov           b0 = GR_SAVE_B0
};;

//
//  Restore gp, ar.pfs and return
//
{    .mib
     mov           gp = GR_SAVE_GP
     mov           ar.pfs = GR_SAVE_PFS
     br.ret.sptk   b0
};;

LOCAL_LIBM_END(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#