1.file "libm_ldexp.s" 2 3 4// Copyright (c) 2000 - 2003, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 02/02/00 Initial version 42// 01/26/01 ldexp completely reworked and now standalone version 43// 01/04/02 Added handling for int 32 or 64 bits 44// 05/20/02 Cleaned up namespace and sf0 syntax 45// 02/10/03 Reordered header: .section, .global, .proc, .align 46// 08/04/03 Improved performance 47// 48// API 49//============================================================== 50// double __libm_ldexp (double x, int n, int int_type) 51// input floating point f8 and int n (r33), int int_type (r34) 52// output floating point f8 53// 54// int_type = 0 if int is 32 bits 55// int_type = 1 if int is 64 bits 56// 57// Returns x* 2**n using an fma and detects overflow 58// and underflow. 59// 60// 61// Strategy: 62// Compute biased exponent of result exp_Result = N + exp_X 63// Break into ranges: 64// exp_Result > 0x103fe -> Certain overflow 65// exp_Result = 0x103fe -> Possible overflow 66// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path) 67// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow 68// exp_Result < 0x0fc01 - 52 -> Certain underflow 69 70FR_Big = f6 71FR_NBig = f7 72FR_Floating_X = f8 73FR_Result = f8 74FR_Result2 = f9 75FR_Result3 = f10 76FR_Norm_X = f11 77FR_Two_N = f12 78 79GR_neg_ov_limit= r14 80GR_N_Biased = r15 81GR_Big = r16 82GR_NBig = r17 83GR_exp_Result = r18 84GR_pos_ov_limit= r19 85GR_Bias = r20 86GR_N_as_int = r21 87GR_signexp_X = r22 88GR_exp_X = r23 89GR_exp_mask = r24 90GR_max_exp = r25 91GR_min_exp = r26 92GR_min_den_exp = r27 93 94GR_SAVE_B0 = r32 95GR_SAVE_GP = r33 96GR_SAVE_PFS = r34 97GR_Parameter_X = r35 98GR_Parameter_Y = r36 99GR_Parameter_RESULT = r37 100GR_Tag = r38 101 102.section .text 103GLOBAL_LIBM_ENTRY(__libm_ldexp) 104 105// 106// Is x NAN, INF, ZERO, +-? 107// Build the exponent Bias 108// 109{ .mfi 110 getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x 111 fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero 112 mov GR_Bias = 0x0ffff 113} 114// 115// Normalize x 116// Is integer type 32 bits? 117// 118{ .mfi 119 mov GR_Big = 35000 // If N this big then certain overflow 120 fnorm.s1 FR_Norm_X = FR_Floating_X 121 cmp.eq p8,p9 = r34,r0 122} 123;; 124 125// Sign extend N if int is 32 bits 126{ .mfi 127(p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits 128 fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm 129(p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits 130} 131{ .mfi 132 mov GR_NBig = -35000 // If N this small then certain underflow 133 nop.f 0 134 mov GR_max_exp = 0x103fe // Exponent of maximum double 135} 136;; 137 138// Create biased exponent for 2**N 139{ .mfi 140 add GR_N_Biased = GR_Bias,GR_N_as_int 141 nop.f 0 142 cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? 143} 144{ .mib 145 cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? 146 mov GR_min_exp = 0x0fc01 // Exponent of minimum double 147(p9) br.cond.spnt LDEXP_UNORM // Branch if x=unorm 148} 149;; 150 151LDEXP_COMMON: 152// Main path continues. Also return here from x=unorm path. 153// Create 2**N 154.pred.rel "mutex",p7,p8 155{ .mfi 156 setf.exp FR_Two_N = GR_N_Biased 157 nop.f 0 158(p7) mov GR_N_as_int = GR_Big // Limit max N 159} 160{ .mfi 161(p8) mov GR_N_as_int = GR_NBig // Limit min N 162 nop.f 0 163(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big 164} 165;; 166 167// 168// Create biased exponent for 2**N for N big 169// Is N zero? 170// 171{ .mfi 172(p7) add GR_N_Biased = GR_Bias,GR_N_as_int 173 nop.f 0 174 cmp.eq.or p6,p0 = r33,r0 175} 176{ .mfi 177 mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow 178 nop.f 0 179 mov GR_exp_mask = 0x1ffff // Exponent mask 180} 181;; 182 183// 184// Create 2**N for N big 185// Return x when N = 0 or X = Nan, Inf, Zero 186// 187{ .mfi 188(p7) setf.exp FR_Two_N = GR_N_Biased 189 nop.f 0 190 mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble 191} 192{ .mfb 193 and GR_exp_X = GR_exp_mask, GR_signexp_X 194(p6) fma.d.s0 FR_Result = FR_Floating_X, f1, f0 195(p6) br.ret.spnt b0 196} 197;; 198 199// 200// Raise Denormal operand flag with compare 201// Compute biased result exponent 202// 203{ .mfi 204 add GR_exp_Result = GR_exp_X, GR_N_as_int 205 fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 206 mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow 207} 208;; 209 210// 211// Do final operation 212// 213{ .mfi 214 cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow 215 fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 216 cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow 217} 218{ .mfb 219 nop.m 0 220 nop.f 0 221(p9) br.cond.spnt LDEXP_UNDERFLOW // Branch if certain underflow 222} 223;; 224 225{ .mib 226(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow 227(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow 228(p7) br.ret.sptk b0 // Return from main path 229} 230;; 231 232{ .bbb 233(p6) br.cond.spnt LDEXP_OVERFLOW // Branch if certain overflow 234(p8) br.cond.spnt LDEXP_POSSIBLE_OVERFLOW // Branch if possible overflow 235(p9) br.cond.spnt LDEXP_POSSIBLE_UNDERFLOW // Branch if possible underflow 236} 237;; 238 239// Here if possible underflow. 240// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01 241LDEXP_POSSIBLE_UNDERFLOW: 242// 243// Here if possible overflow. 244// Resulting exponent: 0x103fe = exp_Result 245LDEXP_POSSIBLE_OVERFLOW: 246 247// Set up necessary status fields 248// 249// S0 user supplied status 250// S2 user supplied status + WRE + TD (Overflows) 251// S3 user supplied status + FZ + TD (Underflows) 252// 253{ .mfi 254 nop.m 0 255 fsetc.s3 0x7F,0x41 256 nop.i 0 257} 258{ .mfi 259 nop.m 0 260 fsetc.s2 0x7F,0x42 261 nop.i 0 262} 263;; 264 265// 266// Do final operation with s2 and s3 267// 268{ .mfi 269 setf.exp FR_NBig = GR_neg_ov_limit 270 fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 271 nop.i 0 272} 273{ .mfi 274 setf.exp FR_Big = GR_pos_ov_limit 275 fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 276 nop.i 0 277} 278;; 279 280// Check for overflow or underflow. 281// Restore s3 282// Restore s2 283// 284{ .mfi 285 nop.m 0 286 fsetc.s3 0x7F,0x40 287 nop.i 0 288} 289{ .mfi 290 nop.m 0 291 fsetc.s2 0x7F,0x40 292 nop.i 0 293} 294;; 295 296// 297// Is the result zero? 298// 299{ .mfi 300 nop.m 0 301 fclass.m p6, p0 = FR_Result3, 0x007 302 nop.i 0 303} 304{ .mfi 305 nop.m 0 306 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big 307 nop.i 0 308} 309;; 310 311// 312// Detect masked underflow - Tiny + Inexact Only 313// 314{ .mfi 315 nop.m 0 316(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 317 nop.i 0 318} 319;; 320 321// 322// Is result bigger the allowed range? 323// Branch out for underflow 324// 325{ .mfb 326 nop.m 0 327(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig 328(p6) br.cond.spnt LDEXP_UNDERFLOW 329} 330;; 331 332// 333// Branch out for overflow 334// 335{ .bbb 336(p7) br.cond.spnt LDEXP_OVERFLOW 337(p9) br.cond.spnt LDEXP_OVERFLOW 338 br.ret.sptk b0 // Return from main path. 339} 340;; 341 342// Here if result overflows 343LDEXP_OVERFLOW: 344{ .mib 345 alloc r32=ar.pfs,3,0,4,0 346 addl GR_Tag = 146, r0 // Set error tag for overflow 347 br.cond.sptk __libm_error_region // Call error support for overflow 348} 349;; 350 351// Here if result underflows 352LDEXP_UNDERFLOW: 353{ .mib 354 alloc r32=ar.pfs,3,0,4,0 355 addl GR_Tag = 147, r0 // Set error tag for underflow 356 br.cond.sptk __libm_error_region // Call error support for underflow 357} 358;; 359 360// Here if x=unorm 361LDEXP_UNORM: 362{ .mib 363 getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x 364 nop.i 0 365 br.cond.sptk LDEXP_COMMON // Return to main path 366} 367;; 368 369 370GLOBAL_LIBM_END(__libm_ldexp) 371LOCAL_LIBM_ENTRY(__libm_error_region) 372 373// 374// Get stack address of N 375// 376.prologue 377{ .mfi 378 add GR_Parameter_Y=-32,sp 379 nop.f 0 380.save ar.pfs,GR_SAVE_PFS 381 mov GR_SAVE_PFS=ar.pfs 382} 383// 384// Adjust sp 385// 386{ .mfi 387.fframe 64 388 add sp=-64,sp 389 nop.f 0 390 mov GR_SAVE_GP=gp 391};; 392 393// 394// Store N on stack in correct position 395// Locate the address of x on stack 396// 397{ .mmi 398 st8 [GR_Parameter_Y] = GR_N_as_int,16 399 add GR_Parameter_X = 16,sp 400.save b0, GR_SAVE_B0 401 mov GR_SAVE_B0=b0 402};; 403 404// 405// Store x on the stack. 406// Get address for result on stack. 407// 408.body 409{ .mib 410 stfd [GR_Parameter_X] = FR_Norm_X 411 add GR_Parameter_RESULT = 0,GR_Parameter_Y 412 nop.b 0 413} 414{ .mib 415 stfd [GR_Parameter_Y] = FR_Result 416 add GR_Parameter_Y = -16,GR_Parameter_Y 417 br.call.sptk b0=__libm_error_support# 418};; 419 420// 421// Get location of result on stack 422// 423{ .mmi 424 add GR_Parameter_RESULT = 48,sp 425 nop.m 0 426 nop.i 0 427};; 428 429// 430// Get the new result 431// 432{ .mmi 433 ldfd FR_Result = [GR_Parameter_RESULT] 434.restore sp 435 add sp = 64,sp 436 mov b0 = GR_SAVE_B0 437};; 438 439// 440// Restore gp, ar.pfs and return 441// 442{ .mib 443 mov gp = GR_SAVE_GP 444 mov ar.pfs = GR_SAVE_PFS 445 br.ret.sptk b0 446};; 447 448LOCAL_LIBM_END(__libm_error_region) 449 450.type __libm_error_support#,@function 451.global __libm_error_support# 452