1.file "nexttoward.s" 2 3 4// Copyright (c) 2001 - 2004, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 08/15/01 Initial version 42// 08/23/01 Corrected error tag number 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 12/14/04 Added error handling on underflow. 46// 47// API 48//============================================================== 49// double nexttoward( double x, long double y ); 50// input floating point f8, f9 51// output floating point f8 52// 53// Registers used 54//============================================================== 55GR_max_pexp = r14 56GR_min_pexp = r15 57GR_exp = r16 58GR_sig = r17 59GR_lnorm_sig = r18 60GR_sign_mask = r19 61GR_exp_mask = r20 62GR_sden_sig = r21 63GR_new_sig = r22 64GR_new_exp = r23 65GR_lden_sig = r24 66GR_snorm_sig = r25 67GR_exp1 = r26 68GR_x_exp = r27 69GR_min_den_rexp = r28 70// r36-39 parameters for libm_error_support 71 72GR_SAVE_B0 = r34 73GR_SAVE_GP = r35 74GR_SAVE_PFS = r32 75 76GR_Parameter_X = r36 77GR_Parameter_Y = r37 78GR_Parameter_RESULT = r38 79GR_Parameter_TAG = r39 80 81FR_lnorm_sig = f10 82FR_lnorm_exp = f11 83FR_lnorm = f12 84FR_sden_sig = f13 85FR_sden_exp = f14 86FR_sden = f15 87FR_save_f8 = f33 88FR_new_exp = f34 89FR_new_sig = f35 90FR_lden_sig = f36 91FR_snorm_sig = f37 92FR_exp1 = f38 93FR_tmp = f39 94 95// 96// Overview of operation 97//============================================================== 98// nexttoward determines the next representable value 99// after x in the direction of y. 100 101 102.section .text 103GLOBAL_LIBM_ENTRY(nexttoward) 104 105// Extract signexp from x 106// Is x < y ? p10 if yes, p11 if no 107// Form smallest denormal significand = ulp size 108{ .mfi 109 getf.exp GR_exp = f8 110 fcmp.lt.s1 p10,p11 = f8, f9 111 addl GR_sden_sig = 0x800, r0 112} 113// Form largest normal significand 0xfffffffffffff800 114// Form smallest normal exponent 115{ .mfi 116 addl GR_lnorm_sig = -0x800,r0 117 nop.f 999 118 addl GR_min_pexp = 0x0fc01, r0 ;; 119} 120// Extract significand from x 121// Is x=y? 122// Form largest normal exponent 123{ .mfi 124 getf.sig GR_sig = f8 125 fcmp.eq.s0 p6,p0 = f8, f9 126 addl GR_max_pexp = 0x103fe, r0 127} 128// Move largest normal significand to fp reg for special cases 129{ .mfi 130 setf.sig FR_lnorm_sig = GR_lnorm_sig 131 nop.f 999 132 addl GR_sign_mask = 0x20000, r0 ;; 133} 134 135// Move smallest denormal significand and signexp to fp regs 136// Is x=nan? 137// Set p12 and p13 based on whether significand increases or decreases 138// It increases (p12 set) if x<y and x>=0 or if x>y and x<0 139// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 140{ .mfi 141 setf.sig FR_sden_sig = GR_sden_sig 142 fclass.m p8,p0 = f8, 0xc3 143(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask 144} 145{ .mfi 146 setf.exp FR_sden_exp = GR_min_pexp 147(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; 148} 149 150.pred.rel "mutex",p12,p13 151 152// Form expected new significand, adding or subtracting 1 ulp increment 153// If x=y set result to y 154// Form smallest normal significand and largest denormal significand 155{ .mfi 156(p12) add GR_new_sig = GR_sig, GR_sden_sig 157(p6) fnorm.d.s0 f8=f9 //Normalise 158 dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 159} 160{ .mlx 161(p13) sub GR_new_sig = GR_sig, GR_sden_sig 162 movl GR_lden_sig = 0x7ffffffffffff800 ;; 163} 164 165// Move expected result significand and signexp to fp regs 166// Is y=nan? 167// Form new exponent in case result exponent needs incrementing or decrementing 168{ .mfi 169 setf.exp FR_new_exp = GR_exp 170 fclass.m p9,p0 = f9, 0xc3 171(p12) add GR_exp1 = 1, GR_exp 172} 173{ .mib 174 setf.sig FR_new_sig = GR_new_sig 175(p13) add GR_exp1 = -1, GR_exp 176(p6) br.ret.spnt b0 ;; // Exit if x=y 177} 178 179// Move largest normal signexp to fp reg for special cases 180// Is x=zero? 181{ .mfi 182 setf.exp FR_lnorm_exp = GR_max_pexp 183 fclass.m p7,p0 = f8, 0x7 184 nop.i 999 185} 186{ .mfb 187 nop.m 999 188(p8) fma.s0 f8 = f8,f1,f9 189(p8) br.ret.spnt b0 ;; // Exit if x=nan 190} 191 192// Move exp+-1 and smallest normal significand to fp regs for special cases 193// Is x=inf? 194{ .mfi 195 setf.exp FR_exp1 = GR_exp1 196 fclass.m p6,p0 = f8, 0x23 197 addl GR_exp_mask = 0x1ffff, r0 198} 199{ .mfb 200 setf.sig FR_snorm_sig = GR_snorm_sig 201(p9) fma.s0 f8 = f8,f1,f9 202(p9) br.ret.spnt b0 ;; // Exit if y=nan 203} 204 205// Move largest denormal significand to fp regs for special cases 206// Save x 207{ .mfb 208 setf.sig FR_lden_sig = GR_lden_sig 209 mov FR_save_f8 = f8 210(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 211} 212 213// Mask off the sign to get x_exp 214{ .mfb 215 and GR_x_exp = GR_exp_mask, GR_exp 216 nop.f 999 217(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf 218} 219 220// Check 6 special cases when significand rolls over: 221// 1 sig size incr, x_sig=max_sig, x_exp < max_exp 222// Set p6, result is sig=min_sig, exp++ 223// 2 sig size incr, x_sig=max_sig, x_exp >= max_exp 224// Set p7, result is inf, signal overflow 225// 3 sig size decr, x_sig=min_sig, x_exp > min_exp 226// Set p8, result is sig=max_sig, exp-- 227// 4 sig size decr, x_sig=min_sig, x_exp = min_exp 228// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact 229// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp 230// Set p10, result is zero, sign of x, signal underflow and inexact 231// 6 sig size decr, x_sig=min_sig, x_exp < min_exp 232// Set p14, result is zero, sign of x, signal underflow and inexact 233// 234// Form exponent of smallest double denormal (if normalized register format) 235{ .mmi 236 adds GR_min_den_rexp = -52, GR_min_pexp 237(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 238(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; 239} 240 241{ .mmi 242(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp 243(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp 244(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; 245} 246 247// Create small normal in case need to generate underflow flag 248{ .mfi 249(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp 250 fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig 251(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp 252} 253// Branch if cases 1, 2, 3 254{ .bbb 255(p6) br.cond.spnt NEXT_EXPUP 256(p7) br.cond.spnt NEXT_OVERFLOW 257(p8) br.cond.spnt NEXT_EXPDOWN ;; 258} 259 260// Branch if cases 4, 5, 6 261{ .bbb 262(p9) br.cond.spnt NEXT_NORM_TO_DENORM 263(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO 264(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; 265} 266 267// Here if no special cases 268// Set p6 if result will be a denormal, so can force underflow flag 269// Case 1: x_exp=min_exp, x_sig=unnormalized 270// Case 2: x_exp<min_exp 271{ .mfi 272 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 273 fmerge.se f8 = FR_new_exp, FR_new_sig 274 nop.i 999 ;; 275} 276 277{ .mfi 278 nop.m 999 279 nop.f 999 280(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; 281} 282 283NEXT_COMMON_FINISH: 284// Force underflow and inexact if denormal result 285{ .mfi 286 nop.m 999 287(p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 288 nop.i 999 289} 290{ .mfb 291 nop.m 999 292 fnorm.d.s0 f8 = f8 // Final normalization to result precision 293(p6) br.cond.spnt NEXT_UNDERFLOW ;; 294} 295 296{ .mfb 297 nop.m 999 298 nop.f 999 299 br.ret.sptk b0;; 300} 301 302//Special cases 303NEXT_EXPUP: 304{ .mfb 305 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 306 fmerge.se f8 = FR_exp1, FR_snorm_sig 307 br.cond.sptk NEXT_COMMON_FINISH ;; 308} 309 310NEXT_EXPDOWN: 311{ .mfb 312 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 313 fmerge.se f8 = FR_exp1, FR_lnorm_sig 314 br.cond.sptk NEXT_COMMON_FINISH ;; 315} 316 317NEXT_NORM_TO_DENORM: 318{ .mfi 319 nop.m 999 320 fmerge.se f8 = FR_new_exp, FR_lden_sig 321 nop.i 999 322} 323// Force underflow and inexact if denormal result 324{ .mfb 325 nop.m 999 326 fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 327 br.cond.sptk NEXT_UNDERFLOW ;; 328} 329 330NEXT_UNDERFLOW_TO_ZERO: 331{ .mfb 332 cmp.eq p6,p0 = r0,r0 333 fmerge.s f8 = FR_save_f8,f0 334 br.cond.sptk NEXT_COMMON_FINISH ;; 335} 336 337NEXT_INF: 338// Here if f8 is +- infinity 339// INF 340// if f8 is +inf, no matter what y is return largest double 341// if f8 is -inf, no matter what y is return -largest double 342 343{ .mfi 344 nop.m 999 345 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 346 nop.i 999 ;; 347} 348 349{ .mfb 350 nop.m 999 351 fmerge.s f8 = f8,FR_lnorm 352 br.ret.sptk b0 ;; 353} 354 355NEXT_ZERO: 356 357// Here if f8 is +- zero 358// ZERO 359// if f8 is zero and y is +, return + smallest double denormal 360// if f8 is zero and y is -, return - smallest double denormal 361 362{ .mfi 363 nop.m 999 364 fmerge.se FR_sden = FR_sden_exp,FR_sden_sig 365 nop.i 999 ;; 366} 367 368// Create small normal to generate underflow flag 369{ .mfi 370 nop.m 999 371 fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig 372 nop.i 999 ;; 373} 374 375// Add correct sign from direction arg 376{ .mfi 377 nop.m 999 378 fmerge.s f8 = f9,FR_sden 379 nop.i 999 ;; 380} 381 382// Force underflow and inexact flags 383{ .mfb 384 nop.m 999 385 fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 386 br.cond.sptk NEXT_UNDERFLOW ;; 387} 388 389NEXT_UNDERFLOW: 390// Here if result is a denorm, or input is finite and result is zero 391// Call error support to report possible range error 392{ .mib 393 alloc r32=ar.pfs,2,2,4,0 394 mov GR_Parameter_TAG = 271 // Error code 395 br.cond.sptk __libm_error_region // Branch to error call 396} 397;; 398 399NEXT_OVERFLOW: 400// Here if input is finite, but result will be infinite 401// Use frcpa to generate infinity of correct sign 402// Call error support to report possible range error 403{ .mfi 404 alloc r32=ar.pfs,2,2,4,0 405 frcpa.s1 f8,p6 = FR_save_f8, f0 406 nop.i 999 ;; 407} 408 409// Create largest double 410{ .mfi 411 nop.m 999 412 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 413 nop.i 999 ;; 414} 415 416// Force overflow and inexact flags to be set 417{ .mfb 418 mov GR_Parameter_TAG = 199 // Error code 419 fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 420 br.cond.sptk __libm_error_region // Branch to error call 421} 422;; 423 424GLOBAL_LIBM_END(nexttoward) 425 426 427LOCAL_LIBM_ENTRY(__libm_error_region) 428.prologue 429 430// (1) 431{ .mfi 432 add GR_Parameter_Y=-32,sp // Parameter 2 value 433 nop.f 0 434.save ar.pfs,GR_SAVE_PFS 435 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 436} 437{ .mfi 438.fframe 64 439 add sp=-64,sp // Create new stack 440 nop.f 0 441 mov GR_SAVE_GP=gp // Save gp 442};; 443 444 445// (2) 446{ .mmi 447 stfd [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack 448 add GR_Parameter_X = 16,sp // Parameter 1 address 449.save b0, GR_SAVE_B0 450 mov GR_SAVE_B0=b0 // Save b0 451};; 452 453.body 454// (3) 455{ .mib 456 stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack 457 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 458 nop.b 0 459} 460{ .mib 461 stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack 462 add GR_Parameter_Y = -16,GR_Parameter_Y 463 br.call.sptk b0=__libm_error_support# // Call error handling function 464};; 465{ .mmi 466 nop.m 0 467 nop.m 0 468 add GR_Parameter_RESULT = 48,sp 469};; 470 471// (4) 472{ .mmi 473 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack 474.restore sp 475 add sp = 64,sp // Restore stack pointer 476 mov b0 = GR_SAVE_B0 // Restore return address 477};; 478{ .mib 479 mov gp = GR_SAVE_GP // Restore gp 480 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 481 br.ret.sptk b0 // Return 482};; 483 484LOCAL_LIBM_END(__libm_error_region) 485 486 487.type __libm_error_support#,@function 488.global __libm_error_support# 489