1.file "nexttowardf.s" 2 3 4// Copyright (c) 2001 - 2004, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 08/15/01 Initial version 42// 08/23/01 Corrected error tag number 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 12/14/04 Added error handling on underflow. 46// 47// API 48//============================================================== 49// float nexttowardf( float x, long double y ); 50// input floating point f8, f9 51// output floating point f8 52// 53// Registers used 54//============================================================== 55GR_max_pexp = r14 56GR_min_pexp = r15 57GR_exp = r16 58GR_sig = r17 59GR_lnorm_sig = r18 60GR_sign_mask = r19 61GR_exp_mask = r20 62GR_sden_sig = r21 63GR_new_sig = r22 64GR_new_exp = r23 65GR_lden_sig = r24 66GR_snorm_sig = r25 67GR_exp1 = r26 68GR_x_exp = r27 69GR_min_den_rexp = r28 70// r36-39 parameters for libm_error_support 71 72GR_SAVE_B0 = r34 73GR_SAVE_GP = r35 74GR_SAVE_PFS = r32 75 76GR_Parameter_X = r36 77GR_Parameter_Y = r37 78GR_Parameter_RESULT = r38 79GR_Parameter_TAG = r39 80 81FR_lnorm_sig = f10 82FR_lnorm_exp = f11 83FR_lnorm = f12 84FR_sden_sig = f13 85FR_sden_exp = f14 86FR_sden = f15 87FR_save_f8 = f33 88FR_new_exp = f34 89FR_new_sig = f35 90FR_lden_sig = f36 91FR_snorm_sig = f37 92FR_exp1 = f38 93FR_tmp = f39 94 95// 96// Overview of operation 97//============================================================== 98// nexttowardf determines the next representable value 99// after x in the direction of y. 100 101 102.section .text 103GLOBAL_LIBM_ENTRY(nexttowardf) 104 105// Extract signexp from x 106// Form smallest denormal significand = ulp size 107{ .mlx 108 getf.exp GR_exp = f8 109 movl GR_sden_sig = 0x0000010000000000 110} 111// Form largest normal exponent 112// Is x < y ? p10 if yes, p11 if no 113// Form smallest normal exponent 114{ .mfi 115 addl GR_max_pexp = 0x1007e, r0 116 fcmp.lt.s1 p10,p11 = f8, f9 117 addl GR_min_pexp = 0x0ff81, r0 ;; 118} 119 120// Is x=y? 121{ .mfi 122 getf.sig GR_sig = f8 123 fcmp.eq.s0 p6,p0 = f8, f9 124 nop.i 0 125} 126// Extract significand from x 127// Form largest normal significand 128{ .mlx 129 nop.m 0 130 movl GR_lnorm_sig = 0xffffff0000000000 ;; 131} 132 133// Move largest normal significand to fp reg for special cases 134{ .mfi 135 setf.sig FR_lnorm_sig = GR_lnorm_sig 136 nop.f 0 137 addl GR_sign_mask = 0x20000, r0 ;; 138} 139 140// Move smallest denormal significand and signexp to fp regs 141// Is x=nan? 142// Set p12 and p13 based on whether significand increases or decreases 143// It increases (p12 set) if x<y and x>=0 or if x>y and x<0 144// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 145{ .mfi 146 setf.sig FR_sden_sig = GR_sden_sig 147 fclass.m p8,p0 = f8, 0xc3 148(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask 149} 150{ .mfi 151 setf.exp FR_sden_exp = GR_min_pexp 152 nop.f 999 153(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; 154} 155 156.pred.rel "mutex",p12,p13 157 158// Form expected new significand, adding or subtracting 1 ulp increment 159// If x=y set result to y 160// Form smallest normal significand and largest denormal significand 161{ .mfi 162(p12) add GR_new_sig = GR_sig, GR_sden_sig 163(p6) fnorm.s.s0 f8=f9 //Normalise 164 dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 165} 166{ .mlx 167(p13) sub GR_new_sig = GR_sig, GR_sden_sig 168 movl GR_lden_sig = 0x7fffff0000000000 ;; 169} 170 171// Move expected result significand and signexp to fp regs 172// Is y=nan? 173// Form new exponent in case result exponent needs incrementing or decrementing 174{ .mfi 175 setf.exp FR_new_exp = GR_exp 176 fclass.m p9,p0 = f9, 0xc3 177(p12) add GR_exp1 = 1, GR_exp 178} 179{ .mib 180 setf.sig FR_new_sig = GR_new_sig 181(p13) add GR_exp1 = -1, GR_exp 182(p6) br.ret.spnt b0 ;; // Exit if x=y 183} 184 185// Move largest normal signexp to fp reg for special cases 186// Is x=zero? 187{ .mfi 188 setf.exp FR_lnorm_exp = GR_max_pexp 189 fclass.m p7,p0 = f8, 0x7 190 nop.i 999 191} 192{ .mfb 193 nop.m 999 194(p8) fma.s0 f8 = f8,f1,f9 195(p8) br.ret.spnt b0 ;; // Exit if x=nan 196} 197 198// Move exp+-1 and smallest normal significand to fp regs for special cases 199// Is x=inf? 200{ .mfi 201 setf.exp FR_exp1 = GR_exp1 202 fclass.m p6,p0 = f8, 0x23 203 addl GR_exp_mask = 0x1ffff, r0 204} 205{ .mfb 206 setf.sig FR_snorm_sig = GR_snorm_sig 207(p9) fma.s0 f8 = f8,f1,f9 208(p9) br.ret.spnt b0 ;; // Exit if y=nan 209} 210 211// Move largest denormal significand to fp regs for special cases 212// Save x 213{ .mfb 214 setf.sig FR_lden_sig = GR_lden_sig 215 mov FR_save_f8 = f8 216(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 217} 218 219// Mask off the sign to get x_exp 220{ .mfb 221 and GR_x_exp = GR_exp_mask, GR_exp 222 nop.f 999 223(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf 224} 225 226// Check 6 special cases when significand rolls over: 227// 1 sig size incr, x_sig=max_sig, x_exp < max_exp 228// Set p6, result is sig=min_sig, exp++ 229// 2 sig size incr, x_sig=max_sig, x_exp >= max_exp 230// Set p7, result is inf, signal overflow 231// 3 sig size decr, x_sig=min_sig, x_exp > min_exp 232// Set p8, result is sig=max_sig, exp-- 233// 4 sig size decr, x_sig=min_sig, x_exp = min_exp 234// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact 235// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp 236// Set p10, result is zero, sign of x, signal underflow and inexact 237// 6 sig size decr, x_sig=min_sig, x_exp < min_exp 238// Set p14, result is zero, sign of x, signal underflow and inexact 239// 240// Form exponent of smallest float denormal (if normalized register format) 241{ .mmi 242 adds GR_min_den_rexp = -23, GR_min_pexp 243(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 244(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; 245} 246 247{ .mmi 248(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp 249(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp 250(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; 251} 252 253// Create small normal in case need to generate underflow flag 254{ .mfi 255(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp 256 fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig 257(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp 258} 259// Branch if cases 1, 2, 3 260{ .bbb 261(p6) br.cond.spnt NEXT_EXPUP 262(p7) br.cond.spnt NEXT_OVERFLOW 263(p8) br.cond.spnt NEXT_EXPDOWN ;; 264} 265 266// Branch if cases 4, 5, 6 267{ .bbb 268(p9) br.cond.spnt NEXT_NORM_TO_DENORM 269(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO 270(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; 271} 272 273// Here if no special cases 274// Set p6 if result will be a denormal, so can force underflow flag 275// Case 1: x_exp=min_exp, x_sig=unnormalized 276// Case 2: x_exp<min_exp 277{ .mfi 278 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 279 fmerge.se f8 = FR_new_exp, FR_new_sig 280 nop.i 999 ;; 281} 282 283{ .mfi 284 nop.m 999 285 nop.f 999 286(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; 287} 288 289NEXT_COMMON_FINISH: 290// Force underflow and inexact if denormal result 291{ .mfi 292 nop.m 999 293(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 294 nop.i 999 295} 296{ .mfb 297 nop.m 999 298 fnorm.s.s0 f8 = f8 // Final normalization to result precision 299(p6) br.cond.spnt NEXT_UNDERFLOW ;; 300} 301 302{ .mfb 303 nop.m 999 304 nop.f 999 305 br.ret.sptk b0;; 306} 307 308//Special cases 309NEXT_EXPUP: 310{ .mfb 311 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 312 fmerge.se f8 = FR_exp1, FR_snorm_sig 313 br.cond.sptk NEXT_COMMON_FINISH ;; 314} 315 316NEXT_EXPDOWN: 317{ .mfb 318 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 319 fmerge.se f8 = FR_exp1, FR_lnorm_sig 320 br.cond.sptk NEXT_COMMON_FINISH ;; 321} 322 323NEXT_NORM_TO_DENORM: 324{ .mfi 325 nop.m 999 326 fmerge.se f8 = FR_new_exp, FR_lden_sig 327 nop.i 999 328} 329// Force underflow and inexact 330{ .mfb 331 nop.m 999 332 fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 333 br.cond.sptk NEXT_UNDERFLOW ;; 334} 335 336NEXT_UNDERFLOW_TO_ZERO: 337{ .mfb 338 cmp.eq p6,p0 = r0,r0 339 fmerge.s f8 = FR_save_f8,f0 340 br.cond.sptk NEXT_COMMON_FINISH ;; 341} 342 343NEXT_INF: 344// Here if f8 is +- infinity 345// INF 346// if f8 is +inf, no matter what y is return largest float 347// if f8 is -inf, no matter what y is return -largest float 348 349{ .mfi 350 nop.m 999 351 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 352 nop.i 999 ;; 353} 354 355{ .mfb 356 nop.m 999 357 fmerge.s f8 = f8,FR_lnorm 358 br.ret.sptk b0 ;; 359} 360 361NEXT_ZERO: 362 363// Here if f8 is +- zero 364// ZERO 365// if f8 is zero and y is +, return + smallest float denormal 366// if f8 is zero and y is -, return - smallest float denormal 367 368{ .mfi 369 nop.m 999 370 fmerge.se FR_sden = FR_sden_exp,FR_sden_sig 371 nop.i 999 ;; 372} 373 374// Create small normal to generate underflow flag 375{ .mfi 376 nop.m 999 377 fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig 378 nop.i 999 ;; 379} 380 381// Add correct sign from direction arg 382{ .mfi 383 nop.m 999 384 fmerge.s f8 = f9,FR_sden 385 nop.i 999 ;; 386} 387 388// Force underflow and inexact flags 389{ .mfb 390 nop.m 999 391 fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 392 br.cond.sptk NEXT_UNDERFLOW ;; 393} 394 395NEXT_UNDERFLOW: 396// Here if result is a denorm, or input is finite and result is zero 397// Call error support to report possible range error 398{ .mib 399 alloc r32=ar.pfs,2,2,4,0 400 mov GR_Parameter_TAG = 272 // Error code 401 br.cond.sptk __libm_error_region // Branch to error call 402} 403;; 404 405NEXT_OVERFLOW: 406// Here if input is finite, but result will be infinite 407// Use frcpa to generate infinity of correct sign 408// Call error support to report possible range error 409{ .mfi 410 alloc r32=ar.pfs,2,2,4,0 411 frcpa.s1 f8,p6 = FR_save_f8, f0 412 nop.i 999 ;; 413} 414 415// Create largest double 416{ .mfi 417 nop.m 999 418 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 419 nop.i 999 ;; 420} 421 422// Force overflow and inexact flags to be set 423{ .mfb 424 mov GR_Parameter_TAG = 200 // Error code 425 fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 426 br.cond.sptk __libm_error_region // Branch to error call 427} 428;; 429 430GLOBAL_LIBM_END(nexttowardf) 431 432 433LOCAL_LIBM_ENTRY(__libm_error_region) 434.prologue 435 436// (1) 437{ .mfi 438 add GR_Parameter_Y=-32,sp // Parameter 2 value 439 nop.f 0 440.save ar.pfs,GR_SAVE_PFS 441 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 442} 443{ .mfi 444.fframe 64 445 add sp=-64,sp // Create new stack 446 nop.f 0 447 mov GR_SAVE_GP=gp // Save gp 448};; 449 450 451// (2) 452{ .mmi 453 stfs [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack 454 add GR_Parameter_X = 16,sp // Parameter 1 address 455.save b0, GR_SAVE_B0 456 mov GR_SAVE_B0=b0 // Save b0 457};; 458 459.body 460// (3) 461{ .mib 462 stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack 463 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 464 nop.b 0 465} 466{ .mib 467 stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack 468 add GR_Parameter_Y = -16,GR_Parameter_Y 469 br.call.sptk b0=__libm_error_support# // Call error handling function 470};; 471{ .mmi 472 nop.m 0 473 nop.m 0 474 add GR_Parameter_RESULT = 48,sp 475};; 476 477// (4) 478{ .mmi 479 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack 480.restore sp 481 add sp = 64,sp // Restore stack pointer 482 mov b0 = GR_SAVE_B0 // Restore return address 483};; 484{ .mib 485 mov gp = GR_SAVE_GP // Restore gp 486 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 487 br.ret.sptk b0 // Return 488};; 489 490LOCAL_LIBM_END(__libm_error_region) 491 492 493.type __libm_error_support#,@function 494.global __libm_error_support# 495