1.file "nextafterl.s" 2 3 4// Copyright (c) 2000 - 2004, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 02/02/00 Initial version 42// 03/03/00 Modified to conform to C9X, and improve speed of main path 43// 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed 44// 04/04/00 Unwind support added 45// 05/12/00 Fixed erroneous denormal flag setting for exponent change cases 1,3 46// 08/15/00 Bundle added after call to __libm_error_support to properly 47// set [the previously overwritten] GR_Parameter_RESULT. 48// 09/09/00 Updated fcmp so that qnans do not raise invalid. 49// 12/15/00 Fixed case of smallest long double normal to largest denormal, 50// now adhere to C99 for two zero args, and fixed flag settings 51// for several cases 52// 05/20/02 Cleaned up namespace and sf0 syntax 53// 02/10/03 Reordered header: .section, .global, .proc, .align 54// 12/14/04 Added error handling on underflow. 55// 56// API 57//============================================================== 58// long double nextafterl( long double x, long double y ); 59// input floating point f8, f9 60// output floating point f8 61// 62// Registers used 63//============================================================== 64GR_max_pexp = r14 65GR_min_pexp = r15 66GR_exp = r16 67GR_sig = r17 68GR_lnorm_sig = r18 69GR_sign_mask = r19 70GR_exp_mask = r20 71GR_sden_sig = r21 72GR_new_sig = r22 73GR_new_exp = r23 74GR_lden_sig = r24 75GR_snorm_sig = r25 76GR_exp1 = r26 77GR_x_exp = r27 78// r36-39 parameters for libm_error_support 79 80GR_SAVE_B0 = r34 81GR_SAVE_GP = r35 82GR_SAVE_PFS = r32 83 84GR_Parameter_X = r36 85GR_Parameter_Y = r37 86GR_Parameter_RESULT = r38 87GR_Parameter_TAG = r39 88 89FR_lnorm_sig = f10 90FR_lnorm_exp = f11 91FR_lnorm = f12 92FR_sden_sig = f13 93FR_den_exp = f14 94FR_sden = f15 95FR_snorm_exp = f32 96FR_save_f8 = f33 97FR_new_exp = f34 98FR_new_sig = f35 99FR_lden_sig = f36 100FR_snorm_sig = f37 101FR_exp1 = f38 102FR_tmp = f39 103 104// 105// Overview of operation 106//============================================================== 107// nextafterl determines the next representable value 108// after x in the direction of y. 109 110 111.section .text 112GLOBAL_LIBM_ENTRY(nextafterl) 113 114// Extract signexp from x 115// Is x < y ? p10 if yes, p11 if no 116// Form smallest denormal significand = ulp size 117{ .mfi 118 getf.exp GR_exp = f8 119 fcmp.lt.s1 p10,p11 = f8, f9 120 addl GR_sden_sig = 0x1, r0 121} 122// Form largest normal significand 0xffffffffffffffff 123// Form smallest normal exponent 124{ .mfi 125 addl GR_lnorm_sig = -0x1,r0 126 nop.f 999 127 addl GR_min_pexp = 0x0c001, r0 ;; 128} 129 130// Extract significand from x 131// Is x=y? This fcmp also sets Invalid and Denormal if required 132// Form largest normal exponent 133{ .mfi 134 getf.sig GR_sig = f8 135 fcmp.eq.s0 p6,p0 = f8, f9 136 addl GR_max_pexp = 0x13ffe, r0 137} 138// Move largest normal significand to fp reg for special cases 139{ .mfi 140 setf.sig FR_lnorm_sig = GR_lnorm_sig 141 nop.f 999 142 addl GR_sign_mask = 0x20000, r0 ;; 143} 144 145// Move smallest denormal significand and exp to fp regs 146// Is x=nan? 147// Set p12 and p13 based on whether significand increases or decreases 148// It increases (p12 set) if x<y and x>=0 or if x>y and x<0 149// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 150{ .mfi 151 setf.sig FR_sden_sig = GR_sden_sig 152 fclass.m p8,p0 = f8, 0xc3 153(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask 154} 155// Move smallest normal exp to fp regs 156{ .mfi 157 setf.exp FR_snorm_exp = GR_min_pexp 158 nop.f 999 159(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; 160} 161 162.pred.rel "mutex",p12,p13 163 164// Form expected new significand, adding or subtracting 1 ulp increment 165// If x=y set result to y 166// Form smallest normal significand and largest denormal significand 167{ .mfi 168(p12) add GR_new_sig = GR_sig, GR_sden_sig 169(p6) fmerge.s f8=f9,f9 170 dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 171} 172{ .mlx 173(p13) sub GR_new_sig = GR_sig, GR_sden_sig 174 movl GR_lden_sig = 0x7fffffffffffffff ;; 175} 176 177// Move expected result significand and signexp to fp regs 178// Is y=nan? 179// Form new exponent in case result exponent needs incrementing or decrementing 180{ .mfi 181 setf.exp FR_new_exp = GR_exp 182 fclass.m p9,p0 = f9, 0xc3 183(p12) add GR_exp1 = 1, GR_exp 184} 185{ .mib 186 setf.sig FR_new_sig = GR_new_sig 187(p13) add GR_exp1 = -1, GR_exp 188(p6) br.ret.spnt b0 ;; // Exit if x=y 189} 190 191// Move largest normal signexp to fp reg for special cases 192// Is x=zero? 193{ .mfi 194 setf.exp FR_lnorm_exp = GR_max_pexp 195 fclass.m p7,p0 = f8, 0x7 196 nop.i 999 197} 198{ .mfb 199 setf.exp FR_den_exp = GR_min_pexp 200(p8) fma.s0 f8 = f8,f1,f9 201(p8) br.ret.spnt b0 ;; // Exit if x=nan 202} 203 204// Move exp+-1 and smallest normal significand to fp regs for special cases 205// Is x=inf? 206{ .mfi 207 setf.exp FR_exp1 = GR_exp1 208 fclass.m p6,p0 = f8, 0x23 209 addl GR_exp_mask = 0x1ffff, r0 210} 211{ .mfb 212 setf.sig FR_snorm_sig = GR_snorm_sig 213(p9) fma.s0 f8 = f8,f1,f9 214(p9) br.ret.spnt b0 ;; // Exit if y=nan 215} 216 217// Move largest denormal significand to fp regs for special cases 218// Save x 219{ .mfb 220 setf.sig FR_lden_sig = GR_lden_sig 221 mov FR_save_f8 = f8 222(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 223} 224 225// Mask off the sign to get x_exp 226{ .mfb 227 and GR_x_exp = GR_exp_mask, GR_exp 228 nop.f 999 229(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf 230} 231 232// Check 5 special cases when significand rolls over: 233// 1 sig size incr, x_sig=max_sig, x_exp < max_exp 234// Set p6, result is sig=min_sig, exp++ 235// 2 sig size incr, x_sig=max_sig, x_exp >= max_exp 236// Set p7, result is inf, signal overflow 237// 3 sig size decr, x_sig=min_sig, x_exp > min_exp 238// Set p8, result is sig=max_sig, exp-- 239// 4 sig size decr, x_sig=min_sig, x_exp = min_exp 240// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact 241// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp 242// Set p10, result is zero, sign of x, signal underflow and inexact 243// 244{ .mmi 245(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 246(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig 247 nop.i 999 248;; 249} 250 251{ .mmi 252(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp 253(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 254(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp 255;; 256} 257 258// Create small normal in case need to generate underflow flag 259{ .mfi 260 nop.m 999 261 fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig 262 nop.i 999 263} 264// Branch if cases 1, 2, 3 265{ .bbb 266(p6) br.cond.spnt NEXT_EXPUP 267(p7) br.cond.spnt NEXT_OVERFLOW 268(p8) br.cond.spnt NEXT_EXPDOWN ;; 269} 270 271// Branch if cases 4, 5 272{ .mbb 273 nop.m 999 274(p9) br.cond.spnt NEXT_NORM_TO_DENORM 275(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO 276;; 277} 278 279// Here if no special cases 280// Set p6 if result will be a denormal, so can force underflow flag 281// Case 1: x_exp=min_exp, x_sig=unnormalized 282// Case 2: x_exp<min_exp 283{ .mfi 284 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 285 fmerge.se f8 = FR_new_exp, FR_new_sig 286 nop.i 999 ;; 287} 288 289{ .mfi 290 nop.m 999 291 nop.f 999 292(p6) tbit.z p6,p0 = GR_new_sig, 63 ;; 293} 294 295NEXT_COMMON_FINISH: 296// Force underflow and inexact if denormal result 297{ .mfi 298 nop.m 999 299(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 300 nop.i 999 301} 302{ .mfb 303 nop.m 999 304 fnorm.s0 f8 = f8 // Final normalization to result precision 305(p6) br.cond.spnt NEXT_UNDERFLOW ;; 306} 307 308{ .mfb 309 nop.m 999 310 nop.f 999 311 br.ret.sptk b0;; 312} 313 314//Special cases 315NEXT_EXPUP: 316{ .mfb 317 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 318 fmerge.se f8 = FR_exp1, FR_snorm_sig 319 br.cond.sptk NEXT_COMMON_FINISH ;; 320} 321 322NEXT_EXPDOWN: 323{ .mfb 324 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 325 fmerge.se f8 = FR_exp1, FR_lnorm_sig 326 br.cond.sptk NEXT_COMMON_FINISH ;; 327} 328 329NEXT_NORM_TO_DENORM: 330{ .mfi 331 nop.m 999 332 fmerge.se f8 = FR_exp1, FR_lden_sig 333 nop.i 999 334} 335// Force underflow and inexact 336{ .mfb 337 nop.m 999 338 fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 339 br.cond.sptk NEXT_UNDERFLOW ;; 340} 341 342NEXT_UNDERFLOW_TO_ZERO: 343{ .mfb 344 cmp.eq p6,p0 = r0,r0 345 fmerge.s f8 = FR_save_f8,f0 346 br.cond.sptk NEXT_COMMON_FINISH ;; 347} 348 349NEXT_INF: 350// Here if f8 is +- infinity 351// INF 352// if f8 is +inf, no matter what y is return largest long double 353// if f8 is -inf, no matter what y is return -largest long double 354 355// Create largest long double 356{ .mfi 357 nop.m 999 358 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 359 nop.i 999 ;; 360} 361 362{ .mfb 363 nop.m 999 364 fmerge.s f8 = f8,FR_lnorm 365 br.ret.sptk b0 ;; 366} 367 368NEXT_ZERO: 369 370// Here if f8 is +- zero 371// ZERO 372// if f8 is zero and y is +, return + smallest long double denormal 373// if f8 is zero and y is -, return - smallest long double denormal 374 375{ .mfi 376 nop.m 999 377 fmerge.se FR_sden = f0,FR_sden_sig 378 nop.i 999 ;; 379} 380 381// Create small normal to generate underflow flag 382{ .mfi 383 nop.m 999 384 fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig 385 nop.i 999 ;; 386} 387 388// Add correct sign from direction arg 389{ .mfi 390 nop.m 999 391 fmerge.s f8 = f9,FR_sden 392 nop.i 999 ;; 393} 394 395// Force underflow and inexact flags 396{ .mfb 397 nop.m 999 398 fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 399 br.cond.sptk NEXT_UNDERFLOW ;; 400} 401 402NEXT_UNDERFLOW: 403// Here if result is a denorm, or input is finite and result is zero 404// Call error support to report possible range error 405{ .mib 406 alloc r32=ar.pfs,2,2,4,0 407 mov GR_Parameter_TAG = 267 // Error code 408 br.cond.sptk __libm_error_region // Branch to error call 409} 410;; 411 412NEXT_OVERFLOW: 413// Here if input is finite, but result will be infinite 414// Use frcpa to generate infinity of correct sign 415// Call error support to report possible range error 416{ .mfi 417 alloc r32=ar.pfs,2,2,4,0 418 frcpa.s1 f8,p6 = FR_save_f8, f0 419 nop.i 999 ;; 420} 421 422// Create largest double 423{ .mfi 424 nop.m 999 425 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 426 nop.i 999 ;; 427} 428 429// Force overflow and inexact flags to be set 430{ .mfb 431 mov GR_Parameter_TAG = 153 // Error code 432 fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 433 br.cond.sptk __libm_error_region // Branch to error call 434} 435;; 436 437GLOBAL_LIBM_END(nextafterl) 438libm_alias_ldouble_other (nextafter, nextafter) 439 440 441LOCAL_LIBM_ENTRY(__libm_error_region) 442.prologue 443 444// (1) 445{ .mfi 446 add GR_Parameter_Y=-32,sp // Parameter 2 value 447 nop.f 0 448.save ar.pfs,GR_SAVE_PFS 449 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 450} 451{ .mfi 452.fframe 64 453 add sp=-64,sp // Create new stack 454 nop.f 0 455 mov GR_SAVE_GP=gp // Save gp 456};; 457 458 459// (2) 460{ .mmi 461 stfe [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack 462 add GR_Parameter_X = 16,sp // Parameter 1 address 463.save b0, GR_SAVE_B0 464 mov GR_SAVE_B0=b0 // Save b0 465};; 466 467.body 468// (3) 469{ .mib 470 stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack 471 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 472 nop.b 0 473} 474{ .mib 475 stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack 476 add GR_Parameter_Y = -16,GR_Parameter_Y 477 br.call.sptk b0=__libm_error_support# // Call error handling function 478};; 479{ .mmi 480 nop.m 0 481 nop.m 0 482 add GR_Parameter_RESULT = 48,sp 483};; 484 485// (4) 486{ .mmi 487 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack 488.restore sp 489 add sp = 64,sp // Restore stack pointer 490 mov b0 = GR_SAVE_B0 // Restore return address 491};; 492{ .mib 493 mov gp = GR_SAVE_GP // Restore gp 494 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 495 br.ret.sptk b0 // Return 496};; 497 498LOCAL_LIBM_END(__libm_error_region) 499 500 501.type __libm_error_support#,@function 502.global __libm_error_support# 503