1.file "nexttowardl.s" 2 3 4// Copyright (c) 2001 - 2004, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//============================================================== 41// 08/15/01 Initial version 42// 08/23/01 Corrected error tag number 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 12/14/04 Added error handling on underflow. 46// 47// API 48//============================================================== 49// long double nexttowardl( long double x, long double y ); 50// input floating point f8, f9 51// output floating point f8 52// 53// Registers used 54//============================================================== 55GR_max_pexp = r14 56GR_min_pexp = r15 57GR_exp = r16 58GR_sig = r17 59GR_lnorm_sig = r18 60GR_sign_mask = r19 61GR_exp_mask = r20 62GR_sden_sig = r21 63GR_new_sig = r22 64GR_new_exp = r23 65GR_lden_sig = r24 66GR_snorm_sig = r25 67GR_exp1 = r26 68GR_x_exp = r27 69// r36-39 parameters for libm_error_support 70 71GR_SAVE_B0 = r34 72GR_SAVE_GP = r35 73GR_SAVE_PFS = r32 74 75GR_Parameter_X = r36 76GR_Parameter_Y = r37 77GR_Parameter_RESULT = r38 78GR_Parameter_TAG = r39 79 80FR_lnorm_sig = f10 81FR_lnorm_exp = f11 82FR_lnorm = f12 83FR_sden_sig = f13 84FR_den_exp = f14 85FR_sden = f15 86FR_snorm_exp = f32 87FR_save_f8 = f33 88FR_new_exp = f34 89FR_new_sig = f35 90FR_lden_sig = f36 91FR_snorm_sig = f37 92FR_exp1 = f38 93FR_tmp = f39 94 95// 96// Overview of operation 97//============================================================== 98// nexttowardl determines the next representable value 99// after x in the direction of y. 100 101 102.section .text 103GLOBAL_LIBM_ENTRY(nexttowardl) 104 105// Extract signexp from x 106// Is x < y ? p10 if yes, p11 if no 107// Form smallest denormal significand = ulp size 108{ .mfi 109 getf.exp GR_exp = f8 110 fcmp.lt.s1 p10,p11 = f8, f9 111 addl GR_sden_sig = 0x1, r0 112} 113// Form largest normal significand 0xffffffffffffffff 114// Form smallest normal exponent 115{ .mfi 116 addl GR_lnorm_sig = -0x1,r0 117 nop.f 999 118 addl GR_min_pexp = 0x0c001, r0 ;; 119} 120 121// Extract significand from x 122// Is x=y? This fcmp also sets Invalid and Denormal if required 123// Form largest normal exponent 124{ .mfi 125 getf.sig GR_sig = f8 126 fcmp.eq.s0 p6,p0 = f8, f9 127 addl GR_max_pexp = 0x13ffe, r0 128} 129// Move largest normal significand to fp reg for special cases 130{ .mfi 131 setf.sig FR_lnorm_sig = GR_lnorm_sig 132 nop.f 999 133 addl GR_sign_mask = 0x20000, r0 ;; 134} 135 136// Move smallest denormal significand and exp to fp regs 137// Is x=nan? 138// Set p12 and p13 based on whether significand increases or decreases 139// It increases (p12 set) if x<y and x>=0 or if x>y and x<0 140// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 141{ .mfi 142 setf.sig FR_sden_sig = GR_sden_sig 143 fclass.m p8,p0 = f8, 0xc3 144(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask 145} 146// Move smallest normal exp to fp regs 147{ .mfi 148 setf.exp FR_snorm_exp = GR_min_pexp 149 nop.f 999 150(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; 151} 152 153.pred.rel "mutex",p12,p13 154 155// Form expected new significand, adding or subtracting 1 ulp increment 156// If x=y set result to y 157// Form smallest normal significand and largest denormal significand 158{ .mfi 159(p12) add GR_new_sig = GR_sig, GR_sden_sig 160(p6) fmerge.s f8=f9,f9 161 dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 162} 163{ .mlx 164(p13) sub GR_new_sig = GR_sig, GR_sden_sig 165 movl GR_lden_sig = 0x7fffffffffffffff ;; 166} 167 168// Move expected result significand and signexp to fp regs 169// Is y=nan? 170// Form new exponent in case result exponent needs incrementing or decrementing 171{ .mfi 172 setf.exp FR_new_exp = GR_exp 173 fclass.m p9,p0 = f9, 0xc3 174(p12) add GR_exp1 = 1, GR_exp 175} 176{ .mib 177 setf.sig FR_new_sig = GR_new_sig 178(p13) add GR_exp1 = -1, GR_exp 179(p6) br.ret.spnt b0 ;; // Exit if x=y 180} 181 182// Move largest normal signexp to fp reg for special cases 183// Is x=zero? 184{ .mfi 185 setf.exp FR_lnorm_exp = GR_max_pexp 186 fclass.m p7,p0 = f8, 0x7 187 nop.i 999 188} 189{ .mfb 190 setf.exp FR_den_exp = GR_min_pexp 191(p8) fma.s0 f8 = f8,f1,f9 192(p8) br.ret.spnt b0 ;; // Exit if x=nan 193} 194 195// Move exp+-1 and smallest normal significand to fp regs for special cases 196// Is x=inf? 197{ .mfi 198 setf.exp FR_exp1 = GR_exp1 199 fclass.m p6,p0 = f8, 0x23 200 addl GR_exp_mask = 0x1ffff, r0 201} 202{ .mfb 203 setf.sig FR_snorm_sig = GR_snorm_sig 204(p9) fma.s0 f8 = f8,f1,f9 205(p9) br.ret.spnt b0 ;; // Exit if y=nan 206} 207 208// Move largest denormal significand to fp regs for special cases 209// Save x 210{ .mfb 211 setf.sig FR_lden_sig = GR_lden_sig 212 mov FR_save_f8 = f8 213(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 214} 215 216// Mask off the sign to get x_exp 217{ .mfb 218 and GR_x_exp = GR_exp_mask, GR_exp 219 nop.f 999 220(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf 221} 222 223// Check 5 special cases when significand rolls over: 224// 1 sig size incr, x_sig=max_sig, x_exp < max_exp 225// Set p6, result is sig=min_sig, exp++ 226// 2 sig size incr, x_sig=max_sig, x_exp >= max_exp 227// Set p7, result is inf, signal overflow 228// 3 sig size decr, x_sig=min_sig, x_exp > min_exp 229// Set p8, result is sig=max_sig, exp-- 230// 4 sig size decr, x_sig=min_sig, x_exp = min_exp 231// Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact 232// 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp 233// Set p10, result is zero, sign of x, signal underflow and inexact 234// 235{ .mmi 236(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 237(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig 238 nop.i 999 239;; 240} 241 242{ .mmi 243(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp 244(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 245(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp 246;; 247} 248 249// Create small normal in case need to generate underflow flag 250{ .mfi 251 nop.m 999 252 fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig 253 nop.i 999 254} 255// Branch if cases 1, 2, 3 256{ .bbb 257(p6) br.cond.spnt NEXT_EXPUP 258(p7) br.cond.spnt NEXT_OVERFLOW 259(p8) br.cond.spnt NEXT_EXPDOWN ;; 260} 261 262// Branch if cases 4, 5 263{ .mbb 264 nop.m 999 265(p9) br.cond.spnt NEXT_NORM_TO_DENORM 266(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO 267;; 268} 269 270// Here if no special cases 271// Set p6 if result will be a denormal, so can force underflow flag 272// Case 1: x_exp=min_exp, x_sig=unnormalized 273// Case 2: x_exp<min_exp 274{ .mfi 275 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 276 fmerge.se f8 = FR_new_exp, FR_new_sig 277 nop.i 999 ;; 278} 279 280{ .mfi 281 nop.m 999 282 nop.f 999 283(p6) tbit.z p6,p0 = GR_new_sig, 63 ;; 284} 285 286NEXT_COMMON_FINISH: 287// Force underflow and inexact if denormal result 288{ .mfi 289 nop.m 999 290(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 291 nop.i 999 292} 293{ .mfb 294 nop.m 999 295 fnorm.s0 f8 = f8 // Final normalization to result precision 296(p6) br.cond.spnt NEXT_UNDERFLOW ;; 297} 298 299{ .mfb 300 nop.m 999 301 nop.f 999 302 br.ret.sptk b0;; 303} 304 305//Special cases 306NEXT_EXPUP: 307{ .mfb 308 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 309 fmerge.se f8 = FR_exp1, FR_snorm_sig 310 br.cond.sptk NEXT_COMMON_FINISH ;; 311} 312 313NEXT_EXPDOWN: 314{ .mfb 315 cmp.lt p6,p7 = GR_x_exp, GR_min_pexp 316 fmerge.se f8 = FR_exp1, FR_lnorm_sig 317 br.cond.sptk NEXT_COMMON_FINISH ;; 318} 319 320NEXT_NORM_TO_DENORM: 321{ .mfi 322 nop.m 999 323 fmerge.se f8 = FR_exp1, FR_lden_sig 324 nop.i 999 325} 326// Force underflow and inexact 327{ .mfb 328 nop.m 999 329 fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 330 br.cond.sptk NEXT_UNDERFLOW ;; 331} 332 333NEXT_UNDERFLOW_TO_ZERO: 334{ .mfb 335 cmp.eq p6,p0 = r0,r0 336 fmerge.s f8 = FR_save_f8,f0 337 br.cond.sptk NEXT_COMMON_FINISH ;; 338} 339 340NEXT_INF: 341// Here if f8 is +- infinity 342// INF 343// if f8 is +inf, no matter what y is return largest long double 344// if f8 is -inf, no matter what y is return -largest long double 345 346// Create largest long double 347{ .mfi 348 nop.m 999 349 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 350 nop.i 999 ;; 351} 352 353{ .mfb 354 nop.m 999 355 fmerge.s f8 = f8,FR_lnorm 356 br.ret.sptk b0 ;; 357} 358 359NEXT_ZERO: 360 361// Here if f8 is +- zero 362// ZERO 363// if f8 is zero and y is +, return + smallest long double denormal 364// if f8 is zero and y is -, return - smallest long double denormal 365 366{ .mfi 367 nop.m 999 368 fmerge.se FR_sden = f0,FR_sden_sig 369 nop.i 999 ;; 370} 371 372// Create small normal to generate underflow flag 373{ .mfi 374 nop.m 999 375 fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig 376 nop.i 999 ;; 377} 378 379// Add correct sign from direction arg 380{ .mfi 381 nop.m 999 382 fmerge.s f8 = f9,FR_sden 383 nop.i 999 ;; 384} 385 386// Force underflow and inexact flags 387{ .mfb 388 nop.m 999 389 fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 390 br.cond.sptk NEXT_UNDERFLOW ;; 391} 392 393NEXT_UNDERFLOW: 394// Here if result is a denorm, or input is finite and result is zero 395// Call error support to report possible range error 396{ .mib 397 alloc r32=ar.pfs,2,2,4,0 398 mov GR_Parameter_TAG = 270 // Error code 399 br.cond.sptk __libm_error_region // Branch to error call 400} 401;; 402 403NEXT_OVERFLOW: 404// Here if input is finite, but result will be infinite 405// Use frcpa to generate infinity of correct sign 406// Call error support to report possible range error 407{ .mfi 408 alloc r32=ar.pfs,2,2,4,0 409 frcpa.s1 f8,p6 = FR_save_f8, f0 410 nop.i 999 ;; 411} 412 413// Create largest double 414{ .mfi 415 nop.m 999 416 fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig 417 nop.i 999 ;; 418} 419 420// Force overflow and inexact flags to be set 421{ .mfb 422 mov GR_Parameter_TAG = 198 // Error code 423 fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 424 br.cond.sptk __libm_error_region // Branch to error call 425} 426;; 427 428GLOBAL_LIBM_END(nexttowardl) 429 430 431LOCAL_LIBM_ENTRY(__libm_error_region) 432.prologue 433 434// (1) 435{ .mfi 436 add GR_Parameter_Y=-32,sp // Parameter 2 value 437 nop.f 0 438.save ar.pfs,GR_SAVE_PFS 439 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 440} 441{ .mfi 442.fframe 64 443 add sp=-64,sp // Create new stack 444 nop.f 0 445 mov GR_SAVE_GP=gp // Save gp 446};; 447 448 449// (2) 450{ .mmi 451 stfe [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack 452 add GR_Parameter_X = 16,sp // Parameter 1 address 453.save b0, GR_SAVE_B0 454 mov GR_SAVE_B0=b0 // Save b0 455};; 456 457.body 458// (3) 459{ .mib 460 stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack 461 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 462 nop.b 0 463} 464{ .mib 465 stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack 466 add GR_Parameter_Y = -16,GR_Parameter_Y 467 br.call.sptk b0=__libm_error_support# // Call error handling function 468};; 469{ .mmi 470 nop.m 0 471 nop.m 0 472 add GR_Parameter_RESULT = 48,sp 473};; 474 475// (4) 476{ .mmi 477 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack 478.restore sp 479 add sp = 64,sp // Restore stack pointer 480 mov b0 = GR_SAVE_B0 // Restore return address 481};; 482{ .mib 483 mov gp = GR_SAVE_GP // Restore gp 484 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 485 br.ret.sptk b0 // Return 486};; 487 488LOCAL_LIBM_END(__libm_error_region) 489 490 491.type __libm_error_support#,@function 492.global __libm_error_support# 493