1.file "fmodf.s" 2 3 4// Copyright (c) 2000 - 2003, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39// History 40//==================================================================== 41// 02/02/00 Initial version 42// 03/02/00 New Algorithm 43// 04/04/00 Unwind support added 44// 08/15/00 Bundle added after call to __libm_error_support to properly 45// set [the previously overwritten] GR_Parameter_RESULT. 46// 11/28/00 Set FR_Y to f9 47// 03/11/02 Fixed flags for fmodf(qnan,zero) 48// 05/20/02 Cleaned up namespace and sf0 syntax 49// 02/10/03 Reordered header: .section, .global, .proc, .align 50// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno 51// 52// API 53//==================================================================== 54// float fmodf(float,float); 55// 56// Overview of operation 57//==================================================================== 58// fmod(a,b)=a-i*b, 59// where i is an integer such that, if b!=0, 60// |i|<|a/b| and |a/b-i|<1 61 62// Algorithm 63//==================================================================== 64// a). if |a|<|b|, return a 65// b). get quotient and reciprocal overestimates accurate to 66// 33 bits (q2,y2) 67// c). if the exponent difference (exponent(a)-exponent(b)) 68// is less than 32, truncate quotient to integer and 69// finish in one iteration 70// d). if exponent(a)-exponent(b)>=32 (q2>=2^32) 71// round quotient estimate to single precision (k=RN(q2)), 72// calculate partial remainder (a'=a-k*b), 73// get quotient estimate (a'*y2), and repeat from c). 74 75// Special cases 76//==================================================================== 77// b=+/-0: return NaN, call libm_error_support 78// a=+/-Inf, a=NaN or b=NaN: return NaN 79 80// Registers used 81//==================================================================== 82// Predicate registers: p6-p11 83// General registers: r2,r29,r32 (ar.pfs), r33-r39 84// Floating point registers: f6-f15 85 86GR_SAVE_B0 = r33 87GR_SAVE_PFS = r34 88GR_SAVE_GP = r35 89GR_SAVE_SP = r36 90 91GR_Parameter_X = r37 92GR_Parameter_Y = r38 93GR_Parameter_RESULT = r39 94GR_Parameter_TAG = r40 95 96FR_X = f10 97FR_Y = f9 98FR_RESULT = f8 99 100 101.section .text 102GLOBAL_IEEE754_ENTRY(fmodf) 103 104// inputs in f8, f9 105// result in f8 106 107{ .mfi 108 alloc r32=ar.pfs,1,4,4,0 109 // f6=|a| 110 fmerge.s f6=f0,f8 111 mov r2 = 0x0ffdd 112} 113 {.mfi 114 nop.m 0 115 // f7=|b| 116 fmerge.s f7=f0,f9 117 nop.i 0;; 118} 119 120{ .mfi 121 setf.exp f11 = r2 122 // (1) y0 123 frcpa.s1 f10,p6=f6,f7 124 nop.i 0 125} 126 127// eliminate special cases 128// Y +-NAN, +-inf, +-0? p7 129{ .mfi 130 nop.m 999 131 fclass.m.unc p7,p0 = f9, 0xe7 132 nop.i 999;; 133} 134 135// qnan snan inf norm unorm 0 -+ 136// 1 1 1 0 0 0 11 137// e 3 138// X +-NAN, +-inf, ? p9 139 140{ .mfi 141 nop.m 999 142 fclass.m.unc p9,p0 = f8, 0xe3 143 nop.i 999 144} 145 146// |x| < |y|? Return x p8 147{ .mfi 148 nop.m 999 149 fcmp.lt.unc.s1 p8,p0 = f6,f7 150 nop.i 999 ;; 151} 152 153{ .mfi 154 nop.m 0 155 // normalize y (if |x|<|y|) 156 (p8) fma.s0 f9=f9,f1,f0 157 nop.i 0;; 158} 159 160 { .mfi 161 mov r2=0x1001f 162 // (2) q0=a*y0 163 (p6) fma.s1 f13=f6,f10,f0 164 nop.i 0 165} 166{ .mfi 167 nop.m 0 168 // (3) e0 = 1 - b * y0 169 (p6) fnma.s1 f12=f7,f10,f1 170 nop.i 0;; 171} 172 173 {.mfi 174 nop.m 0 175 // normalize x (if |x|<|y|) 176 (p8) fma.s.s0 f8=f8,f1,f0 177 nop.i 0 178} 179{.bbb 180 (p9) br.cond.spnt FMOD_X_NAN_INF 181 (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO 182 // if |x|<|y|, return 183 (p8) br.ret.spnt b0;; 184} 185 186 {.mfi 187 nop.m 0 188 // normalize x 189 fma.s0 f6=f6,f1,f0 190 nop.i 0 191} 192{.mfi 193 nop.m 0 194 // normalize y 195 fma.s0 f7=f7,f1,f0 196 nop.i 0;; 197} 198 199 200 {.mfi 201 // f15=2^32 202 setf.exp f15=r2 203 // (4) q1=q0+e0*q0 204 (p6) fma.s1 f13=f12,f13,f13 205 nop.i 0 206} 207{ .mfi 208 nop.m 0 209 // (5) e1 = e0 * e0 + 2^-34 210 (p6) fma.s1 f14=f12,f12,f11 211 nop.i 0;; 212} 213{.mlx 214 nop.m 0 215 movl r2=0x33a00000;; 216} 217{ .mfi 218 nop.m 0 219 // (6) y1 = y0 + e0 * y0 220 (p6) fma.s1 f10=f12,f10,f10 221 nop.i 0;; 222} 223{.mfi 224 // set f12=1.25*2^{-24} 225 setf.s f12=r2 226 // (7) q2=q1+e1*q1 227 (p6) fma.s1 f13=f13,f14,f13 228 nop.i 0;; 229} 230{.mfi 231 nop.m 0 232 fmerge.s f9=f8,f9 233 nop.i 0 234} 235{ .mfi 236 nop.m 0 237 // (8) y2 = y1 + e1 * y1 238 (p6) fma.s1 f10=f14,f10,f10 239 // set p6=0, p10=0 240 cmp.ne.and p6,p10=r0,r0;; 241} 242 243.align 32 244loop24: 245 {.mfi 246 nop.m 0 247 // compare q2, 2^32 248 fcmp.lt.unc.s1 p8,p7=f13,f15 249 nop.i 0 250} 251 {.mfi 252 nop.m 0 253 // will truncate quotient to integer, if exponent<32 (in advance) 254 fcvt.fx.trunc.s1 f11=f13 255 nop.i 0;; 256} 257 {.mfi 258 nop.m 0 259 // if exponent>32, round quotient to single precision (perform in advance) 260 fma.s.s1 f13=f13,f1,f0 261 nop.i 0;; 262} 263 {.mfi 264 nop.m 0 265 // set f12=sgn(a) 266 (p8) fmerge.s f12=f8,f1 267 nop.i 0 268} 269 {.mfi 270 nop.m 0 271 // normalize truncated quotient 272 (p8) fcvt.xf f13=f11 273 nop.i 0;; 274} 275 { .mfi 276 nop.m 0 277 // calculate remainder (assuming f13=RZ(Q)) 278 (p7) fnma.s1 f14=f13,f7,f6 279 nop.i 0 280} 281 {.mfi 282 nop.m 0 283 // also if exponent>32, round quotient to single precision 284 // and subtract 1 ulp: q=q-q*(1.25*2^{-24}) 285 (p7) fnma.s.s1 f11=f13,f12,f13 286 nop.i 0;; 287} 288 289 {.mfi 290 nop.m 0 291 // (p8) calculate remainder (82-bit format) 292 (p8) fnma.s1 f11=f13,f7,f6 293 nop.i 0 294} 295 {.mfi 296 nop.m 0 297 // (p7) calculate remainder (assuming f11=RZ(Q)) 298 (p7) fnma.s1 f6=f11,f7,f6 299 nop.i 0;; 300} 301 302 303 {.mfi 304 nop.m 0 305 // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ? 306 (p8) fcmp.lt.unc.s1 p6,p10=f11,f0 307 nop.i 0;; 308} 309 {.mfi 310 nop.m 0 311 // get new quotient estimation: a'*y2 312 (p7) fma.s1 f13=f14,f10,f0 313 nop.i 0 314} 315 {.mfb 316 nop.m 0 317 // was f14=RZ(Q) ? (then new remainder f14>=0) 318 (p7) fcmp.lt.unc.s1 p7,p9=f14,f0 319 nop.b 0;; 320} 321 322 323.pred.rel "mutex",p6,p10 324 {.mfb 325 nop.m 0 326 // add b to estimated remainder (to cover the case when the quotient was overestimated) 327 // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a) 328 (p6) fma.s.s0 f8=f11,f12,f9 329 nop.b 0 330} 331 {.mfb 332 nop.m 0 333 // calculate remainder (single precision) 334 // set correct sign of result before returning 335 (p10) fma.s.s0 f8=f11,f12,f0 336 (p8) br.ret.sptk b0;; 337} 338 {.mfi 339 nop.m 0 340 // if f13!=RZ(Q), get alternative quotient estimation: a''*y2 341 (p7) fma.s1 f13=f6,f10,f0 342 nop.i 0 343} 344 {.mfb 345 nop.m 0 346 // if f14 was RZ(Q), set remainder to f14 347 (p9) mov f6=f14 348 br.cond.sptk loop24;; 349} 350 351 { .mmb 352 nop.m 0 353 nop.m 0 354 br.ret.sptk b0;; 355 } 356 357FMOD_X_NAN_INF: 358 359 360// Y zero ? 361{.mfi 362 nop.m 0 363 fclass.m p10,p0=f8,0xc3 // Test x=nan 364 nop.i 0 365} 366{.mfi 367 nop.m 0 368 fma.s1 f10=f9,f1,f0 369 nop.i 0;; 370} 371 372{.mfi 373 nop.m 0 374 fma.s0 f8=f8,f1,f0 375 nop.i 0 376} 377{.mfi 378 nop.m 0 379(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero 380 nop.i 0;; 381} 382{.mfb 383 nop.m 0 384 fcmp.eq.unc.s1 p11,p0=f10,f0 385(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero 386} 387{.mib 388 nop.m 0 389 nop.i 0 390 // if Y zero 391 (p11) br.cond.spnt FMOD_Y_ZERO;; 392} 393 394// X infinity? Return QNAN indefinite 395{ .mfi 396 nop.m 999 397 fclass.m.unc p8,p9 = f8, 0x23 398 nop.i 999;; 399} 400// Y NaN ? 401{.mfi 402 nop.m 999 403(p8) fclass.m p9,p8=f9,0xc3 404 nop.i 0;; 405} 406{.mfi 407 nop.m 999 408(p8) frcpa.s0 f8,p0 = f8,f8 409 nop.i 0 410} 411{ .mfi 412 nop.m 999 413 // also set Denormal flag if necessary 414(p8) fma.s0 f9=f9,f1,f0 415 nop.i 999 ;; 416} 417 418{ .mfb 419 nop.m 999 420(p8) fma.s.s0 f8=f8,f1,f0 421 nop.b 999 ;; 422} 423 424{ .mfb 425 nop.m 999 426(p9) frcpa.s0 f8,p7=f8,f9 427 br.ret.sptk b0 ;; 428} 429 430 431FMOD_Y_NAN_INF_ZERO: 432 433// Y INF 434{ .mfi 435 nop.m 999 436 fclass.m.unc p7,p0 = f9, 0x23 437 nop.i 999 ;; 438} 439 440{ .mfb 441 nop.m 999 442(p7) fma.s.s0 f8=f8,f1,f0 443(p7) br.ret.spnt b0 ;; 444} 445 446// Y NAN? 447{ .mfi 448 nop.m 999 449 fclass.m.unc p9,p0 = f9, 0xc3 450 nop.i 999 ;; 451} 452 453{ .mfb 454 nop.m 999 455(p9) fma.s.s0 f8=f9,f1,f0 456(p9) br.ret.spnt b0 ;; 457} 458 459FMOD_Y_ZERO: 460// Y zero? Must be zero at this point 461// because it is the only choice left. 462// Return QNAN indefinite 463 464{.mfi 465 nop.m 0 466 // set Invalid 467 frcpa.s0 f12,p0=f0,f0 468 nop.i 999 469} 470// X NAN? 471{ .mfi 472 nop.m 999 473 fclass.m.unc p9,p10 = f8, 0xc3 474 nop.i 999 ;; 475} 476{ .mfi 477 nop.m 999 478(p10) fclass.nm p9,p10 = f8, 0xff 479 nop.i 999 ;; 480} 481 482{.mfi 483 nop.m 999 484 (p9) frcpa.s0 f11,p7=f8,f0 485 nop.i 0;; 486} 487 488{ .mfi 489 nop.m 999 490(p10) frcpa.s0 f11,p7 = f0,f0 491nop.i 999;; 492} 493 494{ .mfi 495 nop.m 999 496 fmerge.s f10 = f8, f8 497 nop.i 999 498} 499 500{ .mfi 501 nop.m 999 502 fma.s.s0 f8=f11,f1,f0 503 nop.i 999;; 504} 505 506EXP_ERROR_RETURN: 507 508 509{ .mib 510 nop.m 0 511 mov GR_Parameter_TAG=122 512 br.sptk __libm_error_region;; 513} 514 515GLOBAL_IEEE754_END(fmodf) 516libm_alias_float_other (__fmod, fmod) 517 518LOCAL_LIBM_ENTRY(__libm_error_region) 519.prologue 520{ .mfi 521 add GR_Parameter_Y=-32,sp // Parameter 2 value 522 nop.f 0 523.save ar.pfs,GR_SAVE_PFS 524 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 525} 526{ .mfi 527.fframe 64 528 add sp=-64,sp // Create new stack 529 nop.f 0 530 mov GR_SAVE_GP=gp // Save gp 531};; 532{ .mmi 533 stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack 534 add GR_Parameter_X = 16,sp // Parameter 1 address 535.save b0, GR_SAVE_B0 536 mov GR_SAVE_B0=b0 // Save b0 537};; 538.body 539{ .mib 540 stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack 541 add GR_Parameter_RESULT = 0,GR_Parameter_Y 542 nop.b 0 // Parameter 3 address 543} 544{ .mib 545 stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack 546 add GR_Parameter_Y = -16,GR_Parameter_Y 547 br.call.sptk b0=__libm_error_support#;; // Call error handling function 548} 549{ .mmi 550 nop.m 0 551 nop.m 0 552 add GR_Parameter_RESULT = 48,sp 553};; 554{ .mmi 555 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack 556.restore sp 557 add sp = 64,sp // Restore stack pointer 558 mov b0 = GR_SAVE_B0 // Restore return address 559};; 560{ .mib 561 mov gp = GR_SAVE_GP // Restore gp 562 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 563 br.ret.sptk b0 // Return 564};; 565 566LOCAL_LIBM_END(__libm_error_region) 567 568.type __libm_error_support#,@function 569.global __libm_error_support# 570