.file "atanf.s"


// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
// History
//==============================================================
// 02/20/00 Initial version
// 08/17/00 Changed predicate register macro-usage to direct predicate
//          names due to an assembler bug.
// 02/06/02 Corrected .section statement
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align;
//          added missing bundling

//
// Overview (derived from the instruction stream below)
//==============================================================
// Argument x is read from f8 and the single-precision result is
// written back to f8 (atanf_answer).
//
// Let t = x*x.
//
//   |x| <= 1  (p6):
//      result = x + P1*x^3 + P2*x^5 + ... + P10*x^21
//      evaluated as  poly_p4 + x^11 * poly_p1  with
//        poly_p4 = x + P1*x^3 + x^5*(P2 + P3*t + P4*t^2)
//        poly_p1 = P5 + P6*t + P7*t^2 + P8*t^3 + P9*t^4 + P10*t^5
//
//   |x| >  1  (p7):
//      z = frcpa(1, x)                 (reciprocal approximation)
//      b = 1 - x*z
//      poly_q     = Q0 + Q1*t + ... + Q9*t^9 + t^10
//      z21_poly_r = z^21 * (1 + R1*b + R2*b^2 + R3*b^3 + R4*b^4 + R5*b^5)
//      result     = sgn(x)*pi/2 - poly_q * z21_poly_r
//
//   x is zero, infinity or NaN (p8): handled at ATANF_X_INF_NAN_ZERO.
//
// Both paths are computed unconditionally; only the two final
// instructions are predicated on p6/p7.

//
// Assembly macros
//==============================================================

// integer registers used
EXP_Addr1                 = r33
EXP_Addr2                 = r34

// floating point registers used
atanf_coeff_R4            = f32
atanf_coeff_R5            = f33
atanf_coeff_R1            = f34
atanf_coeff_R2            = f35

atanf_coeff_R3            = f36
atanf_coeff_P1            = f37
atanf_coeff_Q6            = f38
atanf_coeff_Q7            = f39
atanf_coeff_Q8            = f40

atanf_coeff_Q9            = f41
atanf_coeff_Q4            = f42
atanf_coeff_Q5            = f43
atanf_coeff_Q2            = f44
atanf_coeff_Q3            = f45

atanf_coeff_P5            = f46
atanf_coeff_P6            = f47
atanf_coeff_Q0            = f48
atanf_coeff_Q1            = f49
atanf_coeff_P7            = f50

atanf_coeff_P8            = f51
atanf_coeff_P3            = f52
atanf_coeff_P4            = f53
atanf_coeff_P9            = f54
atanf_coeff_P10           = f55

atanf_coeff_P2            = f56
atanf_piby2               = f57
atanf_z                   = f58
atanf_b                   = f59
atanf_zsq                 = f60

atanf_sgn_x               = f61
atanf_sgnx_piby2          = f62
atanf_abs_x               = f63
atanf_t                   = f64
atanf_xcub                = f65

atanf_tsq                 = f66
atanf_t4                  = f67
atanf_x5                  = f68
atanf_x6                  = f69
atanf_x11                 = f70

atanf_poly_p1             = f71
atanf_poly_p2             = f72
atanf_poly_p3             = f73
atanf_poly_p4             = f74
atanf_poly_p5             = f75

atanf_poly_q1             = f76
atanf_poly_q2             = f77
atanf_poly_q3             = f78
atanf_poly_q4             = f79
atanf_poly_q5             = f80

// atanf_poly_r1 / atanf_poly_r2 used to be assigned twice (f81/f82 here
// and f90/f89 further down).  Only the later assignments were in effect
// for the code, so the shadowed ones have been dropped; f82 is unused.
atanf_poly_q              = f81
atanf_poly_r3             = f83
atanf_bsq                 = f84
atanf_z4                  = f85

atanf_z5                  = f86
atanf_z8                  = f87
atanf_z13                 = f88
atanf_poly_r2             = f89
atanf_poly_r1             = f90

atanf_z8_bsq              = f91
atanf_poly_r              = f92
atanf_z21_poly_r          = f93
atanf_answer              = f8


// predicate registers used (named directly, see History 08/17/00)
//   p6 : |x| <= 1
//   p7 : |x| >  1
//   p8 : x is zero, infinity or NaN


RODATA

.align 16

// Coefficient table walked through EXP_Addr1.  Entries are consumed in
// pairs by ldfpd, in exactly this order.
LOCAL_OBJECT_START(atanf_coeff_1_table)
data8 0x40c4c241be751ff2 // r4
data8 0x40e9f300c2f3070b // r5
data8 0x409babffef772075 // r3
data8 0xbfd5555512191621 // p1
data8 0x3fc9997e7afbff4e // p2 = q8
data8 0xbfd5555512191621 // p1 = q9
data8 0x3f97105b4160f86b // p8 = q2
data8 0xbfa6e10ba401393f // p7 = q3
data8 0x3f522e5d33bc9baa // p10 = q0
data8 0xbf7deaadaa336451 // p9 = q1
data8 0xbfc2473c5145ee38 // p3
data8 0x3fbc4f512b1865f5 // p4
data8 0x3fc9997e7afbff4e // p2
data8 0x3ff921fb54442d18 // pi/2
LOCAL_OBJECT_END(atanf_coeff_1_table)



// Coefficient table walked through EXP_Addr2.  Entries are consumed in
// pairs by ldfpd, in exactly this order.
LOCAL_OBJECT_START(atanf_coeff_2_table)
data8 0x4035000000004284 // r1
data8 0x406cdffff336a59b // r2
data8 0x3fbc4f512b1865f5 // p4 = q6
data8 0xbfc2473c5145ee38 // p3 = q7
data8 0x3fb142a73d7c54e3 // p6 = q4
data8 0xbfb68eed6a8cfa32 // p5 = q5
data8 0xbfb68eed6a8cfa32 // p5
data8 0x3fb142a73d7c54e3 // p6
data8 0xbfa6e10ba401393f // p7
data8 0x3f97105b4160f86b // p8
data8 0xbf7deaadaa336451 // p9
data8 0x3f522e5d33bc9baa // p10
LOCAL_OBJECT_END(atanf_coeff_2_table)



.section .text
GLOBAL_LIBM_ENTRY(atanf)

// Frame: 1 input + 2 locals (r32-r34); r32 receives ar.pfs.
// z = reciprocal approximation of x, t = x*x, and the two table
// addresses are fetched from the linkage table.
{     .mfi
     alloc      r32            = ar.pfs,1,2,0,0
     frcpa.s1   atanf_z,p0     = f1,f8
     addl       EXP_Addr2      = @ltoff(atanf_coeff_2_table),gp
}
{     .mfi
     addl       EXP_Addr1      = @ltoff(atanf_coeff_1_table),gp
     fma.s1     atanf_t        = f8,f8,f0
     nop.i      999;;
}


// sgn_x = 1.0 carrying the sign of x
{     .mfi
     nop.m      999
     fmerge.s   atanf_sgn_x    = f8,f1
     nop.i      999;;
}

// abs_x = x carrying the sign of 1.0, i.e. |x|
{     .mfi
     ld8        EXP_Addr1      = [EXP_Addr1]
     fmerge.s   atanf_abs_x    = f1,f8
     nop.i      999
}
{     .mfi
     ld8        EXP_Addr2      = [EXP_Addr2]
     nop.f      999
     nop.i      999;;
}


// A former "fclass.m p8,p0 = f8,0x7" stood here.  Its result was always
// overwritten by the 0xe7 fclass.m below before p8 was read, so it has
// been removed.

// p9/p10 are not read anywhere in this routine.
// NOTE(review): presumably issued only for its effect on the s0 status
// flags - confirm before removing.
{     .mfi
     nop.m      999
     fcmp.eq.unc.s0 p9,p10     = f8,f1
     nop.i      999;;
}

// b = 1 - x*z, zsq = z^2; start streaming coefficients from both tables
{     .mfi
     ldfpd      atanf_coeff_R4,atanf_coeff_R5 = [EXP_Addr1],16
     fnma.s1    atanf_b        = f8,atanf_z,f1
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_R1,atanf_coeff_R2 = [EXP_Addr2],16
     fma.s1     atanf_zsq      = atanf_z,atanf_z,f0
     nop.i      999;;
}


// xcub = x^3, tsq = x^4
{     .mfi
     ldfpd      atanf_coeff_R3,atanf_coeff_P1 = [EXP_Addr1],16
     fma.s1     atanf_xcub     = f8,atanf_t,f0
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_Q6,atanf_coeff_Q7 = [EXP_Addr2],16
     fma.s1     atanf_tsq      = atanf_t,atanf_t,f0
     nop.i      999;;
}


// p6 = (|x| <= 1), p7 = its complement; they select the final result
{     .mfi
     ldfpd      atanf_coeff_Q8,atanf_coeff_Q9 = [EXP_Addr1],16
     fcmp.le.s1 p6,p7          = atanf_abs_x,f1
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_Q4,atanf_coeff_Q5 = [EXP_Addr2],16
     nop.f      999
     nop.i      999;;
}


// p8 = x is a special value; consumed by the branch further down
{     .mfi
     ldfpd      atanf_coeff_Q2,atanf_coeff_Q3 = [EXP_Addr1],16
     fclass.m   p8,p0          = f8,0xe7     // @inf|@qnan|@snan|@zero
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_P5,atanf_coeff_P6 = [EXP_Addr2],16
     nop.f      999
     nop.i      999;;
}


{     .mfi
     ldfpd      atanf_coeff_Q0,atanf_coeff_Q1 = [EXP_Addr1],16
     nop.f      999
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_P7,atanf_coeff_P8 = [EXP_Addr2],16
     nop.f      999
     nop.i      999;;
}


// bsq = b^2, z4 = z^4
{     .mfi
     ldfpd      atanf_coeff_P3,atanf_coeff_P4 = [EXP_Addr1],16
     fma.s1     atanf_bsq      = atanf_b,atanf_b,f0
     nop.i      999
}
{     .mfi
     ldfpd      atanf_coeff_P9,atanf_coeff_P10 = [EXP_Addr2]
     fma.s1     atanf_z4       = atanf_zsq,atanf_zsq,f0
     nop.i      999;;
}


// x6 = x^6, t4 = x^8; the last load also brings in pi/2, which the
// special-value path needs, so it must stay ahead of the branch.
{     .mfi
     ldfpd      atanf_coeff_P2,atanf_piby2 = [EXP_Addr1]
     fma.s1     atanf_x6       = atanf_t,atanf_tsq,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_t4       = atanf_tsq,atanf_tsq,f0
     nop.i      999;;
}


// x5 = x^5; leave the main path for zero / infinity / NaN
{     .mfb
     nop.m      999
     fma.s1     atanf_x5       = atanf_t,atanf_xcub,f0
(p8) br.cond.spnt ATANF_X_INF_NAN_ZERO
}
;;

// poly_r1 = 1 + R1*b, poly_r3 = R4 + R5*b
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r1  = atanf_b,atanf_coeff_R1,f1
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r3  = atanf_b,atanf_coeff_R5,atanf_coeff_R4
     nop.i      999;;
}


// poly_r2 = R2 + R3*b, z8 = z^8
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r2  = atanf_b,atanf_coeff_R3,atanf_coeff_R2
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_z8       = atanf_z4,atanf_z4,f0
     nop.i      999;;
}


// poly_q2 = Q4 + Q5*t, poly_q3 = Q6 + Q7*t
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q2  = atanf_t,atanf_coeff_Q5,atanf_coeff_Q4
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q3  = atanf_t,atanf_coeff_Q7,atanf_coeff_Q6
     nop.i      999;;
}


// z5 = z^5, poly_q1 = Q8 + Q9*t
{     .mfi
     nop.m      999
     fma.s1     atanf_z5       = atanf_z,atanf_z4,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q1  = atanf_t,atanf_coeff_Q9,atanf_coeff_Q8
     nop.i      999;;
}


// poly_q4 = Q0 + Q1*t, poly_q5 = Q2 + Q3*t
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q4  = atanf_t,atanf_coeff_Q1,atanf_coeff_Q0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q5  = atanf_t,atanf_coeff_Q3,atanf_coeff_Q2
     nop.i      999;;
}


// poly_p4 = P1*x, poly_p5 = P3 + P4*t
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p4  = f8,atanf_coeff_P1,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p5  = atanf_t,atanf_coeff_P4,atanf_coeff_P3
     nop.i      999;;
}


// poly_r1 = z^8*(1 + R1*b), z8_bsq = z^8*b^2
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r1  = atanf_z8,atanf_poly_r1,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_z8_bsq   = atanf_z8,atanf_bsq,f0
     nop.i      999;;
}


// poly_q2 = Q4 + Q5*t + Q6*t^2 + Q7*t^3
// poly_r2 = R2 + R3*b + R4*b^2 + R5*b^3
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q2  = atanf_tsq,atanf_poly_q3,atanf_poly_q2
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r2  = atanf_bsq,atanf_poly_r3,atanf_poly_r2
     nop.i      999;;
}


// poly_p2 = P7 + P8*t, poly_q1 = Q8 + Q9*t + t^2
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p2  = atanf_t,atanf_coeff_P8,atanf_coeff_P7
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q1  = atanf_poly_q1,f1,atanf_tsq
     nop.i      999;;
}


// z13 = z^13, poly_p1 = P9 + P10*t
{     .mfi
     nop.m      999
     fma.s1     atanf_z13      = atanf_z5,atanf_z8,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p1  = atanf_t,atanf_coeff_P10,atanf_coeff_P9
     nop.i      999;;
}


// poly_p4 = x + P1*x^3, poly_q4 = Q0 + Q1*t + Q2*t^2 + Q3*t^3
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p4  = atanf_t,atanf_poly_p4,f8
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q4  = atanf_tsq,atanf_poly_q5,atanf_poly_q4
     nop.i      999;;
}


// poly_p3 = P5 + P6*t, poly_p5 = P2 + P3*t + P4*t^2
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p3  = atanf_t,atanf_coeff_P6,atanf_coeff_P5
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p5  = atanf_t,atanf_poly_p5,atanf_coeff_P2
     nop.i      999;;
}


// x11 = x^11, poly_r = z^8*(1 + R1*b + ... + R5*b^5)
{     .mfi
     nop.m      999
     fma.s1     atanf_x11      = atanf_x5,atanf_x6,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_r   = atanf_z8_bsq,atanf_poly_r2,atanf_poly_r1
     nop.i      999;;
}


// sgnx_piby2 = sgn(x)*pi/2 (computed in status field s0)
// poly_q2 = Q4 + ... + Q9*t^5 + t^6
{     .mfi
     nop.m      999
     fma.s0     atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
     nop.i      999
}
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q2  = atanf_t4,atanf_poly_q1,atanf_poly_q2
     nop.i      999;;
}


// poly_p1 = P7 + P8*t + P9*t^2 + P10*t^3
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p1  = atanf_tsq,atanf_poly_p1,atanf_poly_p2
     nop.i      999;;
}

// poly_p4 = x + P1*x^3 + P2*x^5 + P3*x^7 + P4*x^9
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p4  = atanf_x5,atanf_poly_p5,atanf_poly_p4
     nop.i      999;;
}

// z21_poly_r = z^21*(1 + R1*b + ... + R5*b^5)
{     .mfi
     nop.m      999
     fma.s1     atanf_z21_poly_r = atanf_z13,atanf_poly_r,f0
     nop.i      999;;
}

// poly_q = Q0 + Q1*t + ... + Q9*t^9 + t^10
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_q   = atanf_t4,atanf_poly_q2,atanf_poly_q4
     nop.i      999;;
}

// poly_p1 = P5 + P6*t + ... + P10*t^5
{     .mfi
     nop.m      999
     fma.s1     atanf_poly_p1  = atanf_tsq,atanf_poly_p1,atanf_poly_p3
     nop.i      999;;
}

// |x| > 1: answer = sgn(x)*pi/2 - poly_q*z21_poly_r, rounded to single
{     .mfi
     nop.m      999
(p7) fnma.s.s0  atanf_answer   = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
     nop.i      999;;
}

// |x| <= 1: answer = poly_p4 + x^11*poly_p1, rounded to single
{     .mfb
     nop.m      999
(p6) fma.s.s0   atanf_answer   = atanf_x11,atanf_poly_p1,atanf_poly_p4
     br.ret.sptk b0
}



// Special inputs.  Reached only when x is zero, infinity or NaN.
ATANF_X_INF_NAN_ZERO:

// p8 = x is an infinity
{     .mfi
     nop.m      0
     fclass.m   p8,p9          = f8,0x23     // @inf
     nop.i      0
}
;;
// infinity: keep the sign of x, take the magnitude of pi/2
{     .mfi
     nop.m      0
(p8) fmerge.s   f8             = f8, atanf_piby2
     nop.i      0
}
;;
// All special inputs: normalize f8 to single precision and return it
// (zero and NaN inputs reach this point with f8 unmodified).
{     .mfb
     nop.m      0
     fnorm.s.s0 f8             = f8
     br.ret.sptk b0
}
;;

GLOBAL_LIBM_END(atanf)
libm_alias_float_other (atan, atan)