.file "atan2.s"


// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
//          set [the previously overwritten] GR_Parameter_RESULT.
// 08/17/00 Changed predicate register macro-usage to direct predicate
//          names due to an assembler bug.
// 09/28/00 Updated to set invalid on SNaN inputs
// 01/19/01 Fixed flags for small results
// 04/13/01 Rescheduled to make all paths faster
// 05/20/02 Cleaned up namespace and sf0 syntax
// 08/20/02 Corrected inexact flag and directed rounding symmetry bugs
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Added missing mutex directive
// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
//
// API
//==============================================================
// double atan2(double Y, double X)
//
// Overview of operation
//==============================================================
//
// The atan2 function returns values in the interval [-pi,+pi].
//
// There are two basic paths: swap true and swap false.
// atan2(Y,X) ==> atan2(V/U) where |U| >= |V|. If |Y| > |X|, we must swap.
//
// p6  swap True    |Y| > |X|
// p7  swap False   |Y| <= |X|
// p8  X+   (If swap=True p8=p9=0)
// p9  X-
//
// all the other predicates p10 thru p15 are false for the main path
//
// Simple trigonometric identities show
//   Region 1 (-45 to +45 degrees):
//         X>0, |Y|<=X, V=Y, U=X      atan2(Y,X) = sgnY * (0 + atan(V/U))
//
//   Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
//         X>0, |Y|>X, V=X, U=Y       atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
//
//   Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
//         X<0, |Y|>|X|, V=X, U=Y     atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
//
//   Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
//         X<0, |Y|<=|X|, V=Y, U=X    atan2(Y,X) = sgnY * (pi - atan(V/U))
//
// So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
//
// We compute atan(V/U) from the identity
//      atan(z) + atan([(V/U)-z] / [1+(V/U)z])
//      where z is a limited precision approximation (16 bits) to V/U
//
// z is calculated with the assistance of the frcpa instruction.
//
// atan(z) is calculated by a polynomial z + z^3 * p(w),  w=z^2
// where p(w) = P0+P1*w+...+P22*w^22
//
// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
//
// Approximate atan(d) by d + P0*d^3
// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
// Compute q(a) = 1 + a + ... + a^5.
// Then F*q(a) approximates the reciprocal to more than 50 bits.

// Special values
//==============================================================
//              Y                 x          Result
//             +number           +inf        +0
//             -number           +inf        -0
//             +number           -inf        +pi
//             -number           -inf        -pi
//
//             +inf              +number     +pi/2
//             -inf              +number     -pi/2
//             +inf              -number     +pi/2
//             -inf              -number     -pi/2
//
//             +inf              +inf        +pi/4
//             -inf              +inf        -pi/4
//             +inf              -inf        +3pi/4
//             -inf              -inf        -3pi/4
//
//             +1                +1          +pi/4
//             -1                +1          -pi/4
//             +1                -1          +3pi/4
//             -1                -1          -3pi/4
//
//             +number           +0          +pi/2
//             -number           +0          -pi/2
//             +number           -0          +pi/2
//             -number           -0          -pi/2
//
//             +0                +number     +0
//             -0                +number     -0
//             +0                -number     +pi
//             -0                -number     -pi
//
//             +0                +0          +0
//             -0                +0          -0
//             +0                -0          +pi
//             -0                -0          -pi
//
//             Nan               anything    quiet Y
//             Not NaN           NaN         quiet X

// atan2(+-0,+-0) sets double error tag to 37

// Registers used
//==============================================================

// predicate registers used:
// p6 -> p15

// floating-point registers used:
// f8, f9 input
// f32 -> f119

// general registers used
// r32 -> r41

// Assembly macros
//==============================================================

// Table pointers; rsig_near_one shares r35 with GR_SAVE_B0 (the former is
// only used in atan2 proper, the latter only in __libm_error_region).
EXP_AD_P1 = r33
EXP_AD_P2 = r34
rsig_near_one = r35


GR_SAVE_B0 = r35
GR_SAVE_GP = r36
GR_SAVE_PFS = r37

GR_Parameter_X = r38
GR_Parameter_Y = r39
GR_Parameter_RESULT = r40
atan2_GR_tag = r41

atan2_Y = f8
atan2_X = f9

atan2_u1_X = f32
atan2_u1_Y = f33
atan2_z2_X = f34
atan2_z2_Y = f35

atan2_two = f36
atan2_B1sq_Y = f37
atan2_z1_X = f38
atan2_z1_Y = f39
atan2_B1X = f40

atan2_B1Y = f41
atan2_wp_X = f42
atan2_B1sq_X = f43
atan2_z = f44
atan2_w = f45

atan2_P0 = f46
atan2_P1 = f47
atan2_P2 = f48
atan2_P3 = f49
atan2_P4 = f50

atan2_P5 = f51
atan2_P6 = f52
atan2_P7 = f53
atan2_P8 = f54
atan2_P9 = f55

atan2_P10 = f56
atan2_P11 = f57
atan2_P12 = f58
atan2_P13 = f59
atan2_P14 = f60

atan2_P15 = f61
atan2_P16 = f62
atan2_P17 = f63
atan2_P18 = f64
atan2_P19 = f65

atan2_P20 = f66
atan2_P21 = f67
atan2_P22 = f68
// atan2_tmp aliases atan2_P22 (f68); it is only written after the last
// read of P22 (the atan2_W10 computation).
atan2_tmp = f68
// atan2_sgn_pi_by_2 aliases atan2_pi_by_2 (f69)
atan2_pi_by_2 = f69
atan2_sgn_pi_by_2 = f69
atan2_V13 = f70

atan2_W11 = f71
atan2_E = f72
atan2_wp_Y = f73
atan2_V11 = f74
atan2_V12 = f75

atan2_V7 = f76
atan2_V8 = f77
atan2_W7 = f78
atan2_W8 = f79
atan2_W3 = f80

atan2_W4 = f81
atan2_V3 = f82
atan2_V4 = f83
atan2_F = f84
atan2_gV = f85

atan2_V10 = f86
atan2_zcub = f87
atan2_V6 = f88
atan2_V9 = f89
atan2_W10 = f90

atan2_W6 = f91
atan2_W2 = f92
atan2_V2 = f93
atan2_alpha = f94
atan2_alpha_1 = f95

atan2_gVF = f96
atan2_V5 = f97
atan2_W12 = f98
atan2_W5 = f99
atan2_alpha_sq = f100

atan2_Cp = f101
atan2_V1 = f102
atan2_ysq = f103
atan2_W1 = f104
atan2_alpha_cub = f105

atan2_C = f106
atan2_xsq = f107
atan2_d = f108
atan2_A_hi = f109
atan2_dsq = f110

atan2_pd = f111
atan2_A_lo = f112
atan2_A = f113
atan2_Pp = f114
atan2_sgnY = f115

// atan2_near_one aliases atan2_sig_near_one (f116);
// atan2_sgn_pi aliases atan2_pi (f117)
atan2_sig_near_one = f116
atan2_near_one = f116
atan2_pi = f117
atan2_sgn_pi = f117
atan2_3pi_by_4 = f118
atan2_pi_by_4 = f119


/////////////////////////////////////////////////////////////


RODATA

.align 16

// Constant tables.  Each entry is 16 bytes (significand, then sign/exponent)
// and is read with ldfe using a post-increment of 16, so the order of the
// entries below must match the order of the loads in atan2.
// atan2_tb1 holds 13 entries (13*16 = 0xd0 bytes).
LOCAL_OBJECT_START(atan2_tb1)
data8 0xA21922DC45605EA1 , 0x00003FFA // P11
data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
data8 0x88887EBB209E3543 , 0x0000BFFB // P6
data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
data8 0xE38E38E320A8A098 , 0x00003FFB // P3
data8 0x9249249247E37913 , 0x0000BFFC // P2
data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
data8 0xC90FDAA22168C235 , 0x00004000 // pi
LOCAL_OBJECT_END(atan2_tb1)

LOCAL_OBJECT_START(atan2_tb2)
data8 0xCE585A259BD8374C , 0x00003FF0 // P21
data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
data8 0x9D3436AABE218776 , 0x00003FF5 // P19
data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
data8 0xF396268151CFB11C , 0x00003FF7 // P17
data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
data8 0x80D601879218B53A , 0x00003FFA // P13
data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
data8 0x96cbe3f9990e91a8 , 0x00004000 // 3pi/4
LOCAL_OBJECT_END(atan2_tb2)




//==============================================================
// double atan2(double Y, double X)
//
// In:   f8 = Y (atan2_Y), f9 = X (atan2_X)
// Out:  f8 = result on every br.ret path in this procedure
//
// Special operands (NaN, infinity, zero) are detected with fclass.m and
// leave through early br.ret.spnt exits interleaved with the main-path
// arithmetic.  For X = 0 and Y = 0 the code branches to ATAN2_ERROR, which
// places the candidate result in f10 and the error tag 37 in atan2_GR_tag;
// there is no branch after that, so control continues into
// __libm_error_region, which follows this procedure.
//==============================================================
.section .text
GLOBAL_IEEE754_ENTRY(atan2)

// Seed 1/X and 1/Y with frcpa, build the constant 2.0, and fetch the
// address of atan2_tb1 through the linkage table.
{ .mfi
      alloc r32 = ar.pfs,1,5,4,0
      frcpa.s1 atan2_u1_X,p6 = f1,atan2_X
      nop.i 999
}
{ .mfi
      addl EXP_AD_P1 = @ltoff(atan2_tb1), gp
      fma.s1 atan2_two = f1,f1,f1
      nop.i 999
;;
}

{ .mfi
      ld8 EXP_AD_P1 = [EXP_AD_P1]
      frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_xsq = atan2_X,atan2_X,f0
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fclass.m p10,p0 = atan2_Y, 0xc3 // Test for y=nan
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_ysq = atan2_Y,atan2_Y,f0
      nop.i 999
}
;;

// 0xd0 = 13 entries * 16 bytes = size of atan2_tb1; this relies on
// atan2_tb2 being laid out directly after atan2_tb1.
{ .mfi
      add EXP_AD_P2 = 0xd0,EXP_AD_P1
      fclass.m p12,p0 = atan2_X, 0xc3 // Test for x nan
      nop.i 999
}
;;


// p10 Y NAN, quiet and return
// atan2_sgnY = 1.0 carrying the sign of Y
{ .mfi
      ldfe atan2_P11 = [EXP_AD_P1],16
      fmerge.s atan2_sgnY = atan2_Y,f1
      nop.i 999
}
{ .mfb
      ldfe atan2_P21 = [EXP_AD_P2],16
(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y
(p10) br.ret.spnt b0 // Exit if y=nan
;;
}


// First-pass quotients and refinement factors:
//   z1_X = Y * u1_X,  B1X = 2 - X * u1_X   (no-swap path, z ~ Y/X)
//   z1_Y = X * u1_Y,  B1Y = 2 - Y * u1_Y   (swap path,    z ~ X/Y)
{ .mfi
      ldfe atan2_P10 = [EXP_AD_P1],16
      fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P20 = [EXP_AD_P2],16
      fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two
      nop.i 999
;;
}

{ .mfi
      ldfe atan2_P9 = [EXP_AD_P1],16
      fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P19 = [EXP_AD_P2],16
      fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two
      nop.i 999
}
;;

{ .mfi
      ldfe atan2_P8 = [EXP_AD_P1],16
      fma.s1 atan2_z2_X = atan2_u1_X, atan2_ysq, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P18 = [EXP_AD_P2],16
      fma.s1 atan2_z2_Y = atan2_u1_Y, atan2_xsq, f0
      nop.i 999
}
;;

// p10 ==> x  inf     y ?
// p11 ==> x !inf     y ?
{ .mfi
      ldfe atan2_P7 = [EXP_AD_P1],16
      fclass.m p10,p11 = atan2_X, 0x23 // test for x inf
      nop.i 999
}
{ .mfb
      ldfe atan2_P17 = [EXP_AD_P2],16
(p12) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If x nan, result quiet x
(p12) br.ret.spnt b0 // Exit for x nan
;;
}

// p6 true if swap,    means |y| >  |x| or ysq > xsq
// p7 true if no swap, means |x| >= |y| or xsq >= ysq
{ .mmf
      ldfe atan2_P6 = [EXP_AD_P1],16
      ldfe atan2_P16 = [EXP_AD_P2],16
      fcmp.ge.s1 p7,p6 = atan2_xsq, atan2_ysq
;;
}

{ .mfi
      ldfe atan2_P5 = [EXP_AD_P1],16
      fma.s1 atan2_wp_X = atan2_z1_X, atan2_z1_X, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P15 = [EXP_AD_P2],16
      fma.s1 atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
      nop.i 999
;;
}

{ .mfi
      ldfe atan2_P4 = [EXP_AD_P1],16
(p6)  fma.s1 atan2_wp_Y = atan2_z1_Y, atan2_z1_Y, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P14 = [EXP_AD_P2],16
(p6)  fma.s1 atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
      nop.i 999
;;
}

// E = U + V*z (the denominator of d), selected by swap / no swap
{ .mfi
      ldfe atan2_P3 = [EXP_AD_P1],16
(p6)  fma.s1 atan2_E = atan2_z2_Y, atan2_B1Y, atan2_Y
      nop.i 999
}
{ .mfi
      ldfe atan2_P13 = [EXP_AD_P2],16
(p7)  fma.s1 atan2_E = atan2_z2_X, atan2_B1X, atan2_X
      nop.i 999
;;
}


// z = z1 * B1, the limited-precision approximation to V/U
{ .mfi
      ldfe atan2_P2 = [EXP_AD_P1],16
(p6)  fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
      nop.i 999
}
{ .mfi
      ldfe atan2_P12 = [EXP_AD_P2],16
(p7)  fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
      nop.i 999
;;
}


{ .mfi
      ldfe atan2_P1 = [EXP_AD_P1],16
      fcmp.eq.s0 p14,p15=atan2_X,atan2_Y // Dummy for denorm and invalid
      nop.i 999
}
{ .mlx
      ldfe atan2_P22 = [EXP_AD_P2],16
      movl rsig_near_one = 0x8000000000000001 // signif near 1.0
;;
}


// p12 ==> x  inf     y inf
// p13 ==> x  inf     y !inf
{ .mmf
      ldfe atan2_P0 = [EXP_AD_P1],16
      ldfe atan2_pi_by_2 = [EXP_AD_P2],16
(p10) fclass.m.unc p12,p13 = atan2_Y, 0x23 // x inf, test if y inf
;;
}

// w = z^2, formed as (z1^2) * (B1^2)
{ .mfi
      ldfe atan2_pi = [EXP_AD_P1],16
(p6)  fma.s1 atan2_w = atan2_wp_Y, atan2_B1sq_Y,f0
      nop.i 999
}
{ .mfi
      ldfe atan2_pi_by_4 = [EXP_AD_P2],16
(p7)  fma.s1 atan2_w = atan2_wp_X, atan2_B1sq_X,f0
      nop.i 999
;;
}

{ .mfi
      ldfe atan2_3pi_by_4 = [EXP_AD_P2],16
(p11) fclass.m.unc p9,p0 = atan2_Y, 0x23 // x not inf, test if y inf
      nop.i 999
;;
}

// gV = V - U*z (the numerator of d), selected by swap / no swap
{ .mfi
      setf.sig atan2_sig_near_one = rsig_near_one
(p12) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x inf, y inf, test if x +inf
      nop.i 999
}
{ .mfi
      nop.m 999
(p6)  fnma.s1 atan2_gV = atan2_Y, atan2_z, atan2_X
      nop.i 999
;;
}

// F = frcpa approximation to 1/E
{ .mfi
      nop.m 999
      frcpa.s1 atan2_F,p0 = f1, atan2_E
      nop.i 999
}
{ .mfi
      nop.m 999
(p7)  fnma.s1 atan2_gV = atan2_X, atan2_z, atan2_Y
      nop.i 999
;;
}

// p13 ==> x  inf     y !inf
{ .mfi
      nop.m 999
(p13) fcmp.gt.unc.s1 p14,p15 = atan2_X,f0 // x inf, y !inf, test if x +inf
      nop.i 999
}
{ .mfb
      nop.m 999
(p9)  fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // +-pi/2 if x !inf, y inf
(p9)  br.ret.spnt b0 // exit if x not inf, y inf, result is +-pi/2
;;
}

// Polynomial p(w): the V* terms combine P0..P11, the W* terms combine
// P12..P22, pairwise in w and then in powers V12 = w^2 and V9 = w^4.
{ .mfi
      nop.m 999
      fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_V12 = atan2_w, atan2_w, f0
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V8 = atan2_w, atan2_P7, atan2_P6
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18
      nop.i 999
;;
}

// alpha = 1 - E*F,  alpha_1 = 2 - E*F = 1 + alpha
{ .mfi
      nop.m 999
      fnma.s1 atan2_alpha = atan2_E, atan2_F, f1
      nop.i 999
}
{ .mfi
      nop.m 999
      fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two
      nop.i 999
;;
}


{ .mfi
      nop.m 999
      fma.s1 atan2_V7 = atan2_w, atan2_P5, atan2_P4
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V4 = atan2_w, atan2_P3, atan2_P2
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V3 = atan2_w, atan2_P1, atan2_P0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_gVF = atan2_gV, atan2_F, f0
      nop.i 999
;;
}

// Cp = 1 + alpha*(1 + alpha) = 1 + alpha + alpha^2
{ .mfi
      nop.m 999
      fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W10 = atan2_V12, atan2_P22, atan2_W11
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V6 = atan2_V12, atan2_V8, atan2_V7
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W6 = atan2_V12, atan2_W8, atan2_W7
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V2 = atan2_V12, atan2_V4, atan2_V3
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W2 = atan2_V12, atan2_W4, atan2_W3
      nop.i 999
;;
}

// p8 ==> y   0     x?
// p9 ==> y  !0     x?
{ .mfi
      nop.m 999
      fclass.m p8,p9 = atan2_Y, 0x07 // Test for y=0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_zcub = atan2_z, atan2_w, f0
      nop.i 999
;;
}

// C = gV * F * (1 + alpha + alpha^2)
{ .mfi
      nop.m 999
      fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0
      nop.i 999
;;
}

// p12 ==> y0     x0
// p13 ==> y0     x!0
{ .mfi
      nop.m 999
(p8)  fclass.m.unc p12,p13 = atan2_X, 0x07 // y=0, test if x is 0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6
      nop.i 999
;;
}


// p9 ==> y!0      x0
{ .mfi
      nop.m 999
(p9)  fclass.m.unc p9,p0 = atan2_X, 0x07 // y not 0, test if x is 0
      nop.i 999
}
// p10 ==> X +INF, Y +-INF
{ .mfb
      nop.m 999
(p10) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_4, f0 // x=+inf, y=inf
(p10) br.ret.spnt b0 // Exit for x=+inf, y=inf, result is +-pi/4
;;
}

.pred.rel "mutex",p11,p14
{ .mfi
      nop.m 999
(p14) fmerge.s f8 = atan2_sgnY, f0 // x=+inf, y !inf, result +-0
      nop.i 999
}
// p11 ==> X -INF, Y +-INF
{ .mfb
      nop.m 999
(p11) fma.d.s0 f8 = atan2_sgnY, atan2_3pi_by_4, f0 // x=-inf, y=inf
(p11) br.ret.spnt b0 // Exit for x=-inf, y=inf, result is +-3pi/4
;;
}

// d = C + alpha^3 * C = gV * F * (1 + alpha + ... + alpha^5)
{ .mfi
      nop.m 999
(p13) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x not 0, y=0, test if x>0
      nop.i 999
}
{ .mfb
      nop.m 999
      fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C
(p14) br.ret.spnt b0 // Exit if x=+inf, y !inf, result +-0
;;
}

// W12 = V9 * V9^2 = w^12, the weight applied to the W* half of p(w)
{ .mfi
      nop.m 999
      fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0
      nop.i 999
}
{ .mfb
      nop.m 999
(p9)  fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // x=0, y not 0
(p9)  br.ret.spnt b0 // Exit if x=0 and y not 0, result is +-pi/2
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2
      nop.i 999
}
{ .mfb
      nop.m 999
      fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2
(p12) br.spnt ATAN2_ERROR // Branch if x=0 and y=0
;;
}

{ .mfi
      nop.m 999
(p10) fmerge.s f8 = atan2_sgnY, f0 // +-0 if x>0, y=0
      nop.i 999
}
{ .mfb
      nop.m 999
(p11) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0 // +-pi if x<0, y=0
(p13) br.ret.spnt b0 // Exit if x!0 and y=0
;;
}


{ .mfi
      nop.m 999
      fma.s1 atan2_pd = atan2_P0, atan2_d, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_dsq = atan2_d, atan2_d, f0
      nop.i 999
;;
}


// Pp = p(w) = V1 + w^12 * W1
{ .mfi
      nop.m 999
      fmerge.se atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1
      nop.i 999
;;
}

// p8 true if no swap and X positive
// p9 true if no swap and X negative
// both are false if swap is true
{ .mfi
      nop.m 999
(p7)  fcmp.ge.unc.s1 p8,p9 = atan2_X,f0
      nop.i 999
}
{ .mfb
      nop.m 999
(p15) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0
(p15) br.ret.spnt b0 // Exit if x=-inf, y !inf, result +-pi
;;
}

// A_lo = d + P0*d^3  (approximation to atan(d))
{ .mfi
      nop.m 999
      fma.s1 atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d
      nop.i 999
;;
}


// A_hi = z + z^3 * p(w)  (approximation to atan(z))
{ .mfi
      nop.m 999
      fma.s1 atan2_sgn_pi = atan2_pi, atan2_sgnY, f0
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z
      nop.i 999
;;
}


// For |Y| <= |X| and X > 0, force inexact in case A_lo is zero
{ .mfi
      nop.m 999
(p8)  fmpy.s0 atan2_tmp = atan2_P22, atan2_P22
      nop.i 999
;;
}

{ .mfi
      nop.m 999
      fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo
      nop.i 999
}
// For |Y| <= |X| and X > 0, result is A_hi + A_lo
{ .mfi
      nop.m 999
(p8)  fma.d.s0 f8 = atan2_A_hi, f1, atan2_A_lo
      nop.i 999
;;
}

.pred.rel "mutex",p6,p9
// We perturb A by multiplying by 1.0+1ulp as we produce the result
// in order to get symmetrically rounded results in directed rounding modes.
// If we don't do this, there are a few cases where the trailing 11 bits of
// the significand of the result, before converting to double, are zero.  These
// cases do not round symmetrically in round to +infinity or round to -infinity.
// The perturbation also ensures that the inexact flag is set.
// For |Y| > |X|, result is  +- pi/2 - (A_hi + A_lo)
{ .mfi
      nop.m 999
(p6)  fnma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
      nop.i 999
}
// For |Y| <= |X|, and X < 0, result is  +- pi + (A_hi + A_lo)
{ .mfb
      nop.m 999
(p9)  fma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi
      br.ret.sptk b0
;;
}

ATAN2_ERROR:
// Here if x=0 and y=0
// The candidate result goes to f10 (not f8) and the error tag to
// atan2_GR_tag; there is no branch after these bundles.
{ .mfi
      nop.m 999
      fclass.m p10,p11 = atan2_X,0x05 // Test if x=+0
      nop.i 999
}
;;

{ .mfi
      mov atan2_GR_tag = 37
(p10) fmerge.s f10 = atan2_sgnY, f0 // x=+0, y=0
      nop.i 999
}
{ .mfi
      nop.m 999
(p11) fma.d.s0 f10 = atan2_sgnY, atan2_pi, f0 // x=-0, y=0
      nop.i 999
;;
}
GLOBAL_IEEE754_END(atan2)
libm_alias_double_other (__atan2, atan2)


//==============================================================
// __libm_error_region
//
// Creates a 64-byte frame, stores the operands and the candidate result
// on the stack, calls __libm_error_support, and returns the value found in
// the result slot in f8.
//
// Stack slots relative to the new sp at the time of the call:
//   sp+16  f9   (Parameter 1, X)      address in GR_Parameter_X
//   sp+32  f8   (Parameter 2, Y)      address in GR_Parameter_Y
//   sp+48  f10  (Parameter 3, result) address in GR_Parameter_RESULT
// b0, gp and ar.pfs are saved in GR_SAVE_B0 / GR_SAVE_GP / GR_SAVE_PFS
// across the call and restored before returning.
//==============================================================
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
{ .mfi
      add GR_Parameter_Y=-32,sp // Parameter 2 value
      nop.f 999
.save ar.pfs,GR_SAVE_PFS
      mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
      add sp=-64,sp // Create new stack
      nop.f 0
      mov GR_SAVE_GP=gp // Save gp
};;


// (2)
{ .mmi
      stfd [GR_Parameter_Y] = f8,16 // STORE Parameter 2 on stack
      add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
      mov GR_SAVE_B0=b0 // Save b0
};;

.body
// (3)
{ .mib
      stfd [GR_Parameter_X] = f9 // STORE Parameter 1 on stack
      add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
      nop.b 0
}
{ .mib
      stfd [GR_Parameter_Y] = f10 // STORE Parameter 3 on stack
      add GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk b0=__libm_error_support# // Call error handling function
};;
// Recompute the result address after the call (see History, 08/15/00)
{ .mmi
      add GR_Parameter_RESULT = 48,sp
      nop.m 0
      nop.i 0
};;

// (4)
{ .mmi
      ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
      add sp = 64,sp // Restore stack pointer
      mov b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
      mov gp = GR_SAVE_GP // Restore gp
      mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
      br.ret.sptk b0 // Return
};;

LOCAL_LIBM_END(__libm_error_region)

.type __libm_error_support#,@function
.global __libm_error_support#