.file "atan2f.s"


// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.

// History
//==============================================================
// 06/01/00 Initial version
// 08/15/00 Bundle added after call to __libm_error_support to properly
//          set [the previously overwritten] GR_Parameter_RESULT.
// 08/17/00 Changed predicate register macro-usage to direct predicate
//          names due to an assembler bug.
// 01/05/01 Fixed flag settings for denormal input.
// 01/19/01 Added documentation
// 01/30/01 Improved speed
// 02/06/02 Corrected .section statement
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align

// Description
//=========================================
// The atan2 function computes the principal value of the arc tangent of y/x using
// the signs of both arguments to determine the quadrant of the return value.
// A domain error may occur if both arguments are zero.

// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.

//..
//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
//..v and u can be negative. We state the relationship between atan2(y,x) and
//..atan(v/u).
//..
//..Let swap = false if v = y, and swap = true if v = x.
//..Define C according to the matrix
//..
//..                          TABLE FOR C
//..                              x +ve       x -ve
//..   no swap (swap = false)    sgn(y)*0     sgn(y)*pi
//..   swap    (swap = true )    sgn(y)*pi/2  sgn(y)*pi/2
//..
//..   atan2(y,x) =  C +  atan(v/u)  if no swap
//..   atan2(y,x) =  C -  atan(v/u)  if swap
//..
//..This relationship is more efficient to compute as we accommodate signs in v and u
//..saving the need to obtain the absolute value before computation can proceed.
//..
//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
//..A = y * frcpa(x)    (so A = (y/x)(1 - beta))
//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
//..a correction.
//..atan(A) is approximated by a polynomial
//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
//..atan(G) is approximated as follows:
//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
//..
//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
//..Z = x * frcpa(y)    (so Z = (x/y)(1 - beta))
//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
//..a correction.
//..atan(Z) is approximated by a polynomial
//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
//..atan(T) is approximated as follows:
//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ... + p10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..atan(G) is approximated as follows:
//..G_numer = y - A*x, G_denom = x + A*y
//..H1 = frcpa(G_denom)
//..H_beta = 1 - H1 * G_denom
//..H2 = H1 + H1 * H_beta
//..H_beta2 = H_beta*H_beta
//..H3 = H2 + H2*H_beta2
//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
//..atan_G = G_numer*H3 + atan_G
//..
//..
//..A = y * frcpa(x)
//..atan(A) ~=~ A + p1 A^3 + ... + p10 A^21
//..
//..This polynomial is computed as follows:
//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
//..
//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
//..poly_A1 = poly_A2 + A4 * poly_A1
//..poly_A1 = poly_A3 + A4 * poly_A1
//..
//..poly_A4 = p1 * A
//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
//..poly_A5 = p2 + Asq * poly_A5
//..poly_A4 = poly_A4 + A5 * poly_A5
//..
//..atan_A = poly_A4 + A11 * poly_A1
//..
//..
//..====================================================================
//..	COEFFICIENTS USED IN THE COMPUTATION
//..====================================================================

//coef_pj, j = 1,2,...,10;  atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
//
//  coef_p1          =      -.3333332707155439167401311806315789E+00
//  coef_p1   in dbl = BFD5 5555 1219 1621
//
//  coef_p2          =       .1999967670926658391827857030875748E+00
//  coef_p2   in dbl = 3FC9 997E 7AFB FF4E
//
//  coef_p3          =      -.1427989384500152360161563301087296E+00
//  coef_p3   in dbl = BFC2 473C 5145 EE38
//
//  coef_p4          =       .1105852823460720770079031213661163E+00
//  coef_p4   in dbl = 3FBC 4F51 2B18 65F5
//
//  coef_p5          =      -.8811839915595312348625710228448363E-01
//  coef_p5   in dbl = BFB6 8EED 6A8C FA32
//
//  coef_p6          =       .6742329836955067042153645159059714E-01
//  coef_p6   in dbl = 3FB1 42A7 3D7C 54E3
//
//  coef_p7          =      -.4468571068774672908561591262231909E-01
//  coef_p7   in dbl = BFA6 E10B A401 393F
//
//  coef_p8          =       .2252333246746511135532726960586493E-01
//  coef_p8   in dbl = 3F97 105B 4160 F86B
//
//  coef_p9          =      -.7303884867007574742501716845542314E-02
//  coef_p9   in dbl = BF7D EAAD AA33 6451
//
//  coef_p10         =       .1109686868355312093949039454619058E-02
//  coef_p10  in dbl = 3F52 2E5D 33BC 9BAA
//

// Special values
//==============================================================
//              Y                 x          Result
//             +number           +inf        +0
//             -number           +inf        -0
//             +number           -inf        +pi
//             -number           -inf        -pi
//
//             +inf              +number     +pi/2
//             -inf              +number     -pi/2
//             +inf              -number     +pi/2
//             -inf              -number     -pi/2
//
//             +inf              +inf        +pi/4
//             -inf              +inf        -pi/4
//             +inf              -inf        +3pi/4
//             -inf              -inf        -3pi/4
//
//             +1                +1          +pi/4
//             -1                +1          -pi/4
//             +1                -1          +3pi/4
//             -1                -1          -3pi/4
//
//             +number           +0          +pi/2    // does not raise DBZ
//             -number           +0          -pi/2    // does not raise DBZ
//             +number           -0          +pi/2    // does not raise DBZ
//             -number           -0          -pi/2    // does not raise DBZ
//
//             +0                +number     +0
//             -0                +number     -0
//             +0                -number     +pi
//             -0                -number     -pi
//
//             +0                +0          +0      // does not raise invalid
//             -0                +0          -0      // does not raise invalid
//             +0                -0          +pi     // does not raise invalid
//             -0                -0          -pi     // does not raise invalid
//
//            NaN                anything    quiet Y
//            anything           NaN         quiet X

// atan2(+-0/+-0) sets double error tag to 37
// atan2f(+-0/+-0) sets single error tag to 38
// These are domain errors.
//
// Assembly macros
//=========================================
//
// Symbolic names for the registers used by atan2f and by the error path.
// The alloc at function entry requests 1 input, 5 locals and 4 outputs,
// i.e. r32 (input), r33-r37 (locals) and r38-r41 (outputs).


// integer registers
atan2f_GR_Addr_1              = r33   // walks atan2f_coef_table1
atan2f_GR_Addr_2              = r34   // walks atan2f_coef_table2
GR_SAVE_B0                    = r35

GR_SAVE_PFS                   = r36
GR_SAVE_GP                    = r37

GR_Parameter_X                = r38
GR_Parameter_Y                = r39
GR_Parameter_RESULT           = r40
GR_Parameter_TAG              = r41

// floating point registers
atan2f_coef_p1         = f32
atan2f_coef_p10        = f33
atan2f_coef_p7         = f34
atan2f_coef_p6         = f35

atan2f_coef_p3         = f36
atan2f_coef_p2         = f37
atan2f_coef_p9         = f38
atan2f_coef_p8         = f39
atan2f_coef_p5         = f40

atan2f_coef_p4         = f41
atan2f_const_piby2     = f42
atan2f_const_pi        = f43
atan2f_const_piby4     = f44
atan2f_const_3piby4    = f45

atan2f_xsq             = f46
atan2f_ysq             = f47
atan2f_xy              = f48
atan2f_const_1         = f49
atan2f_sgn_Y           = f50

atan2f_Z0              = f51
atan2f_A0              = f52
atan2f_Z               = f53
atan2f_A               = f54
atan2f_C               = f55

atan2f_U               = f56
atan2f_Usq             = f57
atan2f_U4              = f58
atan2f_U6              = f59
atan2f_U8              = f60

atan2f_poly_u109       = f61
atan2f_poly_u87        = f62
atan2f_poly_u65        = f63
atan2f_poly_u43        = f64
atan2f_poly_u21        = f65

atan2f_poly_u10to7     = f66
atan2f_poly_u6to3      = f67
atan2f_poly_u10to3     = f68
atan2f_poly_u10to0     = f69
atan2f_poly_u210       = f70

atan2f_T_numer         = f71
atan2f_T_denom         = f72
atan2f_G_numer         = f73
atan2f_G_denom         = f74
atan2f_p1rnum          = f75

atan2f_R_denom         = f76
atan2f_R_numer         = f77
atan2f_pR              = f78
atan2f_pRC             = f79
atan2f_pQRC            = f80

atan2f_Q1              = f81
atan2f_Q_beta          = f82
atan2f_Q2              = f83
atan2f_Q_beta2         = f84
atan2f_Q3              = f85

atan2f_r               = f86
atan2f_rsq             = f87
atan2f_poly_atan_U     = f88   // not referenced by the code below


// predicate registers
//atan2f_Pred_Swap     = p6 // |y| >  |x|
//atan2f_Pred_noSwap   = p7 // |y| <= |x|
//atan2f_Pred_Xpos     = p8 //  x  >=  0
//atan2f_Pred_Xneg     = p9 //  x  <   0


RODATA

.align 16

// Double-precision constants, laid out in the order the ldfpd pairs consume
// them. Table 1 is 6 * 8 = 0x30 bytes; the code reaches table 2 by adding
// 0x30 to the address of table 1, so table 2 must follow it immediately.
LOCAL_OBJECT_START(atan2f_coef_table1)
data8 0xBFD5555512191621 // p1
data8 0x3F522E5D33BC9BAA // p10
data8 0xBFA6E10BA401393F // p7
data8 0x3FB142A73D7C54E3 // p6
data8 0xBFC2473C5145EE38 // p3
data8 0x3FC9997E7AFBFF4E // p2
LOCAL_OBJECT_END(atan2f_coef_table1)

LOCAL_OBJECT_START(atan2f_coef_table2)
data8 0xBF7DEAADAA336451 // p9
data8 0x3F97105B4160F86B // p8
data8 0xBFB68EED6A8CFA32 // p5
data8 0x3FBC4F512B1865F5 // p4
data8 0x3ff921fb54442d18 // pi/2
data8 0x400921fb54442d18 // pi
data8 0x3fe921fb54442d18 // pi/4
data8 0x4002d97c7f3321d2 // 3pi/4
LOCAL_OBJECT_END(atan2f_coef_table2)



//==============================================================
// atan2f
//
// In:   f8 = y, f9 = x
// Out:  f8 = atan2(y,x), produced by a single-precision (.s) final operation
//
// Main path (x and y both finite, non-zero):
//   p6 (swap,    |y| >  |x|):  U = -Z,  R = -T,  C = sgn(y)*pi/2
//   p7 (no swap, |y| <= |x|):  U =  A,  R =  G,  C = sgn(y)*pi if x < 0,
//                                                    sgn(y)*0  if x >= 0
//   result = U*(1 + p1 U^2 + ... + p10 U^20) + R_numer*Q3 + p1*r^3 + C
//   where Q3 is a refined reciprocal of R_denom and r = R_numer*frcpa(R_denom).
//
// Any x or y that is zero, infinite or NaN is handled at
// ATAN2F_XY_INF_NAN_ZERO; x = y = 0 continues to __libm_error_region.
//==============================================================
.section .text
GLOBAL_IEEE754_ENTRY(atan2f)

{ .mfi
      alloc          r32 = ar.pfs,1,5,4,0
      frcpa.s1       atan2f_Z0,p0 = f1,f8       // Approx to 1/y
      nop.i          999
}
{ .mfi
      addl           atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp
      fma.s1         atan2f_xsq = f9,f9,f0      // x*x
      nop.i          999 ;;
}


{ .mfi
      ld8            atan2f_GR_Addr_1 = [atan2f_GR_Addr_1]
      frcpa.s1       atan2f_A0,p0 = f1,f9       // Approx to 1/x
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_ysq = f8,f8,f0      // y*y
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fcmp.ge.s1     p8,p9 = f9,f0              // Set p8 if x>=0, p9 if x<0
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_xy = f9,f8,f0       // x*y
      nop.i          999 ;;
}


{ .mfi
      add            atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1  // -> table 2
      fmerge.s       atan2f_sgn_Y = f8,f1       // sign of y on 1.0
      nop.i          999 ;;
}

{ .mmf
      ldfpd          atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
      ldfpd          atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
      fclass.m       p10,p0 = f9,0xe7           // Test x @inf|@snan|@qnan|@zero
}
;;

{ .mfi
      ldfpd          atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
      fma.s1         atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8   // y + Z*x
      nop.i          999
}
{ .mfi
      ldfpd          atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
      fma.s1         atan2f_Z = atan2f_Z0,f9,f0                 // Z = x*frcpa(y)
      nop.i          999 ;;
}


{ .mfi
      ldfpd          atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
      fma.s1         atan2f_G_denom = atan2f_A0,atan2f_ysq,f9   // x + A*y
      nop.i          999
}
{ .mfi
      ldfpd          atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
      fma.s1         atan2f_A = atan2f_A0,f8,f0                 // A = y*frcpa(x)
      nop.i          999 ;;
}

{ .mfi
      ldfpd          atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
      fclass.m       p11,p0 = f8,0xe7           // Test y @inf|@snan|@qnan|@zero
      nop.i          999
}
{ .mfb
      nop.m          999
      fnma.s1        atan2f_T_numer = atan2f_Z0,atan2f_xy,f9    // x - Z*y
(p10) br.cond.spnt   ATAN2F_XY_INF_NAN_ZERO ;;  // Branch on x nan,inf,zero
}


// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
{ .mfi
      nop.m          999
      fcmp.gt.s1     p6,p7 = atan2f_ysq,atan2f_xsq
      nop.i          999
}
{ .mfb
      nop.m          999
      fnma.s1        atan2f_G_numer = atan2f_A0,atan2f_xy,f8    // y - A*x
(p11) br.cond.spnt   ATAN2F_XY_INF_NAN_ZERO ;;  // Branch on y nan,inf,zero
}


// const_1 = sgn(y)*0 when x >= 0, sgn(y)*1 when x < 0; it scales pi in the
// no-swap value of C further down.
{ .mfi
      nop.m          999
(p8)  fma.s1         atan2f_const_1 = atan2f_sgn_Y,f0,f0
      nop.i          999
}
{ .mfi
      nop.m          999
(p9)  fma.s1         atan2f_const_1 = atan2f_sgn_Y,f1,f0
      nop.i          999 ;;
}


// Polynomial argument: U = -Z when swapping, U = A otherwise.
{ .mfi
      nop.m          999
(p6)  fnma.s1        atan2f_U = atan2f_Z,f1,f0
      nop.i          999
}
{ .mfi
      nop.m          999
(p6)  fma.s1         atan2f_Usq = atan2f_Z,atan2f_Z,f0
      nop.i          999 ;;
}


{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_U = atan2f_A,f1,f0
      nop.i          999
}
{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_Usq = atan2f_A,atan2f_A,f0
      nop.i          999 ;;
}


// Correction term: Q1 = frcpa(R_denom), with R_denom = T_denom or G_denom.
{ .mfi
      nop.m          999
(p6)  frcpa.s1       atan2f_Q1,p0 = f1,atan2f_T_denom
      nop.i          999
}
{ .mfi
      nop.m          999
(p6)  fma.s1         atan2f_R_denom = atan2f_T_denom,f1,f0
      nop.i          999 ;;
}


{ .mfi
      nop.m          999
(p7)  frcpa.s1       atan2f_Q1,p0 = f1,atan2f_G_denom
      nop.i          999
}
{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_R_denom = atan2f_G_denom,f1,f0
      nop.i          999 ;;
}


// R_numer = -T_numer when swapping (the swap case subtracts atan(v/u)),
// G_numer otherwise.
{ .mfi
      nop.m          999
(p6)  fnma.s1        atan2f_R_numer = atan2f_T_numer,f1,f0
      nop.i          999
}
{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_R_numer = atan2f_G_numer,f1,f0
      nop.i          999 ;;
}


// p1rnum = p1 * R_numer (same sign convention as R_numer)
{ .mfi
      nop.m          999
(p6)  fnma.s1        atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0
      nop.i          999 ;;
}
{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0
      nop.i          999 ;;
}


// Pairwise polynomial pieces in Usq: poly_uJK = pK + Usq*pJ
{ .mfi
      nop.m          999
      fma.s1         atan2f_U4 = atan2f_Usq,atan2f_Usq,f0
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
      nop.i          999 ;;
}


{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
      nop.i          999
}
{ .mfi
      nop.m          999
      fnma.s1        atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1   // 1 - Q1*R_denom
      nop.i          999 ;;
}


{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_r = atan2f_Q1,atan2f_R_numer,f0        // r = Q1*R_numer
      nop.i          999 ;;
}

// C: sgn(y)*pi/2 when swapping, const_1*pi otherwise
{ .mfi
      nop.m          999
(p6)  fma.s1         atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
      nop.i          999
}
{ .mfi
      nop.m          999
(p7)  fma.s1         atan2f_C = atan2f_const_1,atan2f_const_pi,f0
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_U6 = atan2f_U4,atan2f_Usq,f0
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_U8 = atan2f_U4,atan2f_U4,f0
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0        // p1 * r
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 // Q1 + Q1*beta
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_rsq = atan2f_r,atan2f_r,f0
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1  // 1 + Usq*(p1 + Usq*p2)
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fcmp.eq.s0     p8,p0 = f8,f9      // Dummy op to set flag on denormal inputs
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2   // Q2 + Q2*beta^2
      nop.i          999
}
{ .mfi
      nop.m          999
      fma.s1         atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C       // p1*r^3 + C
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
      nop.i          999 ;;
}

{ .mfi
      nop.m          999
      fma.s1         atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC
      nop.i          999 ;;
}

// Final combination; the .s completer gives the single-precision result.
{ .mfb
      nop.m          999
      fma.s.s0       f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
      br.ret.sptk    b0 ;;
}



// Special-case path: reached when x or y is zero, infinite or NaN.
// The cases are peeled off in this order: y NaN, x NaN, x = +inf,
// y = inf with x not -inf, x = -inf, then the zero cases.
ATAN2F_XY_INF_NAN_ZERO:

{ .mfi
      nop.m          999
      fclass.m       p10,p0 = f8,0xc3           // Is y nan
      nop.i          999
}
;;

{ .mfi
      nop.m          999
      fclass.m       p12,p0 = f9,0xc3           // Is x nan
      nop.i          999
}
;;

{ .mfi
      nop.m          999
      fclass.m       p6,p0 = f9,0x21            // Is x +inf
      nop.i          999
}
{ .mfb
      nop.m          999
(p10) fma.s.s0       f8 = f9,f8,f0              // Result quietized y if y is nan
(p10) br.ret.spnt    b0                         // Exit if y is nan
}
;;


{ .mfi
      nop.m          999
(p6)  fclass.m.unc   p7,p8 = f8,0x23            // x +inf, is y inf
      nop.i          999
}
{ .mfb
      nop.m          999
(p12) fnorm.s.s0     f8 = f9                    // Result quietized x if x is nan, y not nan
(p12) br.ret.spnt    b0                         // Exit if x is nan, y not nan
}
;;

// Here if x or y inf, or x or y zero
{ .mfi
      nop.m          999
      fcmp.eq.s0     p15,p0 = f8,f9             // Dummy op to set flag on denormal inputs
      nop.i          999
}
;;

{ .mfi
      nop.m          999
      fclass.m       p11,p12 = f9,0x22          // Is x -inf
      nop.i          999
}
{ .mfb
      nop.m          999
(p7)  fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_piby4,f0   // Result +-pi/4
(p7)  br.ret.spnt    b0                         // Exit if x +inf and y inf
}
;;

{ .mfb
      nop.m          999
(p8)  fmerge.s       f8 = f8,f0                 // If x +inf and y not inf, result +-0
(p8)  br.ret.spnt    b0                         // Exit if x +inf and y not inf
}
;;

{ .mfi
      nop.m          999
(p12) fclass.m.unc   p13,p0 = f8,0x23           // x not -inf, is y inf
      nop.i          999
}
;;

{ .mfi
      nop.m          999
(p11) fclass.m.unc   p14,p15 = f8,0x23          // x -inf, is y inf
      nop.i          999
}
;;

{ .mfi
      nop.m          999
      fclass.m       p6,p7 = f9,0x7             // Is x zero
      nop.i          999
}
{ .mfb
      nop.m          999
(p13) fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_piby2,f0   // Result +-pi/2
(p13) br.ret.spnt    b0                         // Exit if x not -inf and y inf
}
;;

{ .mfi
      nop.m          999
(p14) fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0  // Result +-3pi/4
      nop.i          999
}
{ .mfb
      nop.m          999
(p15) fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_pi,f0      // Result +-pi
(p11) br.ret.spnt    b0                         // Exit if x -inf
}
;;

// Here if x or y zero
{ .mfi
      nop.m          999
(p7)  fclass.m.unc   p8,p9 = f9,0x19            // x not zero, y zero, is x > zero
      nop.i          999
}
;;

{ .mfi
      nop.m          999
(p6)  fclass.m.unc   p10,p11 = f8,0x7           // x zero, is y zero
      nop.i          999
}
;;

{ .mfi
      nop.m          999
(p8)  fmerge.s       f8 = f8, f0                // x > zero and y zero, result is +-zero
      nop.i          999
}
{ .mfb
      nop.m          999
(p9)  fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_pi,f0      // x < 0, y 0, result +-pi
(p10) br.cond.spnt   __libm_error_region        // Branch if x zero and y zero
}
;;

{ .mfb
      nop.m          999
(p11) fma.s.s0       f8 = atan2f_sgn_Y, atan2f_const_piby2,f0   // x zero, y not zero
      br.ret.sptk    b0                         // Final special case exit
}
;;


GLOBAL_IEEE754_END(atan2f)
// NOTE(review): libm_alias_float_other is defined outside this file;
// presumably it publishes the public alias names for atan2f -- confirm
// against the macro definition.
libm_alias_float_other (__atan2, atan2)


//==============================================================
// __libm_error_region
//
// Reached from atan2f when both x and y are zero. Builds the default
// result in f10, passes error tag 38 (the atan2f(+-0/+-0) domain error)
// and calls __libm_error_support, then returns whatever value is left in
// the result slot.
//
// In:   f8 = y, f9 = x, atan2f_sgn_Y and atan2f_const_pi still live
// Out:  f8 = value loaded back from the result slot
//
// 64-byte frame, offsets relative to the new sp:
//   sp+16  f8  (address passed in GR_Parameter_X)
//   sp+32  f9  (address passed in GR_Parameter_Y)
//   sp+48  f10 (address passed in GR_Parameter_RESULT)
//==============================================================
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
      mov            GR_Parameter_TAG = 38
      fclass.m       p10,p11 = f9,0x5           // @zero | @pos
;;
// Default result: sign of y on zero when x is +0, otherwise sgn(y)*pi
(p10) fmerge.s       f10 = f8, f0
(p11) fma.s.s0       f10 = atan2f_sgn_Y, atan2f_const_pi,f0
;;

// No stop between the next two bundles: GR_Parameter_Y is computed from
// the old sp, so it ends up at new sp + 32.
{ .mfi
      add            GR_Parameter_Y=-32,sp      // Parameter 2 value
      nop.f          999
.save ar.pfs,GR_SAVE_PFS
      mov            GR_SAVE_PFS=ar.pfs         // Save ar.pfs
}

{ .mfi
.fframe 64
      add            sp=-64,sp                  // Create new stack
      nop.f          0
      mov            GR_SAVE_GP=gp              // Save gp
}
;;

{ .mmi
      stfs           [GR_Parameter_Y] = f9,16   // Store Parameter 2 on stack
      add            GR_Parameter_X = 16,sp     // Parameter 1 address
.save b0, GR_SAVE_B0
      mov            GR_SAVE_B0=b0              // Save b0
}
;;


.body
{ .mib
      stfs           [GR_Parameter_X] = f8      // Store Parameter 1 on stack
      add            GR_Parameter_RESULT = 0,GR_Parameter_Y
      nop.b          0                          // Parameter 3 address
}
{ .mib
      stfs           [GR_Parameter_Y] = f10     // Store Parameter 3 on stack
      add            GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk   b0=__libm_error_support#   // Call error handling function
}
;;
// Recompute the result address after the call (see the 08/15/00 History
// entry: GR_Parameter_RESULT is overwritten by the call).
{ .mmi
      nop.m          0
      nop.m          0
      add            GR_Parameter_RESULT = 48,sp
};;

{ .mmi
      ldfs           f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
      add            sp = 64,sp                 // Restore stack pointer
      mov            b0 = GR_SAVE_B0            // Restore return address
}
;;

{ .mib
      mov            gp = GR_SAVE_GP            // Restore gp
      mov            ar.pfs = GR_SAVE_PFS       // Restore ar.pfs
      br.ret.sptk    b0                         // Return
}
;;

LOCAL_LIBM_END(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#