1.file "atan2f.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38
39// History
40//==============================================================
41// 06/01/00 Initial version
42// 08/15/00 Bundle added after call to __libm_error_support to properly
43//          set [the previously overwritten] GR_Parameter_RESULT.
44// 08/17/00 Changed predicate register macro-usage to direct predicate
45//          names due to an assembler bug.
46// 01/05/01 Fixed flag settings for denormal input.
47// 01/19/01 Added documentation
48// 01/30/01 Improved speed
49// 02/06/02 Corrected .section statement
50// 05/20/02 Cleaned up namespace and sf0 syntax
51// 02/06/03 Reordered header: .section, .global, .proc, .align
52
53// Description
54//=========================================
55// The atan2 function computes the principal value of the arc tangent of y/x using
56// the signs of both arguments to determine the quadrant of the return value.
57// A domain error may occur if both arguments are zero.
58
59// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
60
61//..
62//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
63//..v and u can be negative. We state the relationship between atan2(y,x) and
64//..atan(v/u).
65//..
66//..Let swap = false if v = y, and swap = true if v = x.
67//..Define C according to the matrix
68//..
69//..                   TABLE FOR C
70//..                              x +ve       x -ve
71//..   no swap (swap = false)    sgn(y)*0     sgn(y)*pi
72//..   swap    (swap = true )    sgn(y)*pi/2  sgn(y)*pi/2
73//..
74//..   atan2(y,x) =  C +  atan(v/u)  if no swap
75//..   atan2(y,x) =  C -  atan(v/u)  if  swap
76//..
77//..This relationship is more efficient to compute as we accommodate signs in v and u
78//..saving the need to obtain the absolute value before computation can proceed.
79//..
80//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
81//..A = y * frcpa(x)    (so A = (y/x)(1 - beta))
82//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
83//..a correction.
84//..atan(A) is approximated by a polynomial
85//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
86//..atan(G) is approximated as follows:
87//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
88//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
89//..
90//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
91//..Z = x * frcpa(y)    (so Z = (x/y)(1 - beta))
92//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
93//..a correction.
94//..atan(Z) is approximated by a polynomial
95//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
96//..atan(T) is approximated as follows:
97//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
98//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
99//..
100//..
101//..A = y * frcpa(x)
102//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
103//..
104//..This polynomial is computed as follows:
105//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
106//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
107//..
108//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
109//..poly_A1 = poly_A2 + A4 * poly_A1
110//..poly_A1 = poly_A3 + A4 * poly_A1
111//..
112//..poly_A4 = p1 * A
113//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
114//..poly_A5 = p2 + Asq * poly_A5
115//..poly_A4 = poly_A4 + A5 * poly_A5
116//..
117//..atan_A = poly_A4 + A11 * poly_A1
118//..
119//..atan(G) is approximated as follows:
120//..G_numer = y - A*x, G_denom = x + A*y
121//..H1 = frcpa(G_denom)
122//..H_beta = 1 - H1 * G_denom
123//..H2 = H1 + H1 * H_beta
124//..H_beta2 = H_beta*H_beta
125//..H3 = H2 + H2*H_beta2
126//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
127//..atan_G = G_numer*H3 + atan_G
128//..
129//..
130//..A = y * frcpa(x)
131//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
132//..
133//..This polynomial is computed as follows:
134//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
135//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
136//..
137//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
138//..poly_A1 = poly_A2 + A4 * poly_A1
139//..poly_A1 = poly_A3 + A4 * poly_A1
140//..
141//..poly_A4 = p1 * A
142//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
143//..poly_A5 = p2 + Asq * poly_A5
144//..poly_A4 = poly_A4 + A5 * poly_A5
145//..
146//..atan_A = poly_A4 + A11 * poly_A1
147//..
148//..
149//..====================================================================
150//..	COEFFICIENTS USED IN THE COMPUTATION
151//..====================================================================
152
153//coef_pj, j = 1,2,...,10;  atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
154//
155//  coef_p1          =      -.3333332707155439167401311806315789E+00
156//  coef_p1   in dbl = BFD5 5555 1219 1621
157//
158//  coef_p2          =       .1999967670926658391827857030875748E+00
159//  coef_p2   in dbl = 3FC9 997E 7AFB FF4E
160//
161//  coef_p3          =      -.1427989384500152360161563301087296E+00
162//  coef_p3   in dbl = BFC2 473C 5145 EE38
163//
164//  coef_p4          =       .1105852823460720770079031213661163E+00
165//  coef_p4   in dbl = 3FBC 4F51 2B18 65F5
166//
167//  coef_p5          =      -.8811839915595312348625710228448363E-01
168//  coef_p5   in dbl = BFB6 8EED 6A8C FA32
169//
170//  coef_p6          =       .6742329836955067042153645159059714E-01
171//  coef_p6   in dbl = 3FB1 42A7 3D7C 54E3
172//
173//  coef_p7          =      -.4468571068774672908561591262231909E-01
174//  coef_p7   in dbl = BFA6 E10B A401 393F
175//
176//  coef_p8          =       .2252333246746511135532726960586493E-01
177//  coef_p8   in dbl = 3F97 105B 4160 F86B
178//
179//  coef_p9          =      -.7303884867007574742501716845542314E-02
180//  coef_p9   in dbl = BF7D EAAD AA33 6451
181//
182//  coef_p10         =       .1109686868355312093949039454619058E-02
183//  coef_p10  in dbl = 3F52 2E5D 33BC 9BAA
184//
185
186// Special values
187//==============================================================
188//              Y                 x          Result
189//             +number           +inf        +0
190//             -number           +inf        -0
191//             +number           -inf        +pi
192//             -number           -inf        -pi
193//
194//             +inf              +number     +pi/2
195//             -inf              +number     -pi/2
196//             +inf              -number     +pi/2
197//             -inf              -number     -pi/2
198//
199//             +inf              +inf        +pi/4
200//             -inf              +inf        -pi/4
201//             +inf              -inf        +3pi/4
202//             -inf              -inf        -3pi/4
203//
204//             +1                +1          +pi/4
205//             -1                +1          -pi/4
206//             +1                -1          +3pi/4
207//             -1                -1          -3pi/4
208//
209//             +number           +0          +pi/2    // does not raise DBZ
210//             -number           +0          -pi/2    // does not raise DBZ
211//             +number           -0          +pi/2    // does not raise DBZ
212//             -number           -0          -pi/2    // does not raise DBZ
213//
214//             +0                +number     +0
215//             -0                +number     -0
216//             +0                -number     +pi
217//             -0                -number     -pi
218//
219//             +0                +0          +0      // does not raise invalid
220//             -0                +0          -0      // does not raise invalid
221//             +0                -0          +pi     // does not raise invalid
222//             -0                -0          -pi     // does not raise invalid
223//
224//            NaN             anything      quiet Y
225//            anything        NaN           quiet X
226
227// atan2(+-0/+-0) sets double error tag to 37
228// atan2f(+-0/+-0) sets single error tag to 38
229// These are domain errors.
230
231
232//
233// Assembly macros
234//=========================================
235
236
237// integer registers
// The alloc at the top of atan2f requests 1 input, 5 local and 4 output
// registers starting at r32, so r33-r37 below are locals and r38-r41 are
// the outgoing-argument registers.
// Addr_1 / Addr_2 walk the two coefficient tables with post-increment loads.
238atan2f_GR_Addr_1              = r33
239atan2f_GR_Addr_2              = r34
// Saved b0, ar.pfs and gp around the call made in __libm_error_region
240GR_SAVE_B0                    = r35
241
242GR_SAVE_PFS                   = r36
243GR_SAVE_GP                    = r37
244
// Output registers; only written in __libm_error_region, where X, Y and
// RESULT receive stack addresses and TAG receives the error tag.
245GR_Parameter_X                = r38
246GR_Parameter_Y                = r39
247GR_Parameter_RESULT           = r40
248GR_Parameter_TAG              = r41
249
250// floating point registers
// Polynomial coefficients p1..p10, loaded pairwise (ldfpd) from the tables
251atan2f_coef_p1         = f32
252atan2f_coef_p10        = f33
253atan2f_coef_p7         = f34
254atan2f_coef_p6         = f35
255
256atan2f_coef_p3         = f36
257atan2f_coef_p2         = f37
258atan2f_coef_p9         = f38
259atan2f_coef_p8         = f39
260atan2f_coef_p5         = f40
261
262atan2f_coef_p4         = f41
// Multiples of pi used for the quadrant constant and the special cases
263atan2f_const_piby2     = f42
264atan2f_const_pi        = f43
265atan2f_const_piby4     = f44
266atan2f_const_3piby4    = f45
267
// x*x, y*y and x*y; const_1 is 0 (x >= 0) or sgn(y) (x < 0); sgn_Y is
// 1.0 carrying the sign of y
268atan2f_xsq             = f46
269atan2f_ysq             = f47
270atan2f_xy              = f48
271atan2f_const_1         = f49
272atan2f_sgn_Y           = f50
273
// Z0 ~ 1/y and A0 ~ 1/x (frcpa); Z = x*Z0, A = y*A0; C is the quadrant
// constant added to the polynomial result
274atan2f_Z0              = f51
275atan2f_A0              = f52
276atan2f_Z               = f53
277atan2f_A               = f54
278atan2f_C               = f55
279
// U is the polynomial argument (A, or -Z when |y| > |x|) and its powers
280atan2f_U               = f56
281atan2f_Usq             = f57
282atan2f_U4              = f58
283atan2f_U6              = f59
284atan2f_U8              = f60
285
// Partial sums of the polynomial in Usq; the digits name the coefficients
// that each partial sum combines
286atan2f_poly_u109       = f61
287atan2f_poly_u87        = f62
288atan2f_poly_u65        = f63
289atan2f_poly_u43        = f64
290atan2f_poly_u21        = f65
291
292atan2f_poly_u10to7     = f66
293atan2f_poly_u6to3      = f67
294atan2f_poly_u10to3     = f68
295atan2f_poly_u10to0     = f69
296atan2f_poly_u210       = f70
297
// Numerator/denominator of the correction term for the swap (T) and
// no-swap (G) cases; p1rnum = p1 * selected numerator
298atan2f_T_numer         = f71
299atan2f_T_denom         = f72
300atan2f_G_numer         = f73
301atan2f_G_denom         = f74
302atan2f_p1rnum          = f75
303
// Selected (R) numerator/denominator and the running sums that fold the
// correction term and C together
304atan2f_R_denom         = f76
305atan2f_R_numer         = f77
306atan2f_pR              = f78
307atan2f_pRC             = f79
308atan2f_pQRC            = f80
309
// Q1 = frcpa(R_denom), then refined: Q2 = Q1 + Q1*beta, Q3 = Q2 + Q2*beta^2
310atan2f_Q1              = f81
311atan2f_Q_beta          = f82
312atan2f_Q2              = f83
313atan2f_Q_beta2         = f84
314atan2f_Q3              = f85
315
// r = Q1 * R_numer (low precision correction) and its square
316atan2f_r               = f86
317atan2f_rsq             = f87
// NOTE(review): atan2f_poly_atan_U is not referenced by the visible code
318atan2f_poly_atan_U     = f88
319
320
321// predicate registers
322//atan2f_Pred_Swap     = p6 // |y| >  |x|
323//atan2f_Pred_noSwap   = p7 // |y| <= |x|
324//atan2f_Pred_Xpos     = p8 //  x  >=  0
325//atan2f_Pred_Xneg     = p9 //  x  <   0
326
327
// Read-only constants, stored as IEEE double bit patterns.
// Entries are ordered in the pairs consumed by the ldfpd loads in atan2f.
// atan2f reaches table2 as (address of table1) + 0x30, so the code relies on
// table2 being placed directly after the six data8 entries of table1.
328RODATA
329
330.align 16
331
332LOCAL_OBJECT_START(atan2f_coef_table1)
333data8 0xBFD5555512191621 // p1
334data8 0x3F522E5D33BC9BAA // p10
335data8 0xBFA6E10BA401393F // p7
336data8 0x3FB142A73D7C54E3 // p6
337data8 0xBFC2473C5145EE38 // p3
338data8 0x3FC9997E7AFBFF4E // p2
339LOCAL_OBJECT_END(atan2f_coef_table1)
340
341LOCAL_OBJECT_START(atan2f_coef_table2)
342data8 0xBF7DEAADAA336451 // p9
343data8 0x3F97105B4160F86B // p8
344data8 0xBFB68EED6A8CFA32 // p5
345data8 0x3FBC4F512B1865F5 // p4
346data8 0x3ff921fb54442d18 // pi/2
347data8 0x400921fb54442d18 // pi
348data8 0x3fe921fb54442d18 // pi/4
349data8 0x4002d97c7f3321d2 // 3pi/4
350LOCAL_OBJECT_END(atan2f_coef_table2)
351
352
353
// atan2f(y, x): arc tangent of y/x with the quadrant taken from the signs
// of both arguments.
//   In:   f8 = y, f9 = x
//   Out:  f8 = result, produced by the final fma.s.s0
// Intermediate arithmetic is issued with status field s1; the dummy compare
// and the final operation use s0.
// If x or y is a zero, an infinity or a NaN, control leaves this path for
// ATAN2F_XY_INF_NAN_ZERO.
354.section .text
355GLOBAL_IEEE754_ENTRY(atan2f)
356
357{     .mfi
// Frame: 1 input, 5 locals (r33-r37), 4 outputs (r38-r41), 0 rotating
358     alloc      r32           = ar.pfs,1,5,4,0
359     frcpa.s1  atan2f_Z0,p0     =    f1,f8   // Approx to 1/y
360     nop.i  999
361}
362{     .mfi
// Address of the linkage-table entry for table1; the ld8 below fetches the
// table address itself
363     addl      atan2f_GR_Addr_1    =    @ltoff(atan2f_coef_table1),gp
364     fma.s1    atan2f_xsq     =    f9,f9,f0
365     nop.i  999 ;;
366}
367
368
369{     .mfi
370     ld8       atan2f_GR_Addr_1    =    [atan2f_GR_Addr_1]
371     frcpa.s1  atan2f_A0,p0     =    f1,f9   // Approx to 1/x
372     nop.i  999
373}
374{     .mfi
375     nop.m  999
376     fma.s1    atan2f_ysq     =    f8,f8,f0
377     nop.i  999 ;;
378}
379
380{     .mfi
381     nop.m  999
382     fcmp.ge.s1     p8,p9  =    f9,f0  // Set p8 if x>=0, p9 if x<0
383     nop.i  999
384}
385{     .mfi
386     nop.m  999
387     fma.s1    atan2f_xy     =    f9,f8,f0
388     nop.i  999 ;;
389}
390
391
392{     .mfi
// Addr_2 = table1 + 0x30 (six data8 entries), used as the base of table2
393     add   atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1
// sgn_Y = 1.0 with the sign of y
394     fmerge.s  atan2f_sgn_Y   =    f8,f1
395     nop.i  999 ;;
396}
397
// Load coefficients and constants two doubles at a time; each load
// post-increments its pointer by 16
398{     .mmf
399     ldfpd     atan2f_coef_p1,atan2f_coef_p10 =    [atan2f_GR_Addr_1],16
400     ldfpd     atan2f_coef_p9,atan2f_coef_p8 =    [atan2f_GR_Addr_2],16
401     fclass.m  p10,p0 =    f9,0xe7	// Test x @inf|@snan|@qnan|@zero
402}
403;;
404
405{     .mfi
406     ldfpd     atan2f_coef_p7,atan2f_coef_p6 =    [atan2f_GR_Addr_1],16
// T_denom = y + Z0*x*x
407     fma.s1    atan2f_T_denom =    atan2f_Z0,atan2f_xsq,f8
408     nop.i  999
409}
410{     .mfi
411     ldfpd     atan2f_coef_p5,atan2f_coef_p4     =    [atan2f_GR_Addr_2],16
// Z = x * Z0, approximation to x/y
412     fma.s1    atan2f_Z                      =    atan2f_Z0,f9,f0
413     nop.i  999 ;;
414}
415
416
417{     .mfi
418     ldfpd     atan2f_coef_p3,atan2f_coef_p2 =    [atan2f_GR_Addr_1],16
// G_denom = x + A0*y*y
419     fma.s1    atan2f_G_denom =    atan2f_A0,atan2f_ysq,f9
420     nop.i  999
421}
422{     .mfi
423     ldfpd     atan2f_const_piby2,atan2f_const_pi =    [atan2f_GR_Addr_2],16
// A = y * A0, approximation to y/x
424     fma.s1    atan2f_A                           =    atan2f_A0,f8,f0
425     nop.i  999 ;;
426}
427
428{     .mfi
429     ldfpd     atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
430     fclass.m  p11,p0 = f8,0xe7	// Test y @inf|@snan|@qnan|@zero
431     nop.i  999
432}
433{     .mfb
434     nop.m  999
// T_numer = x - Z0*x*y
435     fnma.s1   atan2f_T_numer =    atan2f_Z0,atan2f_xy,f9
436(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;   // Branch on x nan,inf,zero
437}
438
439
440// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
441{     .mfi
442     nop.m  999
443     fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
444     nop.i  999
445}
446{     .mfb
447     nop.m  999
// G_numer = y - A0*x*y
448     fnma.s1   atan2f_G_numer =    atan2f_A0,atan2f_xy,f8
449(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;;  // Branch on y nan,inf,zero
450}
451
452
// const_1 multiplies pi in the no-swap constant C: sgn_Y*0 when x >= 0 (p8),
// sgn_Y*1 when x < 0 (p9)
453{     .mfi
454     nop.m  999
455(p8) fma.s1    atan2f_const_1 =    atan2f_sgn_Y,f0,f0
456     nop.i  999
457}
458{     .mfi
459     nop.m  999
460(p9) fma.s1    atan2f_const_1 =    atan2f_sgn_Y,f1,f0
461     nop.i  999 ;;
462}
463
464
// Swap case (p6): U = -Z, Usq = Z*Z
465{     .mfi
466     nop.m  999
467(p6) fnma.s1    atan2f_U       =    atan2f_Z,f1,f0
468     nop.i  999
469}
470{     .mfi
471     nop.m  999
472(p6) fma.s1    atan2f_Usq     =    atan2f_Z,atan2f_Z,f0
473     nop.i  999 ;;
474}
475
476
// No-swap case (p7): U = A, Usq = A*A
477{     .mfi
478     nop.m  999
479(p7) fma.s1    atan2f_U       =    atan2f_A,f1,f0
480     nop.i  999
481}
482{     .mfi
483     nop.m  999
484(p7) fma.s1    atan2f_Usq     =    atan2f_A,atan2f_A,f0
485     nop.i  999 ;;
486}
487
488
// Q1 = frcpa approximation to 1/R_denom, where R_denom is T_denom (swap)
// or G_denom (no swap)
489{     .mfi
490     nop.m  999
491(p6) frcpa.s1  atan2f_Q1,p0    =    f1,atan2f_T_denom
492     nop.i  999
493}
494{     .mfi
495     nop.m  999
496(p6) fma.s1    atan2f_R_denom =   atan2f_T_denom,f1,f0
497     nop.i  999 ;;
498}
499
500
501{     .mfi
502     nop.m  999
503(p7) frcpa.s1  atan2f_Q1,p0    =    f1,atan2f_G_denom
504     nop.i  999
505}
506{     .mfi
507     nop.m  999
508(p7) fma.s1    atan2f_R_denom =   atan2f_G_denom,f1,f0
509     nop.i  999 ;;
510}
511
512
// R_numer = -T_numer (swap) or G_numer (no swap)
513{     .mfi
514     nop.m  999
515(p6) fnma.s1    atan2f_R_numer =   atan2f_T_numer,f1,f0
516     nop.i  999
517}
518{     .mfi
519     nop.m  999
520(p7) fma.s1    atan2f_R_numer =   atan2f_G_numer,f1,f0
521     nop.i  999 ;;
522}
523
524
// p1rnum = p1 * R_numer, with the same sign selection as R_numer
525{     .mfi
526     nop.m  999
527(p6) fnma.s1    atan2f_p1rnum =   atan2f_T_numer,atan2f_coef_p1,f0
528     nop.i  999 ;;
529}
530{     .mfi
531     nop.m  999
532(p7) fma.s1    atan2f_p1rnum =   atan2f_G_numer,atan2f_coef_p1,f0
533     nop.i  999 ;;
534}
535
536
// Polynomial in Usq: pair up adjacent coefficients first
// (poly_uJK = pK + Usq*pJ), then combine the pairs with U4, U8 and U6
537{     .mfi
538     nop.m  999
539     fma.s1    atan2f_U4 =    atan2f_Usq,atan2f_Usq,f0
540     nop.i  999
541}
542{     .mfi
543     nop.m  999
544     fma.s1    atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
545     nop.i  999 ;;
546}
547
548{     .mfi
549     nop.m  999
550     fma.s1    atan2f_poly_u87 =    atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
551     nop.i  999
552}
553{     .mfi
554     nop.m  999
555     fma.s1    atan2f_poly_u65 =    atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
556     nop.i  999 ;;
557}
558
559
560{     .mfi
561     nop.m  999
562     fma.s1    atan2f_poly_u43 =    atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
563     nop.i  999
564}
565{     .mfi
566     nop.m  999
// Q_beta = 1 - Q1*R_denom, the error of the reciprocal approximation
567     fnma.s1   atan2f_Q_beta  =    atan2f_Q1,atan2f_R_denom,f1
568     nop.i  999 ;;
569}
570
571
572{     .mfi
573     nop.m  999
574     fma.s1    atan2f_poly_u21 =    atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
575     nop.i  999
576}
577{     .mfi
578     nop.m  999
// r = Q1 * R_numer, low precision value of the correction quotient
579     fma.s1    atan2f_r  =    atan2f_Q1,atan2f_R_numer,f0
580     nop.i  999 ;;
581}
582
// Quadrant constant C: sgn(y)*pi/2 when swapped, const_1*pi otherwise
583{     .mfi
584     nop.m  999
585(p6) fma.s1    atan2f_C  =    atan2f_sgn_Y,atan2f_const_piby2,f0
586     nop.i  999
587}
588{     .mfi
589     nop.m  999
590(p7) fma.s1    atan2f_C  =    atan2f_const_1,atan2f_const_pi,f0
591     nop.i  999 ;;
592}
593
594{     .mfi
595     nop.m  999
596     fma.s1    atan2f_U6 =    atan2f_U4,atan2f_Usq,f0
597     nop.i  999
598}
599{     .mfi
600     nop.m  999
601     fma.s1    atan2f_U8 =    atan2f_U4,atan2f_U4,f0
602     nop.i  999 ;;
603}
604
605{     .mfi
606     nop.m  999
607     fma.s1    atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
608     nop.i  999
609}
610{     .mfi
611     nop.m  999
// pR = p1 * R_numer * Q1
612     fma.s1    atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0
613     nop.i  999 ;;
614}
615
616{     .mfi
617     nop.m  999
618     fma.s1    atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
619     nop.i  999
620}
621{     .mfi
622     nop.m  999
// Q2 = Q1 + Q1*Q_beta, first refinement of the reciprocal
623     fma.s1    atan2f_Q2 =    atan2f_Q1,atan2f_Q_beta,atan2f_Q1
624     nop.i  999 ;;
625}
626
627{     .mfi
628     nop.m  999
629     fma.s1    atan2f_Q_beta2 =    atan2f_Q_beta,atan2f_Q_beta,f0
630     nop.i  999
631}
632{     .mfi
633     nop.m  999
634     fma.s1    atan2f_rsq     =    atan2f_r,atan2f_r,f0
635     nop.i  999 ;;
636}
637
638{     .mfi
639     nop.m  999
// poly_u210 = 1 + Usq*(p1 + Usq*p2)
640     fma.s1    atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1
641     nop.i  999 ;;
642}
643
644{     .mfi
645     nop.m 999
646     fcmp.eq.s0 p8,p0 = f8,f9      // Dummy op to set flag on denormal inputs
647     nop.i 999
648}
649{     .mfi
650     nop.m  999
651     fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
652     nop.i  999 ;;
653}
654
655{     .mfi
656     nop.m                 999
// Q3 = Q2 + Q2*Q_beta^2, second refinement of the reciprocal
657     fma.s1    atan2f_Q3 =    atan2f_Q2,atan2f_Q_beta2,atan2f_Q2
658     nop.i                 999
659}
660{     .mfi
661     nop.m  999
// pRC = C + r^2 * pR
662     fma.s1    atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C
663     nop.i  999 ;;
664}
665
666{     .mfi
667     nop.m  999
668     fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
669     nop.i  999 ;;
670}
671
672{     .mfi
673     nop.m  999
// pQRC = R_numer*Q3 + pRC, i.e. C plus the full correction term
674     fma.s1    atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC
675     nop.i  999 ;;
676}
677
678{     .mfb
679     nop.m  999
// Result = U * poly_u10to0 + pQRC, written to f8 with the .s completer
680     fma.s.s0    f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
681     br.ret.sptk b0 ;;
682}
683
684
685
// Special-operand path, entered from atan2f when x (f9) or y (f8) is a zero,
// an infinity or a NaN.  Cases are peeled off in this order, each returning
// with the result in f8:
//   y NaN; x NaN; x = +inf; y = inf (x not -inf); x = -inf; then the cases
//   where x or y is zero.  x = y = 0 branches to __libm_error_region.
// Uses atan2f_sgn_Y and the pi constants set up before the branch here.
686ATAN2F_XY_INF_NAN_ZERO:
687
688{ .mfi
689      nop.m 999
690      fclass.m   p10,p0 = f8,0xc3	// Is y nan
691      nop.i 999
692}
693;;
694
695{ .mfi
696      nop.m 999
697      fclass.m   p12,p0 = f9,0xc3	// Is x nan
698      nop.i 999
699}
700;;
701
702{ .mfi
703      nop.m 999
704      fclass.m   p6,p0 = f9,0x21	// Is x +inf
705      nop.i 999
706}
707{ .mfb
708      nop.m 999
709(p10) fma.s.s0 f8  = f9,f8,f0          // Result quietized y if y is nan
710(p10) br.ret.spnt b0                // Exit if y is nan
711}
712;;
713
714
715{ .mfi
716      nop.m 999
// p7 = x +inf and y inf, p8 = x +inf and y not inf; both clear if x is not +inf
717(p6)  fclass.m.unc   p7,p8 = f8,0x23	// x +inf, is y inf
718      nop.i 999
719}
720{ .mfb
721      nop.m 999
722(p12) fnorm.s.s0 f8 = f9               // Result quietized x if x is nan, y not nan
723(p12) br.ret.spnt b0                // Exit if x is nan, y not nan
724}
725;;
726
727// Here if x or y inf, or x or y zero
728{ .mfi
729      nop.m 999
730      fcmp.eq.s0 p15,p0 = f8,f9     // Dummy op to set flag on denormal inputs
731      nop.i 999
732}
733;;
734
735{ .mfi
736      nop.m 999
// p12 is reused here: from now on it means "x is not -inf"
737      fclass.m   p11,p12 = f9,0x22	// Is x -inf
738      nop.i 999
739}
740{ .mfb
741      nop.m 999
742(p7)  fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
743(p7)  br.ret.spnt b0            // Exit if x +inf and y inf
744}
745;;
746
747{ .mfb
748      nop.m 999
749(p8)  fmerge.s   f8 = f8,f0     // If x +inf and y not inf, result +-0
750(p8)  br.ret.spnt b0            // Exit if x +inf and y not inf
751}
752;;
753
754{ .mfi
755      nop.m 999
756(p12) fclass.m.unc   p13,p0 = f8,0x23	// x not -inf, is y inf
757      nop.i 999
758}
759;;
760
761{ .mfi
762      nop.m 999
// p14 = x -inf and y inf, p15 = x -inf and y not inf
763(p11) fclass.m.unc   p14,p15 = f8,0x23	// x -inf, is y inf
764      nop.i 999
765}
766;;
767
768{ .mfi
769      nop.m 999
770      fclass.m  p6,p7 = f9,0x7	// Is x zero
771      nop.i 999
772}
773{ .mfb
774      nop.m 999
775(p13) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
776(p13) br.ret.spnt b0           // Exit if x not -inf and y inf
777}
778;;
779
780{ .mfi
781      nop.m 999
782(p14) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
783      nop.i 999
784}
785{ .mfb
786      nop.m 999
787(p15) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
788(p11) br.ret.spnt b0           // Exit if x -inf
789}
790;;
791
792// Here if x or y zero
793{ .mfi
794      nop.m 999
// p8 = x > 0, p9 = x < 0; both clear when x is zero
795(p7)  fclass.m.unc   p8,p9 = f9,0x19	// x not zero, y zero, is x > zero
796      nop.i 999
797}
798;;
799
800{ .mfi
801      nop.m 999
// p10 = x and y both zero, p11 = x zero and y not zero
802(p6)  fclass.m.unc   p10,p11 = f8,0x7	// x zero, is y zero
803      nop.i 999
804}
805;;
806
807{ .mfi
808      nop.m 999
809(p8)  fmerge.s  f8 = f8, f0  // x > zero and y zero, result is +-zero
810      nop.i 999
811}
812{ .mfb
813      nop.m 999
814(p9)  fma.s.s0  f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
815(p10) br.cond.spnt   __libm_error_region // Branch if x zero and y zero
816}
817;;
818
819{ .mfb
820      nop.m 999
821(p11) fma.s.s0  f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
822      br.ret.sptk b0         // Final special case exit
823}
824;;
825
826
827GLOBAL_IEEE754_END(atan2f)
// NOTE(review): libm_alias_float_other is defined outside this file;
// presumably it publishes the public alias names - confirm against its definition
828libm_alias_float_other (__atan2, atan2)
829
830
// Error stub reached from the special-operand path when x (f9) and y (f8)
// are both zero.  It computes the default result in f10, spills the operands
// and f10 as single precision values into a new 64-byte stack frame, calls
// __libm_error_support with tag 38 and returns whatever value is then found
// in the result slot.
// Stack slots after the frame is created (offsets from the new sp):
//   sp+16 : f8   (GR_Parameter_X points here)
//   sp+32 : f9   (GR_Parameter_Y points here at the call)
//   sp+48 : f10  (GR_Parameter_RESULT points here)
831LOCAL_LIBM_ENTRY(__libm_error_region)
832.prologue
833         mov            GR_Parameter_TAG      = 38
834         fclass.m       p10,p11               = f9,0x5	// @zero | @pos
835;;
// Default result: zero with the sign of y when x is +0, otherwise sgn(y)*pi
836(p10)    fmerge.s       f10                   = f8, f0
837(p11)    fma.s.s0          f10                   = atan2f_sgn_Y, atan2f_const_pi,f0
838;;
839
840{ .mfi
// Computed from the old sp; equals new sp + 32 once the frame is created
841        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
842        nop.f 999
843.save   ar.pfs,GR_SAVE_PFS
844        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
845}
846
847{ .mfi
848.fframe 64
849        add sp=-64,sp                           // Create new stack
850        nop.f 0
851        mov GR_SAVE_GP=gp                       // Save gp
852}
853;;
854
855{ .mmi
// Post-increment leaves GR_Parameter_Y at sp+48, the result slot
856        stfs [GR_Parameter_Y] = f9,16         // Store Parameter 2 on stack
857        add GR_Parameter_X = 16,sp              // Parameter 1 address
858.save   b0, GR_SAVE_B0
859        mov GR_SAVE_B0=b0                       // Save b0
860}
861;;
862
863
864.body
865{ .mib
866        stfs [GR_Parameter_X] = f8            // Store Parameter 1 on stack
867        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
868        nop.b 0                                 // Parameter 3 address
869}
870{ .mib
871        stfs [GR_Parameter_Y] = f10       // Store Parameter 3 on stack
// Move GR_Parameter_Y back to sp+32, the slot holding f9
872        add   GR_Parameter_Y = -16,GR_Parameter_Y
873        br.call.sptk b0=__libm_error_support#   // Call error handling function
874}
875;;
// Recompute the result slot address after the call
876{ .mmi
877        nop.m 0
878        nop.m 0
879        add   GR_Parameter_RESULT = 48,sp
880};;
881
882{ .mmi
883        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
884.restore sp
885        add   sp = 64,sp                       // Restore stack pointer
886        mov   b0 = GR_SAVE_B0                  // Restore return address
887}
888;;
889
890{ .mib
891        mov   gp = GR_SAVE_GP                  // Restore gp
892        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
893        br.ret.sptk     b0                     // Return
894}
895;;
896
897LOCAL_LIBM_END(__libm_error_region)
898
// External error handler called above; declared here as a global function symbol
899.type   __libm_error_support#,@function
900.global __libm_error_support#
901