1.file "atan2.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// History
40//==============================================================
41// 02/02/00  Initial version
42// 04/04/00  Unwind support added
43// 08/15/00  Bundle added after call to __libm_error_support to properly
44//           set [the previously overwritten] GR_Parameter_RESULT.
45// 08/17/00  Changed predicate register macro-usage to direct predicate
46//           names due to an assembler bug.
47// 09/28/00  Updated to set invalid on SNaN inputs
48// 01/19/01  Fixed flags for small results
49// 04/13/01  Rescheduled to make all paths faster
50// 05/20/02  Cleaned up namespace and sf0 syntax
51// 08/20/02  Corrected inexact flag and directed rounding symmetry bugs
52// 02/06/03  Reordered header: .section, .global, .proc, .align
53// 04/17/03  Added missing mutex directive
54// 12/23/03  atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
55//
56// API
57//==============================================================
58// double atan2(double Y, double X)
59//
60// Overview of operation
61//==============================================================
62//
63// The atan2 function returns values in the interval [-pi,+pi].
64//
65// There are two basic paths: swap true and swap false.
66// atan2(Y,X) ==> atan(V/U) where |U| >= |V|. If |Y| > |X|, we must swap.
67//
68// p6  swap True    |Y| > |X|
69// p7  swap False   |Y| <= |X|
70// p8  X+   (If swap=True p8=p9=0)
71// p9  X-
72//
73// all the other predicates p10 thru p15 are false for the main path
74//
75// Simple trigonometric identities show
76//   Region 1 (-45 to +45 degrees):
77//         X>0, |Y|<=X, V=Y, U=X     atan2(Y,X) = sgnY * (0 + atan(V/U))
78//
79//   Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
80//         X>0, |Y|>X, V=X, U=Y      atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
81//
82//   Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
83//         X<0, |Y|>|X|, V=X, U=Y    atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
84//
85//   Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
86//         X<0, |Y|<=|X|, V=Y, U=X    atan2(Y,X) = sgnY * (pi - atan(V/U))
87//
88// So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
89//
90// We compute atan(V/U) from the identity
91//      atan(V/U) = atan(z) + atan([(V/U)-z] / [1+(V/U)z])
92//      where z is a limited precision approximation (16 bits) to V/U
93//
94// z is calculated with the assistance of the frcpa instruction.
95//
96// atan(z) is calculated by a polynomial z + z^3 * p(w),  w=z^2
97// where p(w) = P0+P1*w+...+P22*w^22
98//
99// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
100//
101// Approximate atan(d) by d + P0*d^3
102// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
103// Compute q(a) = 1 + a + ... + a^5.
104// Then F*q(a) approximates the reciprocal to more than 50 bits.
105
106// Special values
107//==============================================================
108//              Y                 x          Result
109//             +number           +inf        +0
110//             -number           +inf        -0
111//             +number           -inf        +pi
112//             -number           -inf        -pi
113//
114//             +inf              +number     +pi/2
115//             -inf              +number     -pi/2
116//             +inf              -number     +pi/2
117//             -inf              -number     -pi/2
118//
119//             +inf              +inf        +pi/4
120//             -inf              +inf        -pi/4
121//             +inf              -inf        +3pi/4
122//             -inf              -inf        -3pi/4
123//
124//             +1                +1          +pi/4
125//             -1                +1          -pi/4
126//             +1                -1          +3pi/4
127//             -1                -1          -3pi/4
128//
129//             +number           +0          +pi/2
130//             -number           +0          -pi/2
131//             +number           -0          +pi/2
132//             -number           -0          -pi/2
133//
134//             +0                +number     +0
135//             -0                +number     -0
136//             +0                -number     +pi
137//             -0                -number     -pi
138//
139//             +0                +0          +0
140//             -0                +0          -0
141//             +0                -0          +pi
142//             -0                -0          -pi
143//
144//            NaN             anything      quiet Y
145//            Not NaN         NaN           quiet X
146
147// atan2(+-0,+-0) sets double error tag to 37
148
149// Registers used
150//==============================================================
151
152// predicate registers used:
153// p6 -> p15
154
155// floating-point registers used:
156// f8, f9 input
157// f32 -> f119
158
159// general registers used
160// r32 -> r41
161
162// Assembly macros
163//==============================================================
164// Symbolic register names. Some names deliberately share one register; see the notes below.
165EXP_AD_P1                    = r33   // pointer that walks atan2_tb1
166EXP_AD_P2                    = r34   // pointer set to atan2_tb1 + 0xd0, walks the second table
167rsig_near_one                = r35   // integer image of the 1.0+1ulp significand
168
169// r35 serves as rsig_near_one in atan2 and as GR_SAVE_B0 in __libm_error_region.
170GR_SAVE_B0                   = r35
171GR_SAVE_GP                   = r36
172GR_SAVE_PFS                  = r37
173// r38-r41 are set up in the error path before the call to __libm_error_support.
174GR_Parameter_X               = r38
175GR_Parameter_Y               = r39
176GR_Parameter_RESULT          = r40
177atan2_GR_tag                 = r41
178// Inputs: f8 holds Y and f9 holds X; f8 is also where every exit path writes the result.
179atan2_Y                      = f8
180atan2_X                      = f9
181// u1 = frcpa reciprocal seed, z1/z2 = quotient pieces, B1 = 2 - u1*arg correction factor.
182atan2_u1_X                   = f32
183atan2_u1_Y                   = f33
184atan2_z2_X                   = f34
185atan2_z2_Y                   = f35
186
187atan2_two                    = f36
188atan2_B1sq_Y                 = f37
189atan2_z1_X                   = f38
190atan2_z1_Y                   = f39
191atan2_B1X                    = f40
192
193atan2_B1Y                    = f41
194atan2_wp_X                   = f42
195atan2_B1sq_X                 = f43
196atan2_z                      = f44
197atan2_w                      = f45
198// Polynomial coefficients P0..P22, loaded from atan2_tb1 and atan2_tb2.
199atan2_P0                     = f46
200atan2_P1                     = f47
201atan2_P2                     = f48
202atan2_P3                     = f49
203atan2_P4                     = f50
204
205atan2_P5                     = f51
206atan2_P6                     = f52
207atan2_P7                     = f53
208atan2_P8                     = f54
209atan2_P9                     = f55
210
211atan2_P10                    = f56
212atan2_P11                    = f57
213atan2_P12                    = f58
214atan2_P13                    = f59
215atan2_P14                    = f60
216
217atan2_P15                    = f61
218atan2_P16                    = f62
219atan2_P17                    = f63
220atan2_P18                    = f64
221atan2_P19                    = f65
222
223atan2_P20                    = f66
224atan2_P21                    = f67
225atan2_P22                    = f68
226atan2_tmp                    = f68   // shares f68 with P22; scratch for the inexact-forcing multiply
227atan2_pi_by_2                = f69
228atan2_sgn_pi_by_2            = f69   // shares f69: pi/2 is overwritten in place by sgnY*pi/2
229atan2_V13                    = f70
230// V*/W* are intermediate terms of the polynomial evaluation.
231atan2_W11                    = f71
232atan2_E                      = f72
233atan2_wp_Y                   = f73
234atan2_V11                    = f74
235atan2_V12                    = f75
236
237atan2_V7                     = f76
238atan2_V8                     = f77
239atan2_W7                     = f78
240atan2_W8                     = f79
241atan2_W3                     = f80
242
243atan2_W4                     = f81
244atan2_V3                     = f82
245atan2_V4                     = f83
246atan2_F                      = f84
247atan2_gV                     = f85
248
249atan2_V10                    = f86
250atan2_zcub                   = f87
251atan2_V6                     = f88
252atan2_V9                     = f89
253atan2_W10                    = f90
254
255atan2_W6                     = f91
256atan2_W2                     = f92
257atan2_V2                     = f93
258atan2_alpha                  = f94
259atan2_alpha_1                = f95
260
261atan2_gVF                    = f96
262atan2_V5                     = f97
263atan2_W12                    = f98
264atan2_W5                     = f99
265atan2_alpha_sq               = f100
266
267atan2_Cp                     = f101
268atan2_V1                     = f102
269atan2_ysq                    = f103
270atan2_W1                     = f104
271atan2_alpha_cub              = f105
272
273atan2_C                      = f106
274atan2_xsq                    = f107
275atan2_d                      = f108
276atan2_A_hi                   = f109
277atan2_dsq                    = f110
278
279atan2_pd                     = f111
280atan2_A_lo                   = f112
281atan2_A                      = f113
282atan2_Pp                     = f114
283atan2_sgnY                   = f115
284
285atan2_sig_near_one           = f116
286atan2_near_one               = f116  // shares f116: the significand image is merged in place into ~1.0
287atan2_pi                     = f117
288atan2_sgn_pi                 = f117  // shares f117: pi is overwritten in place by sgnY*pi
289atan2_3pi_by_4               = f118
290atan2_pi_by_4                = f119
291
292
293/////////////////////////////////////////////////////////////
294// Table 1: coefficients P11 down to P0, then pi, in the order the code loads them via EXP_AD_P1.
295// Each entry is two data8 words (16 bytes), read with ldfe in 16-byte steps.
296RODATA
297// NOTE(review): RODATA is a macro defined outside this view; presumably selects a read-only data section.
298.align 16
299// 13 entries * 16 bytes = 0xd0, the offset atan2 adds to this table's address to get EXP_AD_P2.
300LOCAL_OBJECT_START(atan2_tb1)
301data8 0xA21922DC45605EA1 ,  0x00003FFA // P11
302data8 0xB199DD6D2675C40F ,  0x0000BFFA // P10
303data8 0xC2F01E5DDD100DBE ,  0x00003FFA // P9
304data8 0xD78F28FC2A592781 ,  0x0000BFFA // P8
305data8 0xF0F03ADB3FC930D3 ,  0x00003FFA // P7
306data8 0x88887EBB209E3543 ,  0x0000BFFB // P6
307data8 0x9D89D7D55C3287A5 ,  0x00003FFB // P5
308data8 0xBA2E8B9793955C77 ,  0x0000BFFB // P4
309data8 0xE38E38E320A8A098 ,  0x00003FFB // P3
310data8 0x9249249247E37913 ,  0x0000BFFC // P2
311data8 0xCCCCCCCCCCC906CD ,  0x00003FFC // P1
312data8 0xAAAAAAAAAAAAA8A9 ,  0x0000BFFD // P0
313data8 0xC90FDAA22168C235 ,  0x00004000 // pi
314LOCAL_OBJECT_END(atan2_tb1)
315// Table 2: P21 down to P12, then P22, pi/2, pi/4, 3pi/4, in the order loaded via EXP_AD_P2.
316LOCAL_OBJECT_START(atan2_tb2)
317data8 0xCE585A259BD8374C ,  0x00003FF0 // P21
318data8 0x9F90FB984D8E39D0 ,  0x0000BFF3 // P20
319data8 0x9D3436AABE218776 ,  0x00003FF5 // P19
320data8 0xDEC343E068A6D2A8 ,  0x0000BFF6 // P18
321data8 0xF396268151CFB11C ,  0x00003FF7 // P17
322data8 0xD818B4BB43D84BF2 ,  0x0000BFF8 // P16
323data8 0xA2270D30A90AA220 ,  0x00003FF9 // P15
324data8 0xD5F4F2182E7A8725 ,  0x0000BFF9 // P14
325data8 0x80D601879218B53A ,  0x00003FFA // P13
326data8 0x9297B23CCFFB291F ,  0x0000BFFA // P12
327data8 0xFE7E52D2A89995B3 ,  0x0000BFEC // P22
328data8 0xC90FDAA22168C235 ,  0x00003FFF // pi/2
329data8 0xC90FDAA22168C235 ,  0x00003FFE // pi/4
330data8 0x96cbe3f9990e91a8 ,  0x00004000 // 3pi/4
331LOCAL_OBJECT_END(atan2_tb2)
332// This table has no address load of its own: atan2 reaches it as atan2_tb1 + 0xd0, so it must directly follow atan2_tb1.
333// atan2: double atan2(double Y, double X). Y arrives in f8 and X in f9; the result is written to f8.
334// NaN, infinity and zero inputs leave early through predicated br.ret; x=0,y=0 branches to ATAN2_ERROR.
335// NOTE(review): no branch follows ATAN2_ERROR, so it presumably runs on into __libm_error_region.
336.section .text
337GLOBAL_IEEE754_ENTRY(atan2)
338// Frame: 1 input, 5 locals, 4 outputs. Start u1_X ~ 1/X, begin the table address load, two = 2.0.
339{ .mfi
340           alloc        r32           = ar.pfs,1,5,4,0
341           frcpa.s1     atan2_u1_X,p6 = f1,atan2_X
342           nop.i 999
343}
344{ .mfi
345           addl         EXP_AD_P1   = @ltoff(atan2_tb1), gp
346           fma.s1       atan2_two  = f1,f1,f1
347           nop.i 999
348;;
349}
350// Fetch the address of atan2_tb1; start u1_Y ~ 1/Y; xsq = X*X.
351{ .mfi
352           ld8  EXP_AD_P1 = [EXP_AD_P1]
353           frcpa.s1     atan2_u1_Y,p7 = f1,atan2_Y
354           nop.i 999
355}
356{ .mfi
357           nop.m 999
358           fma.s1       atan2_xsq  = atan2_X,atan2_X,f0
359           nop.i 999
360;;
361}
362// p10 = Y is NaN; ysq = Y*Y.
363{ .mfi
364           nop.m 999
365           fclass.m p10,p0 = atan2_Y, 0xc3     // Test for y=nan
366           nop.i 999
367}
368{ .mfi
369           nop.m 999
370           fma.s1       atan2_ysq  = atan2_Y,atan2_Y,f0
371           nop.i 999
372}
373;;
374// EXP_AD_P2 = atan2_tb1 + 0xd0 (13 entries * 16 bytes past the start); p12 = X is NaN.
375{ .mfi
376           add  EXP_AD_P2 = 0xd0,EXP_AD_P1
377           fclass.m p12,p0 = atan2_X, 0xc3     // Test for x nan
378           nop.i 999
379}
380;;
381
382
383// p10 Y NAN, quiet and return.  sgnY = 1.0 carrying the sign of Y.
384{ .mfi
385           ldfe         atan2_P11  = [EXP_AD_P1],16
386           fmerge.s     atan2_sgnY = atan2_Y,f1
387           nop.i 999
388}
389{ .mfb
390           ldfe         atan2_P21  = [EXP_AD_P2],16
391(p10)      fma.d.s0 f8 = atan2_X,atan2_Y,f0   // If y=nan, result quietized y
392(p10)      br.ret.spnt b0        // Exit if y=nan
393;;
394}
395// z1_X = u1_X*Y (first estimate of Y/X); B1X = 2 - u1_X*X.
396
397{ .mfi
398           ldfe         atan2_P10  = [EXP_AD_P1],16
399           fma.s1       atan2_z1_X = atan2_u1_X, atan2_Y, f0
400           nop.i 999
401}
402{ .mfi
403           ldfe         atan2_P20  = [EXP_AD_P2],16
404           fnma.s1      atan2_B1X  = atan2_u1_X, atan2_X, atan2_two
405           nop.i 999
406;;
407}
408// z1_Y = u1_Y*X (first estimate of X/Y); B1Y = 2 - u1_Y*Y.
409{ .mfi
410           ldfe         atan2_P9   = [EXP_AD_P1],16
411           fma.s1       atan2_z1_Y = atan2_u1_Y, atan2_X, f0
412           nop.i 999
413}
414{ .mfi
415           ldfe         atan2_P19  = [EXP_AD_P2],16
416           fnma.s1      atan2_B1Y  = atan2_u1_Y, atan2_Y, atan2_two
417           nop.i 999
418}
419;;
420// z2_X = u1_X*Y^2 and z2_Y = u1_Y*X^2, used below to build E.
421{ .mfi
422           ldfe         atan2_P8   = [EXP_AD_P1],16
423           fma.s1       atan2_z2_X = atan2_u1_X, atan2_ysq, f0
424           nop.i 999
425}
426{ .mfi
427           ldfe         atan2_P18  = [EXP_AD_P2],16
428           fma.s1       atan2_z2_Y = atan2_u1_Y, atan2_xsq, f0
429           nop.i 999
430}
431;;
432
433// p10 ==> x  inf     y ?
434// p11 ==> x !inf     y ?
435{ .mfi
436           ldfe         atan2_P7   = [EXP_AD_P1],16
437           fclass.m p10,p11 = atan2_X, 0x23    // test for x inf
438           nop.i 999
439}
440{ .mfb
441           ldfe         atan2_P17  = [EXP_AD_P2],16
442(p12)      fma.d.s0        f8 = atan2_X,atan2_Y,f0     // If x nan, result quiet x
443(p12)      br.ret.spnt b0                 // Exit for x nan
444;;
445}
446
447// p6 true if swap,    means |y| >  |x|    or ysq > xsq
448// p7 true if no swap, means |x| >= |y|    or xsq >= ysq
449{ .mmf
450           ldfe         atan2_P6   = [EXP_AD_P1],16
451           ldfe         atan2_P16  = [EXP_AD_P2],16
452           fcmp.ge.s1 p7,p6    = atan2_xsq, atan2_ysq
453;;
454}
455// wp_X = z1_X^2 and B1sq_X = B1X^2 are computed on both paths (no predicate).
456{ .mfi
457           ldfe         atan2_P5   = [EXP_AD_P1],16
458           fma.s1       atan2_wp_X   = atan2_z1_X, atan2_z1_X, f0
459           nop.i 999
460}
461{ .mfi
462           ldfe         atan2_P15       = [EXP_AD_P2],16
463           fma.s1       atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
464           nop.i 999
465;;
466}
467// Swap path only: wp_Y = z1_Y^2 and B1sq_Y = B1Y^2.
468{ .mfi
469           ldfe         atan2_P4   = [EXP_AD_P1],16
470(p6)       fma.s1       atan2_wp_Y   = atan2_z1_Y, atan2_z1_Y, f0
471           nop.i 999
472}
473{ .mfi
474           ldfe         atan2_P14  = [EXP_AD_P2],16
475(p6)       fma.s1       atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
476           nop.i 999
477;;
478}
479// E = U + V*z: swap gives Y + z2_Y*B1Y, no swap gives X + z2_X*B1X.
480{ .mfi
481           ldfe         atan2_P3        = [EXP_AD_P1],16
482(p6)       fma.s1       atan2_E         = atan2_z2_Y, atan2_B1Y, atan2_Y
483           nop.i 999
484}
485{ .mfi
486           ldfe         atan2_P13  = [EXP_AD_P2],16
487(p7)       fma.s1       atan2_E         = atan2_z2_X, atan2_B1X, atan2_X
488           nop.i 999
489;;
490}
491// z = z1*B1 for the selected path: the limited-precision approximation to V/U.
492
493{ .mfi
494           ldfe         atan2_P2        = [EXP_AD_P1],16
495(p6)       fma.s1       atan2_z         = atan2_z1_Y, atan2_B1Y, f0
496           nop.i 999
497}
498{ .mfi
499           ldfe         atan2_P12  = [EXP_AD_P2],16
500(p7)       fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
501           nop.i 999
502;;
503}
504// Dummy s0 compare raises denormal/invalid flags; load the integer significand for 1.0+1ulp.
505
506{ .mfi
507           ldfe         atan2_P1        = [EXP_AD_P1],16
508           fcmp.eq.s0  p14,p15=atan2_X,atan2_Y  // Dummy for denorm and invalid
509           nop.i 999
510}
511{ .mlx
512           ldfe         atan2_P22       = [EXP_AD_P2],16
513           movl         rsig_near_one = 0x8000000000000001 // signif near 1.0
514;;
515}
516
517
518// p12 ==> x  inf     y inf
519// p13 ==> x  inf     y !inf
520{ .mmf
521           ldfe         atan2_P0        = [EXP_AD_P1],16
522           ldfe         atan2_pi_by_2   = [EXP_AD_P2],16
523(p10)      fclass.m.unc p12,p13 = atan2_Y, 0x23  // x inf, test if y inf
524;;
525}
526// w = wp*B1sq = z^2 for the selected path.
527{ .mfi
528           ldfe         atan2_pi        = [EXP_AD_P1],16
529(p6)       fma.s1       atan2_w         = atan2_wp_Y, atan2_B1sq_Y,f0
530           nop.i 999
531}
532{ .mfi
533           ldfe         atan2_pi_by_4       = [EXP_AD_P2],16
534(p7)       fma.s1       atan2_w         = atan2_wp_X, atan2_B1sq_X,f0
535           nop.i 999
536;;
537}
538
539{ .mfi
540           ldfe         atan2_3pi_by_4       = [EXP_AD_P2],16
541(p11)      fclass.m.unc p9,p0 = atan2_Y, 0x23  // x not inf, test if y inf
542           nop.i 999
543;;
544}
545// Move the significand image to f116; gV = V - U*z (swap: X - Y*z).
546{ .mfi
547           setf.sig      atan2_sig_near_one = rsig_near_one
548(p12)      fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x inf, y inf, test if x +inf
549           nop.i 999
550}
551{ .mfi
552           nop.m 999
553(p6)       fnma.s1       atan2_gV        = atan2_Y, atan2_z, atan2_X
554           nop.i 999
555;;
556}
557// F ~ 1/E from frcpa; gV for the no-swap path is Y - X*z.
558{ .mfi
559           nop.m 999
560           frcpa.s1     atan2_F,p0     = f1, atan2_E
561           nop.i 999
562}
563{ .mfi
564           nop.m 999
565(p7)       fnma.s1       atan2_gV        = atan2_X, atan2_z, atan2_Y
566           nop.i 999
567;;
568}
569
570// p13 ==> x  inf     y !inf
571{ .mfi
572           nop.m 999
573(p13)      fcmp.gt.unc.s1 p14,p15 = atan2_X,f0 // x inf, y !inf, test if x +inf
574           nop.i 999
575}
576{ .mfb
577           nop.m 999
578(p9)       fma.d.s0  f8 = atan2_sgnY, atan2_pi_by_2, f0  // +-pi/2 if x !inf, y inf
579(p9)       br.ret.spnt b0      // exit if x not inf, y inf, result is +-pi/2
580;;
581}
582// Polynomial p(w): pair the coefficients first, then combine with w^2, w^4, w^8 and w^12.
583{ .mfi
584           nop.m 999
585           fma.s1       atan2_V13       = atan2_w, atan2_P11, atan2_P10  // P10 + P11*w
586           nop.i 999
587}
588{ .mfi
589           nop.m 999
590           fma.s1       atan2_W11       = atan2_w, atan2_P21, atan2_P20  // P20 + P21*w
591           nop.i 999
592;;
593}
594
595{ .mfi
596           nop.m 999
597           fma.s1       atan2_V11       = atan2_w, atan2_P9, atan2_P8  // P8 + P9*w
598           nop.i 999
599}
600{ .mfi
601           nop.m 999
602           fma.s1       atan2_V12       = atan2_w, atan2_w, f0  // w^2
603           nop.i 999
604;;
605}
606
607{ .mfi
608           nop.m 999
609           fma.s1       atan2_V8        = atan2_w, atan2_P7 , atan2_P6  // P6 + P7*w
610           nop.i 999
611}
612{ .mfi
613           nop.m 999
614           fma.s1       atan2_W8        = atan2_w, atan2_P19, atan2_P18  // P18 + P19*w
615           nop.i 999
616;;
617}
618
619{ .mfi
620           nop.m 999
621           fnma.s1      atan2_alpha     = atan2_E, atan2_F, f1  // alpha = 1 - E*F
622           nop.i 999
623}
624{ .mfi
625           nop.m 999
626           fnma.s1      atan2_alpha_1   = atan2_E, atan2_F, atan2_two  // 2 - E*F = 1 + alpha
627           nop.i 999
628;;
629}
630
631
632{ .mfi
633           nop.m 999
634           fma.s1       atan2_V7        = atan2_w, atan2_P5 , atan2_P4  // P4 + P5*w
635           nop.i 999
636}
637{ .mfi
638           nop.m 999
639           fma.s1       atan2_W7        = atan2_w, atan2_P17, atan2_P16  // P16 + P17*w
640           nop.i 999
641;;
642}
643
644{ .mfi
645           nop.m 999
646           fma.s1       atan2_V4        = atan2_w, atan2_P3 , atan2_P2  // P2 + P3*w
647           nop.i 999
648}
649{ .mfi
650           nop.m 999
651           fma.s1       atan2_W4        = atan2_w, atan2_P15, atan2_P14  // P14 + P15*w
652           nop.i 999
653;;
654}
655
656{ .mfi
657           nop.m 999
658           fma.s1       atan2_V3        = atan2_w, atan2_P1 , atan2_P0  // P0 + P1*w
659           nop.i 999
660}
661{ .mfi
662           nop.m 999
663           fma.s1       atan2_W3        = atan2_w, atan2_P13, atan2_P12  // P12 + P13*w
664           nop.i 999
665;;
666}
667
668{ .mfi
669           nop.m 999
670           fma.s1       atan2_V10       = atan2_V12, atan2_V13, atan2_V11  // P8..P11 terms
671           nop.i 999
672}
673{ .mfi
674           nop.m 999
675           fma.s1       atan2_gVF       = atan2_gV, atan2_F, f0  // gV*F
676           nop.i 999
677;;
678}
679
680{ .mfi
681           nop.m 999
682           fma.s1       atan2_alpha_sq  = atan2_alpha, atan2_alpha, f0  // alpha^2
683           nop.i 999
684}
685{ .mfi
686           nop.m 999
687           fma.s1       atan2_Cp        = atan2_alpha, atan2_alpha_1, f1  // 1 + alpha + alpha^2
688           nop.i 999
689;;
690}
691
692{ .mfi
693           nop.m 999
694           fma.s1       atan2_V9        = atan2_V12, atan2_V12, f0  // w^4
695           nop.i 999
696}
697{ .mfi
698           nop.m 999
699           fma.s1       atan2_W10       = atan2_V12, atan2_P22 , atan2_W11  // P20..P22 terms
700           nop.i 999
701;;
702}
703
704{ .mfi
705           nop.m 999
706           fma.s1       atan2_V6        = atan2_V12, atan2_V8 , atan2_V7  // P4..P7 terms
707           nop.i 999
708}
709{ .mfi
710           nop.m 999
711           fma.s1       atan2_W6        = atan2_V12, atan2_W8 , atan2_W7  // P16..P19 terms
712           nop.i 999
713;;
714}
715
716{ .mfi
717           nop.m 999
718           fma.s1       atan2_V2        = atan2_V12, atan2_V4 , atan2_V3  // P0..P3 terms
719           nop.i 999
720}
721{ .mfi
722           nop.m 999
723           fma.s1       atan2_W2        = atan2_V12, atan2_W4  , atan2_W3  // P12..P15 terms
724           nop.i 999
725;;
726}
727
728// p8 ==> y   0     x?
729// p9 ==> y  !0     x?
730{ .mfi
731           nop.m 999
732           fclass.m p8,p9 = atan2_Y, 0x07  // Test for y=0
733           nop.i 999
734}
735{ .mfi
736           nop.m 999
737           fma.s1       atan2_zcub      = atan2_z, atan2_w, f0  // z*w = z^3
738           nop.i 999
739;;
740}
741
742{ .mfi
743           nop.m 999
744           fma.s1       atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0  // alpha^3
745           nop.i 999
746}
747{ .mfi
748           nop.m 999
749           fma.s1       atan2_C         = atan2_gVF, atan2_Cp, f0  // gVF*(1 + alpha + alpha^2)
750           nop.i 999
751;;
752}
753
754// p12 ==>  y0     x0
755// p13 ==>  y0     x!0
756{ .mfi
757           nop.m 999
758(p8)       fclass.m.unc p12,p13 = atan2_X, 0x07  // y=0, test if x is 0
759           nop.i 999
760}
761{ .mfi
762           nop.m 999
763           fma.s1       atan2_W12       = atan2_V9, atan2_V9, f0  // w^8
764           nop.i 999
765;;
766}
767
768{ .mfi
769           nop.m 999
770           fma.s1       atan2_V5        = atan2_V9, atan2_V10, atan2_V6  // P4..P11 terms
771           nop.i 999
772}
773{ .mfi
774           nop.m 999
775           fma.s1       atan2_W5        = atan2_V9, atan2_W10, atan2_W6  // P16..P22 terms
776           nop.i 999
777;;
778}
779
780
781// p9 ==>  y!0    x0
782{ .mfi
783           nop.m 999
784(p9)       fclass.m.unc p9,p0 = atan2_X, 0x07  // y not 0, test if x is 0
785           nop.i 999
786}
787// p10 ==> X +INF, Y +-INF
788{ .mfb
789           nop.m 999
790(p10)      fma.d.s0       f8 = atan2_sgnY, atan2_pi_by_4, f0 // x=+inf, y=inf
791(p10)      br.ret.spnt b0          // Exit for x=+inf, y=inf, result is +-pi/4
792;;
793}
794
795.pred.rel "mutex",p11,p14
796{ .mfi
797           nop.m 999
798(p14)      fmerge.s    f8 = atan2_sgnY, f0 // x=+inf, y !inf, result +-0
799           nop.i 999
800}
801// p11 ==> X -INF, Y +-INF
802{ .mfb
803           nop.m 999
804(p11)      fma.d.s0       f8 = atan2_sgnY, atan2_3pi_by_4, f0 // x=-inf, y=inf
805(p11)      br.ret.spnt b0          // Exit for x=-inf, y=inf, result is +-3pi/4
806;;
807}
808
809{ .mfi
810           nop.m 999
811(p13)      fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x not 0, y=0, test if x>0
812           nop.i 999
813}
814{ .mfb
815           nop.m 999
816           fma.s1       atan2_d         = atan2_alpha_cub, atan2_C, atan2_C  // C*(1 + alpha^3)
817(p14)      br.ret.spnt b0         // Exit if x=+inf, y !inf, result +-0
818;;
819}
820
821{ .mfi
822           nop.m 999
823           fma.s1       atan2_W12       = atan2_V9, atan2_W12, f0  // w^12
824           nop.i 999
825}
826{ .mfb
827           nop.m 999
828(p9)       fma.d.s0       f8 = atan2_sgnY, atan2_pi_by_2, f0 // x=0, y not 0
829(p9)       br.ret.spnt b0      // Exit if x=0 and y not 0, result is +-pi/2
830;;
831}
832
833{ .mfi
834           nop.m 999
835           fma.s1       atan2_V1        = atan2_V9, atan2_V5, atan2_V2  // P0..P11 terms
836           nop.i 999
837}
838{ .mfb
839           nop.m 999
840           fma.s1       atan2_W1        = atan2_V9, atan2_W5, atan2_W2  // P12..P22 terms
841(p12)      br.spnt ATAN2_ERROR            // Branch if x=0 and y=0
842;;
843}
844
845{ .mfi
846           nop.m 999
847(p10)      fmerge.s     f8              = atan2_sgnY, f0  // +-0 if x>0, y=0
848           nop.i 999
849}
850{ .mfb
851           nop.m 999
852(p11)      fma.d.s0        f8 = atan2_sgnY, atan2_pi, f0 // +-pi if x<0, y=0
853(p13)      br.ret.spnt b0      // Exit if x!0 and y=0
854;;
855}
856
857
858{ .mfi
859           nop.m 999
860           fma.s1       atan2_pd        = atan2_P0, atan2_d, f0  // P0*d
861           nop.i 999
862}
863{ .mfi
864           nop.m 999
865           fma.s1       atan2_dsq       = atan2_d, atan2_d, f0  // d^2
866           nop.i 999
867;;
868}
869
870
871{ .mfi
872           nop.m 999
873           fmerge.se    atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
874           nop.i 999
875}
876{ .mfi
877           nop.m 999
878           fma.s1       atan2_Pp        = atan2_W12, atan2_W1, atan2_V1  // p(w) = V1 + w^12*W1
879           nop.i 999
880;;
881}
882
883// p8 true if no swap and X positive
884// p9 true if no swap and X negative
885// both are false if swap is true
886{ .mfi
887           nop.m 999
888(p7)       fcmp.ge.unc.s1 p8,p9    = atan2_X,f0
889           nop.i 999
890}
891{ .mfb
892           nop.m 999
893(p15)      fma.d.s0        f8              = atan2_sgnY, atan2_pi, f0
894(p15)      br.ret.spnt b0         // Exit if x=-inf, y !inf, result +-pi
895;;
896}
897
898{ .mfi
899           nop.m 999
900           fma.s1       atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0  // sgnY*pi/2
901           nop.i 999
902}
903{ .mfi
904           nop.m 999
905           fma.s1       atan2_A_lo      = atan2_pd, atan2_dsq, atan2_d  // d + P0*d^3 ~ atan(d)
906           nop.i 999
907;;
908}
909
910
911{ .mfi
912           nop.m 999
913           fma.s1       atan2_sgn_pi = atan2_pi, atan2_sgnY, f0  // sgnY*pi
914           nop.i 999
915}
916{ .mfi
917           nop.m 999
918           fma.s1       atan2_A_hi      = atan2_zcub, atan2_Pp, atan2_z  // z + z^3*p(w) ~ atan(z)
919           nop.i 999
920;;
921}
922
923
924// For |Y| <= |X| and X > 0, force inexact in case A_lo is zero
925{ .mfi
926           nop.m 999
927(p8)       fmpy.s0      atan2_tmp       = atan2_P22, atan2_P22
928           nop.i 999
929;;
930}
931
932{ .mfi
933           nop.m 999
934           fma.s1       atan2_A         = atan2_A_hi, f1, atan2_A_lo  // A = A_hi + A_lo
935           nop.i 999
936}
937// For |Y| <= |X| and X > 0, result is A_hi + A_lo
938{ .mfi
939           nop.m 999
940(p8)       fma.d.s0       f8         = atan2_A_hi, f1, atan2_A_lo
941           nop.i 999
942;;
943}
944
945.pred.rel "mutex",p6,p9
946// We perturb A by multiplying by 1.0+1ulp as we produce the result
947// in order to get symmetrically rounded results in directed rounding modes.
948// If we don't do this, there are a few cases where the trailing 11 bits of
949// the significand of the result, before converting to double, are zero.  These
950// cases do not round symmetrically in round to +infinity or round to -infinity.
951// The perturbation also ensures that the inexact flag is set.
952// For |Y| > |X|, result is  +- pi/2 - (A_hi + A_lo)
953{ .mfi
954           nop.m 999
955(p6)       fnma.d.s0      f8        = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
956           nop.i 999
957}
958// For |Y| <= |X|, and X < 0, result is  +- pi + (A_hi + A_lo)
959{ .mfb
960           nop.m 999
961(p9)       fma.d.s0        f8        = atan2_A, atan2_near_one, atan2_sgn_pi
962           br.ret.sptk  b0
963;;
964}
965
966ATAN2_ERROR:
967// Here if x=0 and y=0: f10 = +-0 when x=+0, +-pi when x=-0; the error tag is 37.
968{ .mfi
969          nop.m 999
970          fclass.m p10,p11       = atan2_X,0x05  // Test if x=+0
971          nop.i 999
972}
973;;
974
975{ .mfi
976          mov        atan2_GR_tag     = 37
977(p10)     fmerge.s     f10             = atan2_sgnY, f0 // x=+0, y=0
978          nop.i 999
979}
980{ .mfi
981          nop.m 999
982(p11)     fma.d.s0        f10            = atan2_sgnY, atan2_pi, f0 // x=-0, y=0
983          nop.i 999
984;;
985}
986GLOBAL_IEEE754_END(atan2)
987libm_alias_double_other (__atan2, atan2)
988// __libm_error_region: builds a 64-byte frame, stores f8, f9 and f10 into it, calls
989// __libm_error_support, then reloads f8 from sp+48, pops the frame and returns.
990LOCAL_LIBM_ENTRY(__libm_error_region)
991.prologue
992// (1) Save ar.pfs and gp and allocate the frame; GR_Parameter_Y = old sp - 32 = new sp + 32.
993{ .mfi
994        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
995        nop.f 999
996.save   ar.pfs,GR_SAVE_PFS
997        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
998}
999{ .mfi
1000.fframe 64
1001        add sp=-64,sp                          // Create new stack
1002        nop.f 0
1003        mov GR_SAVE_GP=gp                      // Save gp
1004};;
1005
1006
1007// (2) Store f8 at sp+32 (the pointer then advances to sp+48); GR_Parameter_X = sp+16; save b0.
1008{ .mmi
1009        stfd [GR_Parameter_Y] = f8,16         // STORE Parameter 2 on stack
1010        add GR_Parameter_X = 16,sp            // Parameter 1 address
1011.save   b0, GR_SAVE_B0
1012        mov GR_SAVE_B0=b0                     // Save b0
1013};;
1014
1015.body
1016// (3) Store f9 at sp+16 and f10 at sp+48, point GR_Parameter_Y back at sp+32, then call.
1017{ .mib
1018        stfd [GR_Parameter_X] = f9            // STORE Parameter 1 on stack
1019        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
1020        nop.b 0
1021}
1022{ .mib
1023        stfd [GR_Parameter_Y] = f10           // STORE Parameter 3 on stack
1024        add   GR_Parameter_Y = -16,GR_Parameter_Y
1025        br.call.sptk b0=__libm_error_support# // Call error handling function
1026};;
1027{ .mmi
1028        add   GR_Parameter_RESULT = 48,sp
1029        nop.m 0
1030        nop.i 0
1031};;
1032// GR_Parameter_RESULT is recomputed after the call (the 08/15/00 history entry says it had been overwritten).
1033// (4) Reload the result into f8, pop the frame, restore b0, gp and ar.pfs, and return.
1034{ .mmi
1035        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1036.restore sp
1037        add   sp = 64,sp                       // Restore stack pointer
1038        mov   b0 = GR_SAVE_B0                  // Restore return address
1039};;
1040{ .mib
1041        mov   gp = GR_SAVE_GP                  // Restore gp
1042        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1043        br.ret.sptk     b0                     // Return
1044};;
1045
1046LOCAL_LIBM_END(__libm_error_region)
1047// __libm_error_support is not defined in this file; declare it as a global function symbol.
1048.type   __libm_error_support#,@function
1049.global __libm_error_support#
1050