.file "atan.s"


// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00  Initial version
// 04/13/00  Improved speed
// 04/19/00  Removed the qualifying predicate from the fmerge.s that
//           takes the absolute value.
// 06/16/00  Reassigned FP registers to eliminate stalls on loads
// 08/30/00  Saved 5 cycles in main path by rearranging large argument logic
//           and delaying use of result of fcmp in load by 1 group
// 05/20/02  Cleaned up namespace and sf0 syntax
// 08/20/02  Use atan2 algorithm with x=1 for better accuracy
// 02/06/03  Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// double atan(double Y)
//
// Overview of operation
//==============================================================
//
// The atan function returns values in the interval [-pi/2,+pi/2].
//
// The algorithm used is the atan2(Y,X) algorithm where we fix X=1.0.
//
// There are two basic paths: swap true and swap false.
// atan2(Y,X) ==> atan(V/U) where |U| >= |V|. If |Y| > |X|, we must swap.
//
// p6  swap True    |Y| > |X|
// p7  swap False   |Y| <= |X|
//
//
// Simple trigonometric identities show
//   Region 1
//         |Y|<=1.0, V=Y, U=1.0     atan2(Y,X) = sgnY * (0 + atan(V/U))
//
//   Region 2
//         |Y|>1.0, V=1.0, U=Y      atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
//
//
// We compute atan(V/U) from the identity
//      atan(V/U) = atan(z) + atan([(V/U)-z] / [1+(V/U)z])
//      where z is a limited precision approximation (16 bits) to V/U
//
// z is calculated with the assistance of the frcpa instruction.
//
// atan(z) is calculated by a polynomial z + z^3 * p(w),  w=z^2
// where p(w) = P0+P1*w+...+P22*w^22
//
// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
//
// Approximate atan(d) by d + P0*d^3
// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
// Compute q(a) = 1 + a + ... + a^5.
// Then F*q(a) approximates the reciprocal to more than 50 bits.

// Special values
//==============================================================
// atan(QNAN)  = QNAN
// atan(SNAN)  = quieted SNAN
// atan(+-inf) = +- pi/2
// atan(+-0)   = +-0

// Registers used
//==============================================================

// predicate registers used:
// p6 -> p15

// floating-point registers used:
// f8, input
// f32 -> f116

// general registers used
// r14 -> r16

// Assembly macros
//==============================================================

// Symbolic names for the registers used by atan.
//
// General registers:
//   EXP_AD_P1      walks atan2_tb1 (post-incremented by 16 on every ldfe)
//   EXP_AD_P2      walks atan2_tb2 (post-incremented by 16 on every ldfe)
//   rsig_near_one  integer image of the significand 0x8000000000000001
EXP_AD_P1                    = r14
EXP_AD_P2                    = r15
rsig_near_one                = r16

// Input/result register, and the fixed X operand of the atan2 algorithm.
// atan2_X is f1, so X is the constant 1.0 throughout.
atan2_Y                      = f8
atan2_X                      = f1

// Reciprocal approximations and their refinement terms.
atan2_u1_X                   = f32
atan2_u1_Y                   = f33
atan2_z2_X                   = f34

atan2_two                    = f36
atan2_B1sq_Y                 = f37
atan2_z1_X                   = f38
atan2_B1X                    = f40

atan2_B1Y                    = f41
atan2_wp_X                   = f42
atan2_B1sq_X                 = f43
atan2_z                      = f44
atan2_w                      = f45

// Polynomial coefficients P0..P22, loaded from atan2_tb1 / atan2_tb2.
atan2_P0                     = f46
atan2_P1                     = f47
atan2_P2                     = f48
atan2_P3                     = f49
atan2_P4                     = f50

atan2_P5                     = f51
atan2_P6                     = f52
atan2_P7                     = f53
atan2_P8                     = f54
atan2_P9                     = f55

atan2_P10                    = f56
atan2_P11                    = f57
atan2_P12                    = f58
atan2_P13                    = f59
atan2_P14                    = f60

atan2_P15                    = f61
atan2_P16                    = f62
atan2_P17                    = f63
atan2_P18                    = f64
atan2_P19                    = f65

atan2_P20                    = f66
atan2_P21                    = f67
atan2_P22                    = f68
// atan2_pi_by_2 and atan2_sgn_pi_by_2 deliberately share f69: the signed
// value is formed in place from the loaded constant (pi/2 * sgnY).
atan2_pi_by_2                = f69
atan2_sgn_pi_by_2            = f69
atan2_V13                    = f70

// Intermediate terms of the polynomial evaluation (V* cover P0..P11,
// W* cover P12..P22) and of the reciprocal/correction computation.
atan2_W11                    = f71
atan2_E                      = f72
atan2_wp_Y                   = f73
atan2_V11                    = f74
atan2_V12                    = f75

atan2_V7                     = f76
atan2_V8                     = f77
atan2_W7                     = f78
atan2_W8                     = f79
atan2_W3                     = f80

atan2_W4                     = f81
atan2_V3                     = f82
atan2_V4                     = f83
atan2_F                      = f84
atan2_gV                     = f85

atan2_V10                    = f86
atan2_zcub                   = f87
atan2_V6                     = f88
atan2_V9                     = f89
atan2_W10                    = f90

atan2_W6                     = f91
atan2_W2                     = f92
atan2_V2                     = f93
atan2_alpha                  = f94
atan2_alpha_1                = f95

atan2_gVF                    = f96
atan2_V5                     = f97
atan2_W12                    = f98
atan2_W5                     = f99
atan2_alpha_sq               = f100

atan2_Cp                     = f101
atan2_V1                     = f102
atan2_ysq                    = f103
atan2_W1                     = f104
atan2_alpha_cub              = f105

atan2_C                      = f106
atan2_d                      = f108
atan2_A_hi                   = f109
atan2_dsq                    = f110

atan2_pd                     = f111
atan2_A_lo                   = f112
atan2_A                      = f113
atan2_Pp                     = f114
atan2_sgnY                   = f115

// atan2_sig_near_one and atan2_near_one deliberately share f116: the
// significand image is turned into the constant ~1.0 in place (fmerge.se).
atan2_sig_near_one           = f116
atan2_near_one               = f116

/////////////////////////////////////////////////////////////

// Coefficient tables for p(w) = P0 + P1*w + ... + P22*w^22, plus pi/2.
//
// Every entry is 16 bytes: a 64-bit significand followed by a 64-bit word
// whose low bits hold the sign and exponent.  Entries are read with
// "ldfe ... ,16", so the order below is the order in which atan loads them.
//
// LAYOUT DEPENDENCY: atan obtains only the address of atan2_tb1 and reaches
// atan2_tb2 by adding 0xd0 (13 entries * 16 bytes).  atan2_tb1 must keep
// exactly 13 entries and atan2_tb2 must follow it immediately.

RODATA

.align 16

LOCAL_OBJECT_START(atan2_tb1)
data8 0xA21922DC45605EA1 ,  0x00003FFA // P11
data8 0xB199DD6D2675C40F ,  0x0000BFFA // P10
data8 0xC2F01E5DDD100DBE ,  0x00003FFA // P9
data8 0xD78F28FC2A592781 ,  0x0000BFFA // P8
data8 0xF0F03ADB3FC930D3 ,  0x00003FFA // P7
data8 0x88887EBB209E3543 ,  0x0000BFFB // P6
data8 0x9D89D7D55C3287A5 ,  0x00003FFB // P5
data8 0xBA2E8B9793955C77 ,  0x0000BFFB // P4
data8 0xE38E38E320A8A098 ,  0x00003FFB // P3
data8 0x9249249247E37913 ,  0x0000BFFC // P2
data8 0xCCCCCCCCCCC906CD ,  0x00003FFC // P1
data8 0xAAAAAAAAAAAAA8A9 ,  0x0000BFFD // P0
data8 0x0000000000000000 ,  0x00000000 // pad to avoid bank conflict
LOCAL_OBJECT_END(atan2_tb1)

LOCAL_OBJECT_START(atan2_tb2)
data8 0xCE585A259BD8374C ,  0x00003FF0 // P21
data8 0x9F90FB984D8E39D0 ,  0x0000BFF3 // P20
data8 0x9D3436AABE218776 ,  0x00003FF5 // P19
data8 0xDEC343E068A6D2A8 ,  0x0000BFF6 // P18
data8 0xF396268151CFB11C ,  0x00003FF7 // P17
data8 0xD818B4BB43D84BF2 ,  0x0000BFF8 // P16
data8 0xA2270D30A90AA220 ,  0x00003FF9 // P15
data8 0xD5F4F2182E7A8725 ,  0x0000BFF9 // P14
data8 0x80D601879218B53A ,  0x00003FFA // P13
data8 0x9297B23CCFFB291F ,  0x0000BFFA // P12
data8 0xFE7E52D2A89995B3 ,  0x0000BFEC // P22
data8 0xC90FDAA22168C235 ,  0x00003FFF // pi/2
LOCAL_OBJECT_END(atan2_tb2)

//==============================================================
// double atan(double Y)
//
// In:     f8 = Y
// Out:    f8 = atan(Y), produced by an fma.d/fnma.d under status field s0
//         (or by fmerge.s for Y = +-0)
// Uses:   r14-r16, f32-f116 (via the aliases above), p6-p10, p14, p15
// Return: br.ret through b0; there are four exits
//           p10  Y is NaN      -> Y*1.0 (quiets an SNaN)
//           p9   Y is +-inf    -> sgnY * pi/2
//           p8   Y is +-0      -> zero carrying the sign of Y
//           else main path     -> p7: A_hi + A_lo
//                                 p6: sgnY*pi/2 - A*(1+1ulp)
//
// The 24 table loads are issued one per table per instruction group and
// are interleaved with the arithmetic; instruction order, bundling and
// stop bits are part of the schedule and should not be rearranged casually.
//==============================================================

.section .text
GLOBAL_LIBM_ENTRY(atan)

// Start the reciprocal approximation of Y, begin fetching the address of
// atan2_tb1, and form the constant 2.0 as 1*1+1.  The predicates written
// by the two frcpa instructions are not consumed; p6/p7 are redefined by
// the fcmp further down.
{ .mfi
           nop.m 999
           frcpa.s1     atan2_u1_Y,p7 = f1,atan2_Y
           nop.i 999
}
{ .mfi
           addl         EXP_AD_P1   = @ltoff(atan2_tb1), gp
           fma.s1       atan2_two  = f1,f1,f1
           nop.i 999
;;
}

// Load the table address from its linkage-table slot; ysq = Y*Y.
{ .mfi
           ld8  EXP_AD_P1 = [EXP_AD_P1]
           frcpa.s1     atan2_u1_X,p6 = f1,atan2_X
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_ysq  = atan2_Y,atan2_Y,f0
           nop.i 999
}
;;

// Second table pointer = first + 0xd0 (13 entries of 16 bytes).
// sgnY = 1.0 carrying the sign of Y.
{ .mfi
           add  EXP_AD_P2 = 0xd0,EXP_AD_P1
           fmerge.s     atan2_sgnY = atan2_Y,f1
           nop.i 999
}
;;


{ .mfi
           ldfe         atan2_P11  = [EXP_AD_P1],16
           fclass.m p10,p0 = atan2_Y, 0xc3     // Test for y=nan
           nop.i 999
}
{ .mfi
           ldfe         atan2_P21  = [EXP_AD_P2],16
           nop.f 999
           nop.i 999
;;
}


// B1Y = 2 - u1_Y*Y,  wp_Y = u1_Y^2
{ .mfi
           ldfe         atan2_P10  = [EXP_AD_P1],16
           fnma.s1      atan2_B1Y  = atan2_u1_Y, atan2_Y, atan2_two
           nop.i 999
}
{ .mfi
           ldfe         atan2_P20  = [EXP_AD_P2],16
           fma.s1       atan2_wp_Y   = atan2_u1_Y, atan2_u1_Y, f0
           nop.i 999
;;
}

// z1_X = u1_X*Y,  B1X = 2 - u1_X*X
{ .mfi
           ldfe         atan2_P9   = [EXP_AD_P1],16
           fma.s1       atan2_z1_X = atan2_u1_X, atan2_Y, f0
           nop.i 999
}
{ .mfi
           ldfe         atan2_P19  = [EXP_AD_P2],16
           fnma.s1      atan2_B1X  = atan2_u1_X, atan2_X, atan2_two
           nop.i 999
}
;;

// z2_X = u1_X*Y^2; first early exit (NaN input).
{ .mfi
           ldfe         atan2_P8   = [EXP_AD_P1],16
           fma.s1       atan2_z2_X = atan2_u1_X, atan2_ysq, f0
           nop.i 999
}
{ .mfb
           ldfe         atan2_P18  = [EXP_AD_P2],16
(p10)      fma.d.s0 f8 = atan2_Y,atan2_X,f0   // If y=nan, result quietized y
(p10)      br.ret.spnt b0        // Exit if y=nan
}
;;

// p6 true if swap,    means |y| >  1.0    or ysq > 1.0
// p7 true if no swap, means 1.0 >= |y|    or 1.0 >= ysq
{ .mfi
           ldfe         atan2_P7   = [EXP_AD_P1],16
           fcmp.ge.s1 p7,p6    = f1, atan2_ysq
           nop.i 999
}
{ .mmf
           ldfe         atan2_P17  = [EXP_AD_P2],16
           nop.m 999
           nop.f 999
}
;;

// E = U + V*z.  The swap-path value (u1_Y*B1Y + Y) is computed
// unconditionally here and replaced under p7 two groups later.
{ .mfi
           ldfe         atan2_P6   = [EXP_AD_P1],16
           fma.s1       atan2_E         = atan2_u1_Y, atan2_B1Y, atan2_Y
           nop.i 999
}
{ .mfi
           ldfe         atan2_P16  = [EXP_AD_P2],16
           fma.s1       atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
           nop.i 999
;;
}

{ .mfi
           ldfe         atan2_P5   = [EXP_AD_P1],16
(p7)       fma.s1       atan2_wp_X   = atan2_z1_X, atan2_z1_X, f0
           nop.i 999
}
{ .mfi
           ldfe         atan2_P15       = [EXP_AD_P2],16
(p7)       fma.s1       atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
           nop.i 999
;;
}

// z: refined reciprocal u1_Y*B1Y on the swap path (p6).
// E on the no-swap path (p7): z2_X*B1X + X.
{ .mfi
           ldfe         atan2_P4   = [EXP_AD_P1],16
(p6)       fma.s1       atan2_z         = atan2_u1_Y, atan2_B1Y, f0
           nop.i 999
}
{ .mfi
           ldfe         atan2_P14  = [EXP_AD_P2],16
(p7)       fma.s1       atan2_E         = atan2_z2_X, atan2_B1X, atan2_X
           nop.i 999
;;
}


// z on the no-swap path (p7): z1_X*B1X.
{ .mfi
           ldfe         atan2_P3        = [EXP_AD_P1],16
           fcmp.eq.s0  p14,p15=atan2_X,atan2_Y  // Dummy for denorm and invalid
           nop.i 999
}
{ .mmf
           ldfe         atan2_P13  = [EXP_AD_P2],16
           nop.m 999
(p7)       fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
;;
}

// w = z^2, formed as a product of squares: swap path here, no-swap path
// in the next group.
{ .mfi
           ldfe         atan2_P2        = [EXP_AD_P1],16
(p6)       fma.s1       atan2_w         = atan2_wp_Y, atan2_B1sq_Y,f0
           nop.i 999
}
{ .mlx
           ldfe         atan2_P12  = [EXP_AD_P2],16
           movl         rsig_near_one = 0x8000000000000001 // signif near 1.0
;;
}

{ .mfi
           ldfe         atan2_P1        = [EXP_AD_P1],16
           fclass.m p9,p0 = atan2_Y, 0x23  // test if y inf
           nop.i 999
}
{ .mfi
           ldfe         atan2_P22       = [EXP_AD_P2],16
(p7)       fma.s1       atan2_w         = atan2_wp_X, atan2_B1sq_X,f0
           nop.i 999
;;
}

// F ~ 1/E;  gV = V - U*z  (swap: X - Y*z, no swap: Y - X*z).
{ .mfi
           ldfe         atan2_P0        = [EXP_AD_P1],16
           frcpa.s1     atan2_F,p0     = f1, atan2_E
           nop.i 999
}
{ .mfi
           ldfe         atan2_pi_by_2   = [EXP_AD_P2],16
(p6)       fnma.s1       atan2_gV        = atan2_Y, atan2_z, atan2_X
           nop.i 999
;;
}

// Second early exit (infinite input).
{ .mfi
           setf.sig      atan2_sig_near_one = rsig_near_one
(p7)       fnma.s1       atan2_gV        = atan2_X, atan2_z, atan2_Y
           nop.i 999
}
{ .mfb
           nop.m 999
(p9)       fma.d.s0  f8 = atan2_sgnY, atan2_pi_by_2, f0  // +-pi/2 if y inf
(p9)       br.ret.spnt b0      // exit if y inf, result is +-pi/2
;;
}

// Polynomial p(w): adjacent coefficients are paired as (Pk + w*Pk+1), the
// pairs are combined with w^2 (V12) and then w^4 (V9), and the P12..P22
// half (W*) is finally scaled by w^12 (W12).  The reciprocal correction
// (alpha, Cp, C, d) is interleaved with it.
{ .mfi
           nop.m 999
           fma.s1       atan2_V13       = atan2_w, atan2_P11, atan2_P10
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W11       = atan2_w, atan2_P21, atan2_P20
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V11       = atan2_w, atan2_P9, atan2_P8
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_V12       = atan2_w, atan2_w, f0
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V8        = atan2_w, atan2_P7 , atan2_P6
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W8        = atan2_w, atan2_P19, atan2_P18
           nop.i 999
;;
}

// alpha = 1 - E*F,  alpha_1 = 2 - E*F = 1 + alpha
{ .mfi
           nop.m 999
           fnma.s1      atan2_alpha     = atan2_E, atan2_F, f1
           nop.i 999
}
{ .mfi
           nop.m 999
           fnma.s1      atan2_alpha_1   = atan2_E, atan2_F, atan2_two
           nop.i 999
;;
}


{ .mfi
           nop.m 999
           fma.s1       atan2_V7        = atan2_w, atan2_P5 , atan2_P4
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W7        = atan2_w, atan2_P17, atan2_P16
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V4        = atan2_w, atan2_P3 , atan2_P2
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W4        = atan2_w, atan2_P15, atan2_P14
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V3        = atan2_w, atan2_P1 , atan2_P0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W3        = atan2_w, atan2_P13, atan2_P12
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V10       = atan2_V12, atan2_V13, atan2_V11
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_gVF       = atan2_gV, atan2_F, f0
           nop.i 999
;;
}

// Cp = 1 + alpha + alpha^2
{ .mfi
           nop.m 999
           fma.s1       atan2_alpha_sq  = atan2_alpha, atan2_alpha, f0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_Cp        = atan2_alpha, atan2_alpha_1, f1
           nop.i 999
;;
}

// V9 = w^4
{ .mfi
           nop.m 999
           fma.s1       atan2_V9        = atan2_V12, atan2_V12, f0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W10       = atan2_V12, atan2_P22 , atan2_W11
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V6        = atan2_V12, atan2_V8 , atan2_V7
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W6        = atan2_V12, atan2_W8 , atan2_W7
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V2        = atan2_V12, atan2_V4 , atan2_V3
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W2        = atan2_V12, atan2_W4  , atan2_W3
           nop.i 999
;;
}

// C = gV*F*(1 + alpha + alpha^2)
{ .mfi
           nop.m 999
           fma.s1       atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_C         = atan2_gVF, atan2_Cp, f0
           nop.i 999
;;
}

// W12 = w^8 here; multiplied by w^4 below to give w^12.
{ .mfi
           nop.m 999
           fma.s1       atan2_W12       = atan2_V9, atan2_V9, f0
           nop.i 999
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_V5        = atan2_V9, atan2_V10, atan2_V6
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W5        = atan2_V9, atan2_W10, atan2_W6
           nop.i 999
;;
}

// d = C*(1 + alpha^3), i.e. gV*F*(1 + alpha + ... + alpha^5)
{ .mfi
           nop.m 999
           fclass.m p8,p0 = atan2_Y, 0x07  // Test for y=0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_d         = atan2_alpha_cub, atan2_C, atan2_C
           nop.i 999
}
;;

{ .mfi
           nop.m 999
           fma.s1       atan2_W12       = atan2_V9, atan2_W12, f0
           nop.i 999
}
;;

// V1 = P0..P11 part of p(w);  W1 = P12..P22 part (before the w^12 scale)
{ .mfi
           nop.m 999
           fma.s1       atan2_V1        = atan2_V9, atan2_V5, atan2_V2
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_W1        = atan2_V9, atan2_W5, atan2_W2
           nop.i 999
;;
}

// Third early exit (zero input); zcub = z*w = z^3.
{ .mfi
           nop.m 999
(p8)       fmerge.s     f8              = atan2_sgnY, f0  // +-0 if y=0
           nop.i 999
}
{ .mfb
           nop.m 999
           fma.s1       atan2_zcub      = atan2_z, atan2_w, f0
(p8)       br.ret.spnt b0      // Exit if y=0
;;
}

{ .mfi
           nop.m 999
           fma.s1       atan2_pd        = atan2_P0, atan2_d, f0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_dsq       = atan2_d, atan2_d, f0
           nop.i 999
;;
}


// Pp = V1 + w^12*W1 = p(w)
{ .mfi
           nop.m 999
           fmerge.se    atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_Pp        = atan2_W12, atan2_W1, atan2_V1
           nop.i 999
;;
}

// sgn_pi_by_2 = pi/2 * sgnY (in place);  A_lo = d + P0*d^3
{ .mfi
           nop.m 999
           fma.s1       atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
           nop.i 999
}
{ .mfi
           nop.m 999
           fma.s1       atan2_A_lo      = atan2_pd, atan2_dsq, atan2_d
           nop.i 999
;;
}


// A_hi = z + z^3*p(w)
{ .mfi
           nop.m 999
           fma.s1       atan2_A_hi      = atan2_zcub, atan2_Pp, atan2_z
           nop.i 999
;;
}


{ .mfi
           nop.m 999
(p6)       fma.s1       atan2_A         = atan2_A_hi, f1, atan2_A_lo
           nop.i 999
}
// For |Y| <= |X| and X > 0, result is A_hi + A_lo
{ .mfi
           nop.m 999
(p7)       fma.d.s0       f8         = atan2_A_hi, f1, atan2_A_lo
           nop.i 999
;;
}

// For |Y| > |X|, result is  +- pi/2 - (A_hi + A_lo)
// We perturb A by multiplying by 1.0+1ulp as we produce the result
// in order to get symmetrically rounded results in directed rounding modes.
// If we don't do this, there are a few cases where the trailing 11 bits of
// the significand of the result, before converting to double, are zero.  These
// cases do not round symmetrically in round to +infinity or round to -infinity.
{ .mfb
           nop.m 999
(p6)       fnma.d.s0      f8        = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
           br.ret.sptk  b0
;;
}

GLOBAL_LIBM_END(atan)
libm_alias_double_other (atan, atan)
