1.file "acosh.s"
2
3
4// Copyright (c) 2000 - 2005, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// ==============================================================
40// History
41// ==============================================================
42// 03/23/01 Initial version
43// 04/19/01 Improved speed of the paths #1,2,3,4,5
44// 05/20/02 Cleaned up namespace and sf0 syntax
45// 02/06/03 Reordered header: .section, .global, .proc, .align
46// 05/14/03 Improved performance, set denormal flag for unorms >= 1.0
47// 03/31/05 Reformatted delimiters between data tables
48//
49// API
50// ==============================================================
51// double acosh(double)
52//
53// Overview of operation
54// ==============================================================
55//
56// There are 7 paths:
57// 1. x = 1.0
58//    Return acosh(x) = 0.0
59// 2. 1.0 < x < 1.000499725341796875(0x3FF0020C00000000)
60//    Return acosh(x) = sqrt(x-1) * Pol4(x), where Pol4(x) =
61//      (((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
62
63// 3. 1.000499725341796875(0x3FF0020C00000000) <= x < 2^63
64//    Return acosh(x) = log(x + sqrt(x^2 -1.0))
65//    To compute x + sqrt(x^2 -1.0) modified Newton Raphson method is used
66//      (3 iterations)
67//    Algorithm description for log function see below.
68//
69// 4. 2^63 <= x < +INF
70//    Return acosh(x) = log(2*x)
71//    Algorithm description for log function see below.
72//
73// 5. x = +INF
74//    Return acosh(x) = +INF
75//
76// 6. x = [S,Q]NaN
77//    Return acosh(x) = QNaN
78//
79// 7. x < 1.0
80//    It's domain error. Error handler with tag = 136 is called
81//
82//==============================================================
83// Algorithm Description for log(x) function
84// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always
85//   true for this acosh implementation
86//
87// Consider  x = 2^N 1.f1 f2 f3 f4...f63
88// Log(x) = log(frcpa(x) x/frcpa(x))
89//        = log(1/frcpa(x)) + log(frcpa(x) x)
90//        = -log(frcpa(x)) + log(frcpa(x) x)
91//
92// frcpa(x)       = 2^-N * frcpa(1.f1 f2 ... f63)
93//
94// -log(frcpa(x)) = -log(C)
95//                = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
96//
97// -log(frcpa(x)) = -log(C)
98//                = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
99//
100// -log(frcpa(x)) = -log(C)
101//                = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
102//
103// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
104//
105// Log(x) =  +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
106// Log(x) =  +Nlog2 - log(frcpa(1.f1 f2 ... f63))    + log(frcpa(x) x)
107// Log(x) =  +Nlog2 + T                              + log(frcpa(x) x)
108//
109// Log(x) =  +Nlog2 + T                     + log(C x)
110//
111// Cx = 1 + r
112//
113// Log(x) =  +Nlog2 + T  + log(1+r)
114// Log(x) =  +Nlog2 + T  + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
115//
116// 1.f1 f2 ... f8 has 256 entries.
117// They are 1 + k/2^8, k = 0 ... 255
118// These 256 values are the table entries.
119//
120// Implementation
121//==============================================================
122// C = frcpa(x)
123// r = C * x - 1
124//
125// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
126//
127// x = f * 2^n where f is 1.f_1f_2f_3....f_63
128// Nfloat = float(n)  where n is the true unbiased exponent
129// pre-index = f_1f_2....f_8
130// index = pre_index * 16
131// get the dxt table entry at index + offset = T
132//
133// result = (T + Nfloat * log(2)) + rseries
134//
135// The T table is calculated as follows
136// Form x_k = 1 + k/2^8 where k goes from 0... 255
137//      y_k = frcpa(x_k)
138//      log(1/y_k)  in quad and round to double-extended
139//
140
141// Registers used
142//==============================================================
143// Floating Point registers used:
144// f8, input
145// f9 -> f15,  f32 -> f65
146
147// General registers used:
148// r14 -> r27, r32 -> r39
149
150// Predicate registers used:
151// p6 -> p15
152
153// p6 to filter out case when x = [Q,S]NaN
154// p7,p8 to filter out case when x < 1.0
155// p10 to select path #1
156// p11 to filter out case when x = +INF
157// p12 used in the frcpa
158// p13 to select path #4
159// p14,p15 to select path #2
160
161// Assembly macros
162//==============================================================
// General-register aliases.  r14-r27 hold scratch values for the main
// computation.  The r33-r39 names are not referenced in the part of the
// routine visible here (presumably used by the error-handling path; confirm).
163log_GR_exp_17_ones    = r14  // 0x1ffff: mask keeping the 17 exponent bits of a getf.exp result
164log_GR_signexp_f8     = r15  // sign+exponent field of the log argument (getf.exp)
165log_table_address2    = r16  // running pointer into log_table_2 (log_table_1 + 0x40)
166log_GR_exp_16_ones    = r17  // 0xffff: exponent bias
167log_GR_exp_f8         = r18  // biased exponent with the sign bit masked off
168log_GR_true_exp_f8    = r19  // unbiased exponent (biased exponent - bias)
169log_GR_significand_f8 = r20  // significand of the log argument (getf.sig)
170log_GR_index          = r21  // 8-bit index used to select a log_table_3 entry
171log_GR_comp2          = r22  // 0x1003e: exponent threshold for path 4 (x >= 2^63)
172acosh_GR_f8           = r23  // sign+exponent field of the input x
173log_GR_comp           = r24  // 0x10020C: upper 21 significand bits of 1.0005
174acosh_GR_f8_sig       = r25  // significand of x, later shifted right by 43
175log_table_address3    = r26  // address of the selected log_table_3 entry
176NR_table_address      = r27  // running pointer into log_table_1
177
178GR_SAVE_B0            = r33
179GR_SAVE_GP            = r34
180GR_SAVE_PFS           = r35
181
182GR_Parameter_X        = r36
183GR_Parameter_Y        = r37
184GR_Parameter_RESULT   = r38
185acosh_GR_tag          = r39
186
187//==============================================================
// Floating-point register aliases.  f8 carries the input x and the result.
188log_y            = f9   // y = x^2 - 1
189NR1              = f10  // 0.5, Newton-Raphson constant (first entry of log_table_2)
190NR2              = f11  // 3.0, Newton-Raphson constant (second entry of log_table_2)
191log_y_rs         = f12  // z ~ 1/sqrt(y), seeded by frsqrta
192log_y_rs_iter    = f13
193log_y_rs_iter1   = f14
194log_NORM_f8      = f15
195acosh_comp       = f32  // 1.0005 threshold loaded from log_table_1
196log_w            = f34  // w = x - 1
197log_P5           = f35  // P1..P5: polynomial coefficients for log(1+r)
198log_P4           = f36
199log_P3           = f37
200log_P2           = f38
201log_P1           = f39
202log_C0           = f40  // C0..C4: coefficients from log_table_2
203log_C1           = f41
204log_C2           = f42
205log2             = f43  // log(2) in double-extended precision
206acosh_w_rs       = f44  // t ~ 1/sqrt(w), seeded by frsqrta
207log_C            = f45  // C = frcpa(log argument)
208log_arg          = f46  // argument passed to the log computation
209acosh_w_iter1    = f47
210acosh_w_iter2    = f48
211log_int_Nfloat   = f49  // unbiased exponent as an integer in an FP register (setf.sig)
212log_r            = f50  // r = C * arg - 1
213log_rsq          = f51  // r^2
214log_rp_p4        = f52  // P5*r + P4
215log_rp_p32       = f53
216log_rcube        = f54
217log_rp_p10       = f55
218log_rp_p2        = f56
219log_Nfloat       = f57
220log_T            = f58  // table value T loaded from log_table_3
221log_r2P_r        = f59
222log_T_plus_Nlog2 = f60
223acosh_w_sqrt     = f61
224acosh_w_1        = f62
225log_C3           = f63
226log_C4           = f64
227log_arg_early    = f65  // early estimate of the log argument; feeds frcpa and the table index
228
229
230// Data tables
231//==============================================================
232
233RODATA
234.align 16
235
// log_table_1: threshold and log-series constants, 0x40 bytes in total.
//   +0x00  1.0005 and P5   (loaded together with ldfpd)
//   +0x10  P4 and P3
//   +0x20  P2 and P1
//   +0x30  log(2), stored as a 16-byte double-extended value (loaded with ldfe)
// The code computes the address of log_table_2 as log_table_1 + 0x40, so
// it relies on the tables being emitted back to back in this order.
236LOCAL_OBJECT_START(log_table_1)
237data8 0x3FF0020C49BA5E35 // 1.0005
238data8 0xBFC5555DA7212371 // P5
239data8 0x3FC999A19EEF5826 // P4
240data8 0xBFCFFFFFFFFEF009 // P3
241data8 0x3FD555555554ECB2 // P2
242data8 0xBFE0000000000000 // P1 = -0.5
243//
244data8 0xb17217f7d1cf79ac, 0x00003ffe  // log2
245LOCAL_OBJECT_END(log_table_1)
246
// log_table_2: Newton-Raphson constants followed by the C4..C0
// coefficients, 0x60 bytes in total.
//   +0x00  0.5 and 3.0 (loaded together as NR1, NR2 with ldfpd)
//   +0x10  C4, C3, C2, C1, C0 as 16-byte double-extended values
// The main path reaches log_table_3 as (log_table_1 + 0x30) + 0x70, i.e.
// 0x40 + 0x60 past the start of log_table_1, so the size of this table
// must not change without updating that offset.
247LOCAL_OBJECT_START(log_table_2)
248data8 0x3FE0000000000000 // 0.5
249data8 0x4008000000000000 // 3.0
250//
251data8 0xAFE8F9203939CCF8, 0x00003FF6 // C4 3FF6AFE8F9203939CCF8
252data8 0xAD46EB6AE752D809, 0x0000BFF8 // C3 BFF8AD46EB6AE752D809
253data8 0xD93923D7F53F3627, 0x00003FF9 // C2 3FF9D93923D7F53F3627
254data8 0xF15BEEEFF7D32D36, 0x0000BFFB // C1 BFFBF15BEEEFF7D32D36
255data8 0xB504F333F9DE6484, 0x00003FFF // C0 3FFFB504F333F9DE6484
256LOCAL_OBJECT_END(log_table_2)
257
258
// log_table_3: the T table, 256 entries of 16 bytes each.
// Entry k holds log(1/frcpa(1 + k/2^8)), k = 0 ... 255, stored as a
// double-extended value (64-bit significand, then sign/exponent word).
// The code indexes it with 8 significand bits of the log argument,
// scaled by 16 (shladd ...,4,...).
259LOCAL_OBJECT_START(log_table_3)
260data8 0x80200aaeac44ef38 , 0x00003ff6 //   log(1/frcpa(1+  0/2^8))
261//
262data8 0xc09090a2c35aa070 , 0x00003ff7 //   log(1/frcpa(1+  1/2^8))
263data8 0xa0c94fcb41977c75 , 0x00003ff8 //   log(1/frcpa(1+  2/2^8))
264data8 0xe18b9c263af83301 , 0x00003ff8 //   log(1/frcpa(1+  3/2^8))
265data8 0x8d35c8d6399c30ea , 0x00003ff9 //   log(1/frcpa(1+  4/2^8))
266data8 0xadd4d2ecd601cbb8 , 0x00003ff9 //   log(1/frcpa(1+  5/2^8))
267//
268data8 0xce95403a192f9f01 , 0x00003ff9 //   log(1/frcpa(1+  6/2^8))
269data8 0xeb59392cbcc01096 , 0x00003ff9 //   log(1/frcpa(1+  7/2^8))
270data8 0x862c7d0cefd54c5d , 0x00003ffa //   log(1/frcpa(1+  8/2^8))
271data8 0x94aa63c65e70d499 , 0x00003ffa //   log(1/frcpa(1+  9/2^8))
272data8 0xa54a696d4b62b382 , 0x00003ffa //   log(1/frcpa(1+ 10/2^8))
273//
274data8 0xb3e4a796a5dac208 , 0x00003ffa //   log(1/frcpa(1+ 11/2^8))
275data8 0xc28c45b1878340a9 , 0x00003ffa //   log(1/frcpa(1+ 12/2^8))
276data8 0xd35c55f39d7a6235 , 0x00003ffa //   log(1/frcpa(1+ 13/2^8))
277data8 0xe220f037b954f1f5 , 0x00003ffa //   log(1/frcpa(1+ 14/2^8))
278data8 0xf0f3389b036834f3 , 0x00003ffa //   log(1/frcpa(1+ 15/2^8))
279//
280data8 0xffd3488d5c980465 , 0x00003ffa //   log(1/frcpa(1+ 16/2^8))
281data8 0x87609ce2ed300490 , 0x00003ffb //   log(1/frcpa(1+ 17/2^8))
282data8 0x8ede9321e8c85927 , 0x00003ffb //   log(1/frcpa(1+ 18/2^8))
283data8 0x96639427f2f8e2f4 , 0x00003ffb //   log(1/frcpa(1+ 19/2^8))
284data8 0x9defad3e8f73217b , 0x00003ffb //   log(1/frcpa(1+ 20/2^8))
285//
286data8 0xa582ebd50097029c , 0x00003ffb //   log(1/frcpa(1+ 21/2^8))
287data8 0xac06dbe75ab80fee , 0x00003ffb //   log(1/frcpa(1+ 22/2^8))
288data8 0xb3a78449b2d3ccca , 0x00003ffb //   log(1/frcpa(1+ 23/2^8))
289data8 0xbb4f79635ab46bb2 , 0x00003ffb //   log(1/frcpa(1+ 24/2^8))
290data8 0xc2fec93a83523f3f , 0x00003ffb //   log(1/frcpa(1+ 25/2^8))
291//
292data8 0xc99af2eaca4c4571 , 0x00003ffb //   log(1/frcpa(1+ 26/2^8))
293data8 0xd1581106472fa653 , 0x00003ffb //   log(1/frcpa(1+ 27/2^8))
294data8 0xd8002560d4355f2e , 0x00003ffb //   log(1/frcpa(1+ 28/2^8))
295data8 0xdfcb43b4fe508632 , 0x00003ffb //   log(1/frcpa(1+ 29/2^8))
296data8 0xe67f6dff709d4119 , 0x00003ffb //   log(1/frcpa(1+ 30/2^8))
297//
298data8 0xed393b1c22351280 , 0x00003ffb //   log(1/frcpa(1+ 31/2^8))
299data8 0xf5192bff087bcc35 , 0x00003ffb //   log(1/frcpa(1+ 32/2^8))
300data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb //   log(1/frcpa(1+ 33/2^8))
301data8 0x81559a97f92f9cc7 , 0x00003ffc //   log(1/frcpa(1+ 34/2^8))
302data8 0x84be72bce90266e8 , 0x00003ffc //   log(1/frcpa(1+ 35/2^8))
303//
304data8 0x88bc74113f23def2 , 0x00003ffc //   log(1/frcpa(1+ 36/2^8))
305data8 0x8c2ba3edf6799d11 , 0x00003ffc //   log(1/frcpa(1+ 37/2^8))
306data8 0x8f9dc92f92ea08b1 , 0x00003ffc //   log(1/frcpa(1+ 38/2^8))
307data8 0x9312e8f36efab5a7 , 0x00003ffc //   log(1/frcpa(1+ 39/2^8))
308data8 0x968b08643409ceb6 , 0x00003ffc //   log(1/frcpa(1+ 40/2^8))
309//
310data8 0x9a062cba08a1708c , 0x00003ffc //   log(1/frcpa(1+ 41/2^8))
311data8 0x9d845b3abf95485c , 0x00003ffc //   log(1/frcpa(1+ 42/2^8))
312data8 0xa06fd841bc001bb4 , 0x00003ffc //   log(1/frcpa(1+ 43/2^8))
313data8 0xa3f3a74652fbe0db , 0x00003ffc //   log(1/frcpa(1+ 44/2^8))
314data8 0xa77a8fb2336f20f5 , 0x00003ffc //   log(1/frcpa(1+ 45/2^8))
315//
316data8 0xab0497015d28b0a0 , 0x00003ffc //   log(1/frcpa(1+ 46/2^8))
317data8 0xae91c2be6ba6a615 , 0x00003ffc //   log(1/frcpa(1+ 47/2^8))
318data8 0xb189d1b99aebb20b , 0x00003ffc //   log(1/frcpa(1+ 48/2^8))
319data8 0xb51cced5de9c1b2c , 0x00003ffc //   log(1/frcpa(1+ 49/2^8))
320data8 0xb819bee9e720d42f , 0x00003ffc //   log(1/frcpa(1+ 50/2^8))
321//
322data8 0xbbb2a0947b093a5d , 0x00003ffc //   log(1/frcpa(1+ 51/2^8))
323data8 0xbf4ec1505811684a , 0x00003ffc //   log(1/frcpa(1+ 52/2^8))
324data8 0xc2535bacfa8975ff , 0x00003ffc //   log(1/frcpa(1+ 53/2^8))
325data8 0xc55a3eafad187eb8 , 0x00003ffc //   log(1/frcpa(1+ 54/2^8))
326data8 0xc8ff2484b2c0da74 , 0x00003ffc //   log(1/frcpa(1+ 55/2^8))
327//
328data8 0xcc0b1a008d53ab76 , 0x00003ffc //   log(1/frcpa(1+ 56/2^8))
329data8 0xcfb6203844b3209b , 0x00003ffc //   log(1/frcpa(1+ 57/2^8))
330data8 0xd2c73949a47a19f5 , 0x00003ffc //   log(1/frcpa(1+ 58/2^8))
331data8 0xd5daae18b49d6695 , 0x00003ffc //   log(1/frcpa(1+ 59/2^8))
332data8 0xd8f08248cf7e8019 , 0x00003ffc //   log(1/frcpa(1+ 60/2^8))
333//
334data8 0xdca7749f1b3e540e , 0x00003ffc //   log(1/frcpa(1+ 61/2^8))
335data8 0xdfc28e033aaaf7c7 , 0x00003ffc //   log(1/frcpa(1+ 62/2^8))
336data8 0xe2e012a5f91d2f55 , 0x00003ffc //   log(1/frcpa(1+ 63/2^8))
337data8 0xe600064ed9e292a8 , 0x00003ffc //   log(1/frcpa(1+ 64/2^8))
338data8 0xe9226cce42b39f60 , 0x00003ffc //   log(1/frcpa(1+ 65/2^8))
339//
340data8 0xec4749fd97a28360 , 0x00003ffc //   log(1/frcpa(1+ 66/2^8))
341data8 0xef6ea1bf57780495 , 0x00003ffc //   log(1/frcpa(1+ 67/2^8))
342data8 0xf29877ff38809091 , 0x00003ffc //   log(1/frcpa(1+ 68/2^8))
343data8 0xf5c4d0b245cb89be , 0x00003ffc //   log(1/frcpa(1+ 69/2^8))
344data8 0xf8f3afd6fcdef3aa , 0x00003ffc //   log(1/frcpa(1+ 70/2^8))
345//
346data8 0xfc2519756be1abc7 , 0x00003ffc //   log(1/frcpa(1+ 71/2^8))
347data8 0xff59119f503e6832 , 0x00003ffc //   log(1/frcpa(1+ 72/2^8))
348data8 0x8147ce381ae0e146 , 0x00003ffd //   log(1/frcpa(1+ 73/2^8))
349data8 0x82e45f06cb1ad0f2 , 0x00003ffd //   log(1/frcpa(1+ 74/2^8))
350data8 0x842f5c7c573cbaa2 , 0x00003ffd //   log(1/frcpa(1+ 75/2^8))
351//
352data8 0x85ce471968c8893a , 0x00003ffd //   log(1/frcpa(1+ 76/2^8))
353data8 0x876e8305bc04066d , 0x00003ffd //   log(1/frcpa(1+ 77/2^8))
354data8 0x891012678031fbb3 , 0x00003ffd //   log(1/frcpa(1+ 78/2^8))
355data8 0x8a5f1493d766a05f , 0x00003ffd //   log(1/frcpa(1+ 79/2^8))
356data8 0x8c030c778c56fa00 , 0x00003ffd //   log(1/frcpa(1+ 80/2^8))
357//
358data8 0x8da85df17e31d9ae , 0x00003ffd //   log(1/frcpa(1+ 81/2^8))
359data8 0x8efa663e7921687e , 0x00003ffd //   log(1/frcpa(1+ 82/2^8))
360data8 0x90a22b6875c6a1f8 , 0x00003ffd //   log(1/frcpa(1+ 83/2^8))
361data8 0x91f62cc8f5d24837 , 0x00003ffd //   log(1/frcpa(1+ 84/2^8))
362data8 0x93a06cfc3857d980 , 0x00003ffd //   log(1/frcpa(1+ 85/2^8))
363//
364data8 0x94f66d5e6fd01ced , 0x00003ffd //   log(1/frcpa(1+ 86/2^8))
365data8 0x96a330156e6772f2 , 0x00003ffd //   log(1/frcpa(1+ 87/2^8))
366data8 0x97fb3582754ea25b , 0x00003ffd //   log(1/frcpa(1+ 88/2^8))
367data8 0x99aa8259aad1bbf2 , 0x00003ffd //   log(1/frcpa(1+ 89/2^8))
368data8 0x9b0492f6227ae4a8 , 0x00003ffd //   log(1/frcpa(1+ 90/2^8))
369//
370data8 0x9c5f8e199bf3a7a5 , 0x00003ffd //   log(1/frcpa(1+ 91/2^8))
371data8 0x9e1293b9998c1daa , 0x00003ffd //   log(1/frcpa(1+ 92/2^8))
372data8 0x9f6fa31e0b41f308 , 0x00003ffd //   log(1/frcpa(1+ 93/2^8))
373data8 0xa0cda11eaf46390e , 0x00003ffd //   log(1/frcpa(1+ 94/2^8))
374data8 0xa22c8f029cfa45aa , 0x00003ffd //   log(1/frcpa(1+ 95/2^8))
375//
376data8 0xa3e48badb7856b34 , 0x00003ffd //   log(1/frcpa(1+ 96/2^8))
377data8 0xa5459a0aa95849f9 , 0x00003ffd //   log(1/frcpa(1+ 97/2^8))
378data8 0xa6a79c84480cfebd , 0x00003ffd //   log(1/frcpa(1+ 98/2^8))
379data8 0xa80a946d0fcb3eb2 , 0x00003ffd //   log(1/frcpa(1+ 99/2^8))
380data8 0xa96e831a3ea7b314 , 0x00003ffd //   log(1/frcpa(1+100/2^8))
381//
382data8 0xaad369e3dc544e3b , 0x00003ffd //   log(1/frcpa(1+101/2^8))
383data8 0xac92e9588952c815 , 0x00003ffd //   log(1/frcpa(1+102/2^8))
384data8 0xadfa035aa1ed8fdc , 0x00003ffd //   log(1/frcpa(1+103/2^8))
385data8 0xaf6219eae1ad6e34 , 0x00003ffd //   log(1/frcpa(1+104/2^8))
386data8 0xb0cb2e6d8160f753 , 0x00003ffd //   log(1/frcpa(1+105/2^8))
387//
388data8 0xb2354249ad950f72 , 0x00003ffd //   log(1/frcpa(1+106/2^8))
389data8 0xb3a056e98ef4a3b4 , 0x00003ffd //   log(1/frcpa(1+107/2^8))
390data8 0xb50c6dba52c6292a , 0x00003ffd //   log(1/frcpa(1+108/2^8))
391data8 0xb679882c33876165 , 0x00003ffd //   log(1/frcpa(1+109/2^8))
392data8 0xb78c07429785cedc , 0x00003ffd //   log(1/frcpa(1+110/2^8))
393//
394data8 0xb8faeb8dc4a77d24 , 0x00003ffd //   log(1/frcpa(1+111/2^8))
395data8 0xba6ad77eb36ae0d6 , 0x00003ffd //   log(1/frcpa(1+112/2^8))
396data8 0xbbdbcc915e9bee50 , 0x00003ffd //   log(1/frcpa(1+113/2^8))
397data8 0xbd4dcc44f8cf12ef , 0x00003ffd //   log(1/frcpa(1+114/2^8))
398data8 0xbec0d81bf5b531fa , 0x00003ffd //   log(1/frcpa(1+115/2^8))
399//
400data8 0xc034f19c139186f4 , 0x00003ffd //   log(1/frcpa(1+116/2^8))
401data8 0xc14cb69f7c5e55ab , 0x00003ffd //   log(1/frcpa(1+117/2^8))
402data8 0xc2c2abbb6e5fd56f , 0x00003ffd //   log(1/frcpa(1+118/2^8))
403data8 0xc439b2c193e6771e , 0x00003ffd //   log(1/frcpa(1+119/2^8))
404data8 0xc553acb9d5c67733 , 0x00003ffd //   log(1/frcpa(1+120/2^8))
405//
406data8 0xc6cc96e441272441 , 0x00003ffd //   log(1/frcpa(1+121/2^8))
407data8 0xc8469753eca88c30 , 0x00003ffd //   log(1/frcpa(1+122/2^8))
408data8 0xc962cf3ce072b05c , 0x00003ffd //   log(1/frcpa(1+123/2^8))
409data8 0xcadeba8771f694aa , 0x00003ffd //   log(1/frcpa(1+124/2^8))
410data8 0xcc5bc08d1f72da94 , 0x00003ffd //   log(1/frcpa(1+125/2^8))
411//
412data8 0xcd7a3f99ea035c29 , 0x00003ffd //   log(1/frcpa(1+126/2^8))
413data8 0xcef93860c8a53c35 , 0x00003ffd //   log(1/frcpa(1+127/2^8))
414data8 0xd0192f68a7ed23df , 0x00003ffd //   log(1/frcpa(1+128/2^8))
415data8 0xd19a201127d3c645 , 0x00003ffd //   log(1/frcpa(1+129/2^8))
416data8 0xd2bb92f4061c172c , 0x00003ffd //   log(1/frcpa(1+130/2^8))
417//
418data8 0xd43e80b2ee8cc8fc , 0x00003ffd //   log(1/frcpa(1+131/2^8))
419data8 0xd56173601fc4ade4 , 0x00003ffd //   log(1/frcpa(1+132/2^8))
420data8 0xd6e6637efb54086f , 0x00003ffd //   log(1/frcpa(1+133/2^8))
421data8 0xd80ad9f58f3c8193 , 0x00003ffd //   log(1/frcpa(1+134/2^8))
422data8 0xd991d1d31aca41f8 , 0x00003ffd //   log(1/frcpa(1+135/2^8))
423//
424data8 0xdab7d02231484a93 , 0x00003ffd //   log(1/frcpa(1+136/2^8))
425data8 0xdc40d532cde49a54 , 0x00003ffd //   log(1/frcpa(1+137/2^8))
426data8 0xdd685f79ed8b265e , 0x00003ffd //   log(1/frcpa(1+138/2^8))
427data8 0xde9094bbc0e17b1d , 0x00003ffd //   log(1/frcpa(1+139/2^8))
428data8 0xe01c91b78440c425 , 0x00003ffd //   log(1/frcpa(1+140/2^8))
429//
430data8 0xe14658f26997e729 , 0x00003ffd //   log(1/frcpa(1+141/2^8))
431data8 0xe270cdc2391e0d23 , 0x00003ffd //   log(1/frcpa(1+142/2^8))
432data8 0xe3ffce3a2aa64922 , 0x00003ffd //   log(1/frcpa(1+143/2^8))
433data8 0xe52bdb274ed82887 , 0x00003ffd //   log(1/frcpa(1+144/2^8))
434data8 0xe6589852e75d7df6 , 0x00003ffd //   log(1/frcpa(1+145/2^8))
435//
436data8 0xe786068c79937a7d , 0x00003ffd //   log(1/frcpa(1+146/2^8))
437data8 0xe91903adad100911 , 0x00003ffd //   log(1/frcpa(1+147/2^8))
438data8 0xea481236f7d35bb0 , 0x00003ffd //   log(1/frcpa(1+148/2^8))
439data8 0xeb77d48c692e6b14 , 0x00003ffd //   log(1/frcpa(1+149/2^8))
440data8 0xeca84b83d7297b87 , 0x00003ffd //   log(1/frcpa(1+150/2^8))
441//
442data8 0xedd977f4962aa158 , 0x00003ffd //   log(1/frcpa(1+151/2^8))
443data8 0xef7179a22f257754 , 0x00003ffd //   log(1/frcpa(1+152/2^8))
444data8 0xf0a450d139366ca7 , 0x00003ffd //   log(1/frcpa(1+153/2^8))
445data8 0xf1d7e0524ff9ffdb , 0x00003ffd //   log(1/frcpa(1+154/2^8))
446data8 0xf30c29036a8b6cae , 0x00003ffd //   log(1/frcpa(1+155/2^8))
447//
448data8 0xf4412bc411ea8d92 , 0x00003ffd //   log(1/frcpa(1+156/2^8))
449data8 0xf576e97564c8619d , 0x00003ffd //   log(1/frcpa(1+157/2^8))
450data8 0xf6ad62fa1b5f172f , 0x00003ffd //   log(1/frcpa(1+158/2^8))
451data8 0xf7e499368b55c542 , 0x00003ffd //   log(1/frcpa(1+159/2^8))
452data8 0xf91c8d10abaffe22 , 0x00003ffd //   log(1/frcpa(1+160/2^8))
453//
454data8 0xfa553f7018c966f3 , 0x00003ffd //   log(1/frcpa(1+161/2^8))
455data8 0xfb8eb13e185d802c , 0x00003ffd //   log(1/frcpa(1+162/2^8))
456data8 0xfcc8e3659d9bcbed , 0x00003ffd //   log(1/frcpa(1+163/2^8))
457data8 0xfe03d6d34d487fd2 , 0x00003ffd //   log(1/frcpa(1+164/2^8))
458data8 0xff3f8c7581e9f0ae , 0x00003ffd //   log(1/frcpa(1+165/2^8))
459//
460data8 0x803e029e280173ae , 0x00003ffe //   log(1/frcpa(1+166/2^8))
461data8 0x80dca10cc52d0757 , 0x00003ffe //   log(1/frcpa(1+167/2^8))
462data8 0x817ba200632755a1 , 0x00003ffe //   log(1/frcpa(1+168/2^8))
463data8 0x821b05f3b01d6774 , 0x00003ffe //   log(1/frcpa(1+169/2^8))
464data8 0x82bacd623ff19d06 , 0x00003ffe //   log(1/frcpa(1+170/2^8))
465//
466data8 0x835af8c88e7a8f47 , 0x00003ffe //   log(1/frcpa(1+171/2^8))
467data8 0x83c5f8299e2b4091 , 0x00003ffe //   log(1/frcpa(1+172/2^8))
468data8 0x8466cb43f3d87300 , 0x00003ffe //   log(1/frcpa(1+173/2^8))
469data8 0x850803a67c80ca4b , 0x00003ffe //   log(1/frcpa(1+174/2^8))
470data8 0x85a9a1d11a23b461 , 0x00003ffe //   log(1/frcpa(1+175/2^8))
471//
472data8 0x864ba644a18e6e05 , 0x00003ffe //   log(1/frcpa(1+176/2^8))
473data8 0x86ee1182dcc432f7 , 0x00003ffe //   log(1/frcpa(1+177/2^8))
474data8 0x875a925d7e48c316 , 0x00003ffe //   log(1/frcpa(1+178/2^8))
475data8 0x87fdaa109d23aef7 , 0x00003ffe //   log(1/frcpa(1+179/2^8))
476data8 0x88a129ed4becfaf2 , 0x00003ffe //   log(1/frcpa(1+180/2^8))
477//
478data8 0x89451278ecd7f9cf , 0x00003ffe //   log(1/frcpa(1+181/2^8))
479data8 0x89b29295f8432617 , 0x00003ffe //   log(1/frcpa(1+182/2^8))
480data8 0x8a572ac5a5496882 , 0x00003ffe //   log(1/frcpa(1+183/2^8))
481data8 0x8afc2d0ce3b2dadf , 0x00003ffe //   log(1/frcpa(1+184/2^8))
482data8 0x8b6a69c608cfd3af , 0x00003ffe //   log(1/frcpa(1+185/2^8))
483//
484data8 0x8c101e106e899a83 , 0x00003ffe //   log(1/frcpa(1+186/2^8))
485data8 0x8cb63de258f9d626 , 0x00003ffe //   log(1/frcpa(1+187/2^8))
486data8 0x8d2539c5bd19e2b1 , 0x00003ffe //   log(1/frcpa(1+188/2^8))
487data8 0x8dcc0e064b29e6f1 , 0x00003ffe //   log(1/frcpa(1+189/2^8))
488data8 0x8e734f45d88357ae , 0x00003ffe //   log(1/frcpa(1+190/2^8))
489//
490data8 0x8ee30cef034a20db , 0x00003ffe //   log(1/frcpa(1+191/2^8))
491data8 0x8f8b0515686d1d06 , 0x00003ffe //   log(1/frcpa(1+192/2^8))
492data8 0x90336bba039bf32f , 0x00003ffe //   log(1/frcpa(1+193/2^8))
493data8 0x90a3edd23d1c9d58 , 0x00003ffe //   log(1/frcpa(1+194/2^8))
494data8 0x914d0de2f5d61b32 , 0x00003ffe //   log(1/frcpa(1+195/2^8))
495//
496data8 0x91be0c20d28173b5 , 0x00003ffe //   log(1/frcpa(1+196/2^8))
497data8 0x9267e737c06cd34a , 0x00003ffe //   log(1/frcpa(1+197/2^8))
498data8 0x92d962ae6abb1237 , 0x00003ffe //   log(1/frcpa(1+198/2^8))
499data8 0x9383fa6afbe2074c , 0x00003ffe //   log(1/frcpa(1+199/2^8))
500data8 0x942f0421651c1c4e , 0x00003ffe //   log(1/frcpa(1+200/2^8))
501//
502data8 0x94a14a3845bb985e , 0x00003ffe //   log(1/frcpa(1+201/2^8))
503data8 0x954d133857f861e7 , 0x00003ffe //   log(1/frcpa(1+202/2^8))
504data8 0x95bfd96468e604c4 , 0x00003ffe //   log(1/frcpa(1+203/2^8))
505data8 0x9632d31cafafa858 , 0x00003ffe //   log(1/frcpa(1+204/2^8))
506data8 0x96dfaabd86fa1647 , 0x00003ffe //   log(1/frcpa(1+205/2^8))
507//
508data8 0x9753261fcbb2a594 , 0x00003ffe //   log(1/frcpa(1+206/2^8))
509data8 0x9800c11b426b996d , 0x00003ffe //   log(1/frcpa(1+207/2^8))
510data8 0x9874bf4d45ae663c , 0x00003ffe //   log(1/frcpa(1+208/2^8))
511data8 0x99231f5ee9a74f79 , 0x00003ffe //   log(1/frcpa(1+209/2^8))
512data8 0x9997a18a56bcad28 , 0x00003ffe //   log(1/frcpa(1+210/2^8))
513//
514data8 0x9a46c873a3267e79 , 0x00003ffe //   log(1/frcpa(1+211/2^8))
515data8 0x9abbcfc621eb6cb6 , 0x00003ffe //   log(1/frcpa(1+212/2^8))
516data8 0x9b310cb0d354c990 , 0x00003ffe //   log(1/frcpa(1+213/2^8))
517data8 0x9be14cf9e1b3515c , 0x00003ffe //   log(1/frcpa(1+214/2^8))
518data8 0x9c5710b8cbb73a43 , 0x00003ffe //   log(1/frcpa(1+215/2^8))
519//
520data8 0x9ccd0abd301f399c , 0x00003ffe //   log(1/frcpa(1+216/2^8))
521data8 0x9d7e67f3bdce8888 , 0x00003ffe //   log(1/frcpa(1+217/2^8))
522data8 0x9df4ea81a99daa01 , 0x00003ffe //   log(1/frcpa(1+218/2^8))
523data8 0x9e6ba405a54514ba , 0x00003ffe //   log(1/frcpa(1+219/2^8))
524data8 0x9f1e21c8c7bb62b3 , 0x00003ffe //   log(1/frcpa(1+220/2^8))
525//
526data8 0x9f956593f6b6355c , 0x00003ffe //   log(1/frcpa(1+221/2^8))
527data8 0xa00ce1092e5498c3 , 0x00003ffe //   log(1/frcpa(1+222/2^8))
528data8 0xa0c08309c4b912c1 , 0x00003ffe //   log(1/frcpa(1+223/2^8))
529data8 0xa1388a8c6faa2afa , 0x00003ffe //   log(1/frcpa(1+224/2^8))
530data8 0xa1b0ca7095b5f985 , 0x00003ffe //   log(1/frcpa(1+225/2^8))
531//
532data8 0xa22942eb47534a00 , 0x00003ffe //   log(1/frcpa(1+226/2^8))
533data8 0xa2de62326449d0a3 , 0x00003ffe //   log(1/frcpa(1+227/2^8))
534data8 0xa357690f88bfe345 , 0x00003ffe //   log(1/frcpa(1+228/2^8))
535data8 0xa3d0a93f45169a4b , 0x00003ffe //   log(1/frcpa(1+229/2^8))
536data8 0xa44a22f7ffe65f30 , 0x00003ffe //   log(1/frcpa(1+230/2^8))
537//
538data8 0xa500c5e5b4c1aa36 , 0x00003ffe //   log(1/frcpa(1+231/2^8))
539data8 0xa57ad064eb2ebbc2 , 0x00003ffe //   log(1/frcpa(1+232/2^8))
540data8 0xa5f5152dedf4384e , 0x00003ffe //   log(1/frcpa(1+233/2^8))
541data8 0xa66f9478856233ec , 0x00003ffe //   log(1/frcpa(1+234/2^8))
542data8 0xa6ea4e7cca02c32e , 0x00003ffe //   log(1/frcpa(1+235/2^8))
543//
544data8 0xa765437325341ccf , 0x00003ffe //   log(1/frcpa(1+236/2^8))
545data8 0xa81e21e6c75b4020 , 0x00003ffe //   log(1/frcpa(1+237/2^8))
546data8 0xa899ab333fe2b9ca , 0x00003ffe //   log(1/frcpa(1+238/2^8))
547data8 0xa9157039c51ebe71 , 0x00003ffe //   log(1/frcpa(1+239/2^8))
548data8 0xa991713433c2b999 , 0x00003ffe //   log(1/frcpa(1+240/2^8))
549//
550data8 0xaa0dae5cbcc048b3 , 0x00003ffe //   log(1/frcpa(1+241/2^8))
551data8 0xaa8a27ede5eb13ad , 0x00003ffe //   log(1/frcpa(1+242/2^8))
552data8 0xab06de228a9e3499 , 0x00003ffe //   log(1/frcpa(1+243/2^8))
553data8 0xab83d135dc633301 , 0x00003ffe //   log(1/frcpa(1+244/2^8))
554data8 0xac3fb076adc7fe7a , 0x00003ffe //   log(1/frcpa(1+245/2^8))
555//
556data8 0xacbd3cbbe47988f1 , 0x00003ffe //   log(1/frcpa(1+246/2^8))
557data8 0xad3b06b1a5dc57c3 , 0x00003ffe //   log(1/frcpa(1+247/2^8))
558data8 0xadb90e94af887717 , 0x00003ffe //   log(1/frcpa(1+248/2^8))
559data8 0xae3754a218f7c816 , 0x00003ffe //   log(1/frcpa(1+249/2^8))
560data8 0xaeb5d9175437afa2 , 0x00003ffe //   log(1/frcpa(1+250/2^8))
561//
562data8 0xaf349c322e9c7cee , 0x00003ffe //   log(1/frcpa(1+251/2^8))
563data8 0xafb39e30d1768d1c , 0x00003ffe //   log(1/frcpa(1+252/2^8))
564data8 0xb032df51c2c93116 , 0x00003ffe //   log(1/frcpa(1+253/2^8))
565data8 0xb0b25fd3e6035ad9 , 0x00003ffe //   log(1/frcpa(1+254/2^8))
566data8 0xb1321ff67cba178c , 0x00003ffe //   log(1/frcpa(1+255/2^8))
567LOCAL_OBJECT_END(log_table_3)
568
569
570.section .text
571GLOBAL_LIBM_ENTRY(acosh)
572
573{ .mfi
574      getf.exp   acosh_GR_f8 = f8
575      fclass.m   p6,p0 = f8, 0xc3                    // Test for x = NaN
576      mov        log_GR_comp2 = 0x1003e
577}
578{ .mfi
579      addl       NR_table_address = @ltoff(log_table_1), gp
580      fms.s1     log_y = f8, f8, f1                  // y = x^2-1
581      nop.i      0
582}
583;;
584
585{ .mfi
586      getf.sig   acosh_GR_f8_sig = f8
587      fclass.m   p11,p0 = f8, 0x21                   // Test for x=+inf
588      mov        log_GR_exp_17_ones = 0x1ffff
589}
590{ .mfi
591      ld8        NR_table_address = [NR_table_address]
592      fms.s1     log_w = f8,f1,f1                    // w = x - 1
593      nop.i      0
594}
595;;
596
597{ .mfi
598      nop.m      0
599      fcmp.lt.s1 p7,p8 = f8, f1            // Test for x<1.0
600      addl       log_GR_comp = 0x10020C,r0 // Upper 21 bits of signif of 1.0005
601}
602{ .mfb
603      mov        log_GR_exp_16_ones = 0xffff         //BIAS
604(p6)  fma.d.s0   f8 = f8,f1,f0      // quietize nan result if x=nan
605(p6)  br.ret.spnt b0                // Exit for x=nan
606}
607;;
608
609{ .mfb
610      //get second table address
611      adds       log_table_address2 = 0x40, NR_table_address
612      fcmp.eq.s1 p10,p0 = f8, f1      // Test for x=+1.0
613(p11) br.ret.spnt b0                  // Exit for x=+inf
614}
615;;
616
617{ .mfi
618      ldfpd      NR1,NR2 = [log_table_address2],16
619      frsqrta.s1 log_y_rs,p0 = log_y  // z=1/sqrt(y)
620      nop.i      0
621}
622{ .mfb
623      nop.m      0
624      fma.s1     log_arg = f8,f1,f8
625(p7)  br.cond.spnt ACOSH_LESS_ONE     // Branch if path 7, x < 1.0
626}
627;;
628
629{ .mfi
630      ldfe       log_C4 = [log_table_address2],16
631(p8)  fcmp.eq.s0 p6,p0 = f8, f0       // Dummy op sets denorm flag if unorm>=1.0
632      nop.i      0
633}
634{ .mfb
635(p8)  cmp.le.unc p13,p0 = log_GR_comp2,acosh_GR_f8
636      nop.f      0
637(p13) br.cond.spnt LOG_COMMON1        // Branch if path 4, x >= 2^63
638}
639;;
640
641{ .mfi
642      ldfe       log_C3 = [log_table_address2],16
643(p10) fmerge.s   f8 = f0, f0          // Return 0 if x=1.0
644      shr.u      acosh_GR_f8_sig = acosh_GR_f8_sig,43
645}
646{ .mib
647      cmp.eq     p14,p0 = log_GR_exp_16_ones,acosh_GR_f8
648      nop.i      0
649(p10) br.ret.spnt b0                  // Exit for x=1.0
650}
651;;
652
653{ .mfi
654      ldfe       log_C2 = [log_table_address2],16
655      frsqrta.s1 acosh_w_rs,p0 = log_w // t=1/sqrt(w)
656      nop.i      0
657}
658{ .mfb
659(p14) cmp.lt.unc p15,p0 = acosh_GR_f8_sig,log_GR_comp
660      nop.f      0
661(p15) br.cond.spnt ACOSH_NEAR_ONE     // Branch if path 2, 1.0 < x < 1.0005
662}
663;;
664
665// Here is main path (path 3), 1.0005 <= x < 2^63
// Three Newton-Raphson steps of the form z' = (0.5*z)*(3-(y*z)*z) are applied
// to z = log_y_rs with y = log_y; NR1 and NR2 hold 0.5 and 3 according to the
// inline comments.  The last step is folded into log_arg = y*z' + x (x is f8).
// The result is then formed as N*log2 + T + poly(r) and rounded to double,
// where N is the unbiased exponent, T a table entry and r = C*log_arg - 1.
// Note that C, N and the table index are taken from log_arg_early (built from
// the second-iteration z), while r uses the fully refined log_arg.
// NOTE(review): y, z, NR1, NR2, the masks and the table pointer are set up
// before this point; y is presumably x^2-1, so that log_arg ~ x+sqrt(x^2-1)
// -- confirm against the setup code.
666/////////////// The first iteration //////////////////////////////////
667{ .mfi
668      ldfpd      acosh_comp,log_P5 = [NR_table_address],16 // load a pair of doubles, pointer += 16
669      fma.s1     log_y_rs_iter = log_y_rs,log_y,f0              // y*z
670      nop.i      0
671}
672;;
673
674{ .mfi
675      ldfpd      log_P4,log_P3 = [NR_table_address],16
676      fnma.s1    log_y_rs_iter = log_y_rs_iter,log_y_rs,NR2     // 3-(y*z)*z
677      nop.i      0
678}
679{ .mfi
680      nop.m      0
681      fma.s1     log_y_rs_iter1 = log_y_rs,NR1,f0               // 0.5*z
682      nop.i      0
683}
684;;
685
686{ .mfi
687      ldfpd      log_P2,log_P1 = [NR_table_address],16
688      //z1 = (0.5*z)*(3-(y*z)*z)
689      fma.s1     log_y_rs_iter = log_y_rs_iter1,log_y_rs_iter,f0
690      nop.i      0
691}
692;;
693
694/////////////////////////// The second iteration /////////////////////////////
695{ .mfi
696      nop.m      0
697      fma.s1     log_y_rs = log_y_rs_iter,log_y,f0              // y*z1
698      nop.i      0
699}
700;;
701
702{ .mfi
703      nop.m      0
704      fnma.s1    log_y_rs = log_y_rs,log_y_rs_iter,NR2          // 3-(y*z1)*z1
705      nop.i      0
706}
707{ .mfi
708      nop.m      0
709      fma.s1     log_y_rs_iter1 = log_y_rs_iter,NR1,f0          // 0.5*z1
710      nop.i      0
711}
712;;
713
714{ .mfi
715      nop.m      0
716      //z2 = (0.5*z1)*(3-(y*z1)*z1)
717      fma.s1     log_y_rs_iter = log_y_rs_iter1,log_y_rs,f0
718      nop.i      0
719}
720{ .mfi
721      nop.m      0
722      //same product z2, kept separately as the seed of log_arg_early
723      fma.s1     log_arg_early = log_y_rs_iter1,log_y_rs,f0
724      nop.i      0
725}
726;;
727
728//////////////////////////////////////// The third iteration /////////////////
729{ .mfi
730      nop.m      0
731      fma.s1     log_y_rs = log_y_rs_iter,log_y,f0              // y*z2
732      nop.i      0
733}
734{ .mfi
735      nop.m      0
736      fma.s1     log_y_rs_iter1 = log_y_rs_iter,NR1,f0          // 0.5*z2
737      nop.i      0
738}
739;;
740
741{ .mfi
742      nop.m      0
743      fma.s1     log_arg_early = log_arg_early,log_y,f8         // z2*y + x
744      nop.i      0
745}
746;;
747
748{ .mfi
749      nop.m      0
750      fnma.s1    log_y_rs = log_y_rs,log_y_rs_iter,NR2          // 3-(y*z2)*z2
751      nop.i      0
752}
753{ .mfi
754      nop.m      0
755      fma.s1     log_y_rs_iter1 = log_y_rs_iter1,log_y,f0       // (0.5*z2)*y
756      nop.i      0
757}
758;;
759
760{ .mfi
761      nop.m      0
762      frcpa.s1   log_C,p0 = f1,log_arg_early   // C ~ 1/log_arg_early; predicate result discarded
763      nop.i      0
764}
765;;
766
767{ .mfi
768      getf.exp   log_GR_signexp_f8 = log_arg_early   // sign and exponent field
769      nop.f      0
770      nop.i      0
771}
772;;
773
774{ .mfi
775      getf.sig   log_GR_significand_f8 = log_arg_early   // significand bits
776      fma.s1     log_arg = log_y_rs_iter1,log_y_rs,f8 // ((0.5*z2)*y)*(3-(y*z2)*z2) + x
777      adds       log_table_address3 = 0x70, NR_table_address   // T table starts 0x70 past the current pointer
778}
779;;
780
781///////////////////////////////// The end NR iterations /////////////////////
782{ .mfi
783      ldfe       log2 = [NR_table_address],16   // load the log2 constant (extended format)
784      nop.f      0
785      nop.i      0
786}
787;;
788
789{ .mmi
790      //mask with log_GR_exp_17_ones (set up earlier); by its name this keeps
      //the exponent bits and drops the sign bit
791      and        log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
792;;
793      //subtract the exponent bias (log_GR_exp_16_ones) to get the true exponent N
794      sub        log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
795      nop.i      0
796}
797;;
798
799{ .mfi
800      setf.sig   log_int_Nfloat = log_GR_true_exp_f8   // integer N into an FP register
801      fms.s1     log_r = log_C,log_arg,f1  // r = C * log_arg - 1, with C = frcpa(log_arg_early)
802      extr.u     log_GR_index = log_GR_significand_f8,55,8 //8 bits starting at bit 55
803}
804;;
805
806{ .mmi
807      //table base + index*16 (shladd shifts the index left by 4)
808      shladd     log_table_address3 = log_GR_index,4,log_table_address3
809;;
810      ldfe       log_T = [log_table_address3]   // T = table entry selected by the index
811      nop.i      0
812}
813;;
814
815{ .mfi
816      nop.m      0
817      fma.s1     log_rsq = log_r, log_r, f0         //r^2
818      nop.i      0
819}
820{ .mfi
821      nop.m      0
822      fma.s1     log_rp_p4 = log_P5, log_r, log_P4  //P5*r + P4
823      nop.i      0
824}
825;;
826
827{ .mfi
828      nop.m      0
829      fma.s1     log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
830      nop.i      0
831}
832;;
833
834{ .mfi
835      nop.m      0
836      //convert the integer N to floating-point format (log_Nfloat)
837      fcvt.xf    log_Nfloat = log_int_Nfloat
838      nop.i      0
839}
840;;
841
842{ .mfi
843      nop.m      0
844      fma.s1     log_rcube = log_rsq, log_r, f0      //r^3
845      nop.i      0
846}
847{ .mfi
848      nop.m      0
849      fma.s1     log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
850      nop.i      0
851}
852;;
853
854{ .mfi
855      nop.m      0
856      //(P5*r + P4)*r^2 + P3*r + P2
857      fma.s1     log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
858      nop.i      0
859}
860;;
861
862{ .mfi
863      nop.m      0
864      fma.s1     log_T_plus_Nlog2 = log_Nfloat,log2,log_T    //N*log2 + T
865      nop.i      0
866}
867{ .mfi
868      nop.m      0
869      //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
870      fma.s1     log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
871      nop.i      0
872}
873;;
874
875{ .mfb
876      nop.m      0
877      // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      // Final add in double precision under status field s0; result returned in f8
878      fadd.d.s0  f8 = log_T_plus_Nlog2, log_r2P_r
879      br.ret.sptk b0           // Exit main path, path 3: 1.0005 <= x < 2^63
880}
881;;
882
883// Here if path 2, 1.0 < x < 1.0005
// The result is formed as P * s and rounded to double, where
//   P = (((x*C4 + C3)*w + C2)*w + C1)*w + C0   (acosh_w_1, with w = log_w)
//   s = t*w after three Newton-Raphson steps t' = (0.5*t)*(3-t*t*w) applied
//       to t = acosh_w_rs, i.e. s approximates sqrt(w) when t starts near
//       1/sqrt(w).
// The inline comments treat log_w as x-1 and NR1/NR2 as 0.5/3.
// NOTE(review): log_w, acosh_w_rs, NR1, NR2, log_C2..log_C4 and
// log_table_address2 are set up before this label -- confirm their values
// against the setup code.
884ACOSH_NEAR_ONE:
885// The first NR iteration
886{ .mfi
887      ldfe       log_C1 = [log_table_address2],16     // load C1, pointer += 16
888      fma.s1     acosh_w_iter1 = acosh_w_rs,log_w,f0  //t*w
889      nop.i      0
890}
891{ .mfi
892      nop.m      0
893      fma.s1     acosh_w_1 = f8,log_C4,log_C3         //x*C4 + C3
894      nop.i      0
895}
896;;
897
898{ .mfi
899      ldfe       log_C0 = [log_table_address2],16     // load C0, pointer += 16
900      fma.s1     acosh_w_iter2 = acosh_w_rs,NR1,f0    //t*0.5
901      nop.i      0
902}
903{ .mfi
904      nop.m      0
905      fnma.s1    acosh_w_iter1 = acosh_w_iter1,acosh_w_rs,NR2 //3-t*t*w
906      nop.i      0
907}
908;;
909
910{ .mfi
911      nop.m      0
912      //t1 = (3-t*t*w)*t*0.5
913      fma.s1     acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
914      nop.i      0
915}
916{ .mfi
917      nop.m      0
918      fma.s1     acosh_w_1 = acosh_w_1,log_w,log_C2 //(x*C4 + C3)*(x-1) + C2
919      nop.i      0
920}
921;;
922
923// The second NR iteration
924{ .mfi
925      nop.m      0
926      fma.s1     acosh_w_rs = acosh_w_iter2,log_w,f0  //t1*w
927      nop.i      0
928}
929{ .mfi
930      nop.m      0
931      //((x*C4 + C3)*(x-1) + C2)*(x-1) + C1
932      fma.s1     acosh_w_1 = acosh_w_1,log_w,log_C1
933      nop.i      0
934}
935;;
936
937{ .mfi
938      nop.m      0
      // Same instruction group as the update of acosh_w_iter2 below, so this
      // reads the value from before that update (t1): 3-t1*(t1*w)
939      fnma.s1    acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2
940      nop.i      0
941}
942{ .mfi
943      nop.m      0
944      fma.s1     acosh_w_iter2 = acosh_w_iter2,NR1,f0 //t1*0.5
945      nop.i      0
946}
947;;
948
949{ .mfi
950      nop.m      0
951      fma.s1     acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 //t2 = (t1*0.5)*(3-t1*t1*w)
952      nop.i      0
953}
954{ .mfi
955      nop.m      0
956      //(((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
957      fma.s1     acosh_w_1 = acosh_w_1,log_w,log_C0
958      nop.i      0
959}
960;;
961
962//The third NR iteration
963{ .mfi
964      nop.m      0
965      fma.s1     acosh_w_rs = acosh_w_iter2,log_w,f0  //t2*w
966      nop.i      0
967}
968;;
969
970{ .mfi
971      nop.m      0
972      fnma.s1    acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 //3-t2*(t2*w), reads t2 before the update below
973      nop.i      0
974}
975{ .mfi
976      nop.m      0
977      fma.s1     acosh_w_iter2 = acosh_w_iter2,NR1,f0 //t2*0.5
978      nop.i      0
979}
980;;
981
982{ .mfi
983      nop.m      0
984      fma.s1     acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 //t3 = (t2*0.5)*(3-t2*t2*w)
985      nop.i      0
986}
987;;
988
989{ .mfi
990      nop.m      0
991      fma.s1     acosh_w_sqrt = acosh_w_iter2,log_w,f0 //s = t3*w
992      nop.i      0
993}
994;;
995
996{ .mfb
997      nop.m      0
998      fma.d.s0   f8 = acosh_w_1,acosh_w_sqrt,f0 //result = P*s, double precision, status field s0
999      br.ret.sptk b0               // Exit path 2, 1.0 < x < 1.0005
1000}
1001;;
1002
1003// Here if path 4, x >= 2^63
// Same table-driven evaluation as the tail of the main path, but without the
// Newton-Raphson iterations: the result is N*log2 + T + poly(r), where
// C = frcpa(log_arg), r = C*log_arg - 1, N is the unbiased exponent of log_arg
// and T is the table entry selected by 8 of its significand bits.
// NOTE(review): log_arg, the exponent masks and NR_table_address are prepared
// before the branch to this label -- confirm what log_arg holds on this path.
1004LOG_COMMON1:
1005{ .mfi
1006      ldfpd      acosh_comp,log_P5 = [NR_table_address],16 // load a pair of doubles, pointer += 16
1007      frcpa.s1   log_C,p0 = f1,log_arg                     // C ~ 1/log_arg; predicate result discarded
1008      nop.i      0
1009}
1010;;
1011
1012{ .mmi
1013      getf.exp   log_GR_signexp_f8 = log_arg               // sign and exponent field
1014      ldfpd      log_P4,log_P3 = [NR_table_address],16
1015      nop.i      0
1016}
1017;;
1018
1019{ .mmi
1020      getf.sig   log_GR_significand_f8 = log_arg           // significand bits
1021      ldfpd      log_P2,log_P1 = [NR_table_address],16
1022      nop.i      0
1023}
1024;;
1025
1026{ .mfi
1027      adds       log_table_address3 = 0x70, NR_table_address // T table starts 0x70 past the current pointer
1028      nop.f      0
1029      //mask with log_GR_exp_17_ones (set up earlier); by its name this keeps
      //the exponent bits and drops the sign bit
1030      and        log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
1031}
1032;;
1033
1034{ .mmf
1035      ldfe       log2 = [NR_table_address],16              // load the log2 constant (extended format)
1036      //subtract the exponent bias (log_GR_exp_16_ones) to get the true exponent N
1037      sub        log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
1038      fms.s1     log_r = log_C,log_arg,f1  // C = frcpa(log_arg); r = C * log_arg - 1
1039}
1040;;
1041
1042{ .mfi
1043      setf.sig   log_int_Nfloat = log_GR_true_exp_f8       // integer N into an FP register
1044      nop.f      0
1045      extr.u     log_GR_index = log_GR_significand_f8,55,8 //8 bits starting at bit 55
1046}
1047;;
1048
1049{ .mmi
1050      //table base + index*16 (shladd shifts the index left by 4)
1051      shladd     log_table_address3 = log_GR_index,4,log_table_address3
1052;;
1053      ldfe       log_T = [log_table_address3]              // T = table entry selected by the index
1054      nop.i      0
1055}
1056;;
1057
1058{ .mfi
1059      nop.m      0
1060      fma.s1     log_rsq = log_r, log_r, f0         //r^2
1061      nop.i      0
1062}
1063{ .mfi
1064      nop.m      0
1065      fma.s1     log_rp_p4 = log_P5, log_r, log_P4  //P5*r + P4
1066      nop.i      0
1067}
1068;;
1069
1070{ .mfi
1071      nop.m      0
1072      fma.s1     log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
1073      nop.i      0
1074}
1075;;
1076
1077{ .mfi
1078      nop.m      0
1079      fma.s1     log_rcube = log_rsq, log_r, f0     //r^3
1080      nop.i      0
1081}
1082{ .mfi
1083      nop.m      0
1084      fma.s1     log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
1085      nop.i      0
1086}
1087;;
1088
1089{ .mfi
1090      nop.m      0
1091      //convert the integer N to floating-point format (log_Nfloat)
1092      fcvt.xf    log_Nfloat = log_int_Nfloat
1093      nop.i      0
1094}
1095{ .mfi
1096      nop.m      0
1097      //(P5*r + P4)*r^2 + P3*r + P2
1098      fma.s1     log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
1099      nop.i      0
1100}
1101;;
1102
1103{ .mfi
1104      nop.m      0
1105      fma.s1     log_T_plus_Nlog2 = log_Nfloat,log2,log_T    //N*log2 + T
1106      nop.i      0
1107}
1108{ .mfi
1109      nop.m      0
1110      //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
1111      fma.s1     log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
1112      nop.i      0
1113}
1114;;
1115
1116{ .mfb
1117      nop.m      0
1118      //  N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      //  Final add in double precision under status field s0; result returned in f8
1119      fadd.d.s0  f8 = log_T_plus_Nlog2, log_r2P_r
1120      br.ret.sptk b0              // Exit path 4, x >= 2^63
1121}
1122;;
1123
1124// Here if path 7, x < 1.0
// Error exit: keeps a copy of the argument in f10, replaces f8 with the
// result of a 0/0 reciprocal approximation, sets the error tag and branches
// to __libm_error_region, which calls the error-support routine.
1125ACOSH_LESS_ONE:
1126{ .mfi
1127      alloc      r32 = ar.pfs,1,3,4,0   // new register frame: 1 input, 3 locals, 4 outputs, 0 rotating; old ar.pfs -> r32
1128      fmerge.s   f10 = f8,f8            // copy the argument; f10 is stored as parameter 1 by __libm_error_region
1129      nop.i      0
1130}
1131;;
1132
1133{ .mfb
1134      mov        acosh_GR_tag = 136     // error tag for this case (meaning defined by the error handler, not shown here)
1135      frcpa.s0   f8,p0 = f0,f0          // 0/0 under status field s0: expected to yield NaN and raise invalid -- TODO confirm
1136      br.cond.sptk __libm_error_region
1137}
1138;;
1139
1140GLOBAL_LIBM_END(acosh)
1141libm_alias_double_other (acosh, acosh)
1142
1143
1144LOCAL_LIBM_ENTRY(__libm_error_region)
// Local error stub reached by branch (not by call) from the special-case
// exits.  It opens a 64-byte stack frame, stores the saved argument (f10),
// a second parameter (f1) and the tentative result (f8) in it, calls
// __libm_error_support, reloads f8 from the result slot and returns through
// the b0 value that was live on entry.
// Frame layout relative to the new sp:
//   sp+16  parameter 1 (f10)     <- GR_Parameter_X
//   sp+32  parameter 2 (f1)      <- GR_Parameter_Y at the call
//   sp+48  result      (f8)      <- GR_Parameter_RESULT
// NOTE(review): the GR_* names are register aliases defined elsewhere in the
// file; the error tag is presumably passed in a register set by the branching
// code (e.g. acosh_GR_tag) -- confirm against the alias definitions.
1145.prologue
1146
1147{ .mfi
1148        add   GR_Parameter_Y=-32,sp             // old sp - 32 == new sp + 32: parameter 2 slot
1149        nop.f 0
1150.save   ar.pfs,GR_SAVE_PFS
1151        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
1152}
1153{ .mfi
1154.fframe 64
1155        add sp=-64,sp                          // Allocate the 64-byte frame (same instruction group as the add above)
1156        nop.f 0
1157        mov GR_SAVE_GP=gp                      // Save gp
1158};;
1159
1160{ .mmi
1161        stfd [GR_Parameter_Y] = f1,16         // Store parameter 2 (f1) at sp+32; pointer advances to sp+48
1162        add GR_Parameter_X = 16,sp            // Parameter 1 address (sp+16)
1163.save   b0, GR_SAVE_B0
1164        mov GR_SAVE_B0=b0                     // Save b0
1165};;
1166
1167.body
1168{ .mib
1169        stfd [GR_Parameter_X] = f10           // Store parameter 1 (saved argument) at sp+16
1170        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address (sp+48)
1171        nop.b 0
1172}
1173{ .mib
1174        stfd [GR_Parameter_Y] = f8            // Store the tentative result at sp+48
1175        add   GR_Parameter_Y = -16,GR_Parameter_Y   // Point back at parameter 2 (sp+32) for the callee
1176        br.call.sptk b0=__libm_error_support# // Call error handling function
1177};;
1178
1179{ .mmi
1180        add   GR_Parameter_RESULT = 48,sp      // Recompute the result slot address after the call
1181        nop.m 0
1182        nop.i 0
1183};;
1184
1185{ .mmi
1186        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
1187.restore sp
1188        add   sp = 64,sp                       // Release the 64-byte frame
1189        mov   b0 = GR_SAVE_B0                  // Restore return address
1190};;
1191
1192{ .mib
1193        mov   gp = GR_SAVE_GP                  // Restore gp
1194        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
1195        br.ret.sptk     b0                     // Return
1196};;
1197
1198LOCAL_LIBM_END(__libm_error_region)
1199
1200
1201.type   __libm_error_support#,@function
1202.global __libm_error_support#
1203