1.file "acosh.s" 2 3 4// Copyright (c) 2000 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 
//
// ==============================================================
// History
// ==============================================================
// 03/23/01 Initial version
// 04/19/01 Improved speed of the paths #1,2,3,4,5
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/14/03 Improved performance, set denormal flag for unorms >= 1.0
// 03/31/05 Reformatted delimiters between data tables
//
// API
// ==============================================================
// double acosh(double)
//
// Overview of operation
// ==============================================================
//
// There are 7 paths:
// 1. x = 1.0
//    Return acosh(x) = 0.0
// 2. 1.0 < x < 1.000499725341796875(0x3FF0020C00000000)
//    Return acosh(x) = sqrt(x-1) * Pol4(x), where Pol4(x) =
//      (((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
//
// 3. 1.000499725341796875(0x3FF0020C00000000) <= x < 2^63
//    Return acosh(x) = log(x + sqrt(x^2 -1.0))
//    To compute x + sqrt(x^2 -1.0) a modified Newton-Raphson method is used
//    (3 iterations)
//    See below for the description of the log algorithm.
//
// 4. 2^63 <= x < +INF
//    Return acosh(x) = log(2*x)
//    See below for the description of the log algorithm.
//
// 5. x = +INF
//    Return acosh(x) = +INF
//
// 6. x = [S,Q]NaN
//    Return acosh(x) = QNaN
//
// 7. x < 1.0
//    This is a domain error. The error handler is called with tag = 136
//
//==============================================================
// Algorithm Description for log(x) function
// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always
// true for this acosh implementation
//
// Consider  x = 2^N 1.f1 f2 f3 f4...f63
// Log(x) = log(frcpa(x) x/frcpa(x))
//        = log(1/frcpa(x)) + log(frcpa(x) x)
//        = -log(frcpa(x)) + log(frcpa(x) x)
//
// frcpa(x)       = 2^-N frcpa(1.f1 f2 ... f63)
//
// -log(frcpa(x)) = -log(C)
//                = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
//                = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
//
// -log(frcpa(x)) = -log(C)
//                = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
//
// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
//
// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
// Log(x) = +Nlog2 + T + log(frcpa(x) x)
//
// Log(x) = +Nlog2 + T + log(C x)
//
// Cx = 1 + r
//
// Log(x) = +Nlog2 + T + log(1+r)
// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
//
// 1.f1 f2 ... f8 has 256 entries.
// They are 1 + k/2^8, k = 0 ... 255
// These 256 values are the table entries.
//
// Implementation
//==============================================================
// C = frcpa(x)
// r = C * x - 1
//
// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
//
// x = f * 2^n where f is 1.f_1f_2f_3....f_63
// Nfloat = float(n) where n is the true unbiased exponent
// pre-index = f_1f_2....f_8
// index = pre_index * 16
// get the dxt table entry at index + offset = T
//
// result = (T + Nfloat * log(2)) + rseries
//
// The T table is calculated as follows
// Form x_k = 1 + k/2^8 where k goes from 0... 255
//      y_k = frcpa(x_k)
//      log(1/y_k) in quad and round to double-extended
//

// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f9 -> f15, f32 -> f65

// General registers used:
// r14 -> r27, r32 -> r39

// Predicate registers used:
// p6 -> p15

// p6 to filter out case when x = [Q,S]NaN
// p7,p8 to filter out case when x < 1.0
// p10 to select path #1
// p11 to filter out case when x = +INF
// p12 used in the frcpa
// p13 to select path #4
// p14,p15 to select path #2

// Assembly macros
//==============================================================
// Symbolic names for the general registers. The log_* / acosh_* / NR_*
// aliases are scratch registers of the computation paths.
log_GR_exp_17_ones      = r14
log_GR_signexp_f8       = r15
log_table_address2      = r16
log_GR_exp_16_ones      = r17
log_GR_exp_f8           = r18
log_GR_true_exp_f8      = r19
log_GR_significand_f8   = r20
log_GR_index            = r21
log_GR_comp2            = r22
acosh_GR_f8             = r23
log_GR_comp             = r24
acosh_GR_f8_sig         = r25
log_table_address3      = r26
NR_table_address        = r27

// NOTE(review): the GR_SAVE_* / GR_Parameter_* / acosh_GR_tag names look
// like the save and argument registers of the x < 1.0 error-handler call;
// confirm against the error path of this routine.
GR_SAVE_B0              = r33
GR_SAVE_GP              = r34
GR_SAVE_PFS             = r35

GR_Parameter_X          = r36
GR_Parameter_Y          = r37
GR_Parameter_RESULT     = r38
acosh_GR_tag            = r39

//==============================================================
// Symbolic names for the floating-point registers.
log_y                   = f9
NR1                     = f10
NR2                     = f11
log_y_rs                = f12
log_y_rs_iter           = f13
log_y_rs_iter1          = f14
log_NORM_f8             = f15
acosh_comp              = f32
log_w                   = f34
log_P5                  = f35
log_P4                  = f36
log_P3                  = f37
log_P2                  = f38
log_P1                  = f39
log_C0                  = f40
log_C1                  = f41
log_C2                  = f42
log2                    = f43
acosh_w_rs              = f44
log_C                   = f45
log_arg                 = f46
acosh_w_iter1           = f47
acosh_w_iter2           = f48
log_int_Nfloat          = f49
log_r                   = f50
log_rsq                 = f51
log_rp_p4               = f52
log_rp_p32              = f53
log_rcube               = f54
log_rp_p10              = f55
log_rp_p2               = f56
log_Nfloat              = f57
log_T                   = f58
log_r2P_r               = f59
log_T_plus_Nlog2        = f60
acosh_w_sqrt            = f61
acosh_w_1               = f62
log_C3                  = f63
log_C4                  = f64
log_arg_early           = f65


// Data tables
//==============================================================
// Layout note: the acosh code loads the address of log_table_1 only and
// reaches the other two tables by fixed byte offsets from it
// (log_table_2 at +0x40; log_table_3 at +0x70 from the pointer after three
// 16-byte post-increment loads, i.e. +0xA0). The tables must therefore stay
// adjacent, in this order, and keep their sizes.

RODATA
.align 16

// log_table_1 (0x40 bytes): path-2 threshold, log(1+r) series coefficients
// P5..P1 as doubles, then log(2) as a 16-byte double-extended value.
LOCAL_OBJECT_START(log_table_1)
data8 0x3FF0020C49BA5E35 // 1.0005
data8 0xBFC5555DA7212371 // P5
data8 0x3FC999A19EEF5826 // P4
data8 0xBFCFFFFFFFFEF009 // P3
data8 0x3FD555555554ECB2 // P2
data8 0xBFE0000000000000 // P1 = -0.5
//
data8 0xb17217f7d1cf79ac, 0x00003ffe // log2
LOCAL_OBJECT_END(log_table_1)

// log_table_2 (0x60 bytes): Newton-Raphson constants 0.5 and 3.0 (loaded as
// NR1, NR2), then the Pol4 coefficients C4..C0 as 16-byte double-extended
// values (significand word first, sign/exponent word second).
LOCAL_OBJECT_START(log_table_2)
data8 0x3FE0000000000000 // 0.5
data8 0x4008000000000000 // 3.0
//
data8 0xAFE8F9203939CCF8, 0x00003FF6 // C4 3FF6AFE8F9203939CCF8
data8 0xAD46EB6AE752D809, 0x0000BFF8 // C3 BFF8AD46EB6AE752D809
data8 0xD93923D7F53F3627, 0x00003FF9 // C2 3FF9D93923D7F53F3627
data8 0xF15BEEEFF7D32D36, 0x0000BFFB // C1 BFFBF15BEEEFF7D32D36
data8 0xB504F333F9DE6484, 0x00003FFF // C0 3FFFB504F333F9DE6484
LOCAL_OBJECT_END(log_table_2)


// log_table_3: T values, T_k = log(1/frcpa(1 + k/2^8)), k = 0 ... 255.
// One 16-byte double-extended entry per k; the code indexes it with
// (leading 8 significand bits) * 16.
LOCAL_OBJECT_START(log_table_3)
data8 0x80200aaeac44ef38 , 0x00003ff6 // log(1/frcpa(1+  0/2^8))
//
data8 0xc09090a2c35aa070 , 0x00003ff7 // log(1/frcpa(1+  1/2^8))
data8 0xa0c94fcb41977c75 , 0x00003ff8 // log(1/frcpa(1+  2/2^8))
data8 0xe18b9c263af83301 , 0x00003ff8 // log(1/frcpa(1+  3/2^8))
data8 0x8d35c8d6399c30ea , 0x00003ff9 // log(1/frcpa(1+  4/2^8))
data8 0xadd4d2ecd601cbb8 , 0x00003ff9 // log(1/frcpa(1+  5/2^8))
//
data8 0xce95403a192f9f01 , 0x00003ff9 // log(1/frcpa(1+  6/2^8))
data8 0xeb59392cbcc01096 , 0x00003ff9 // log(1/frcpa(1+  7/2^8))
data8 0x862c7d0cefd54c5d , 0x00003ffa // log(1/frcpa(1+  8/2^8))
data8 0x94aa63c65e70d499 , 0x00003ffa // log(1/frcpa(1+  9/2^8))
data8 0xa54a696d4b62b382 , 0x00003ffa // log(1/frcpa(1+ 10/2^8))
//
data8 0xb3e4a796a5dac208 , 0x00003ffa // log(1/frcpa(1+ 11/2^8))
data8 0xc28c45b1878340a9 , 0x00003ffa // log(1/frcpa(1+ 12/2^8))
data8 0xd35c55f39d7a6235 , 0x00003ffa // log(1/frcpa(1+ 13/2^8))
data8 0xe220f037b954f1f5 , 0x00003ffa // log(1/frcpa(1+ 14/2^8))
data8 0xf0f3389b036834f3 , 0x00003ffa // log(1/frcpa(1+ 15/2^8))
//
data8 0xffd3488d5c980465 , 0x00003ffa // log(1/frcpa(1+ 16/2^8))
data8 0x87609ce2ed300490 , 0x00003ffb // log(1/frcpa(1+ 17/2^8))
data8 0x8ede9321e8c85927 , 0x00003ffb // log(1/frcpa(1+ 18/2^8))
data8 0x96639427f2f8e2f4 , 0x00003ffb // log(1/frcpa(1+ 19/2^8))
data8 0x9defad3e8f73217b , 0x00003ffb // log(1/frcpa(1+ 20/2^8))
//
data8 0xa582ebd50097029c , 0x00003ffb // log(1/frcpa(1+ 21/2^8))
data8 0xac06dbe75ab80fee , 0x00003ffb // log(1/frcpa(1+ 22/2^8))
data8 0xb3a78449b2d3ccca , 0x00003ffb // log(1/frcpa(1+ 23/2^8))
data8 0xbb4f79635ab46bb2 , 0x00003ffb // log(1/frcpa(1+ 24/2^8))
data8 0xc2fec93a83523f3f , 0x00003ffb // log(1/frcpa(1+ 25/2^8))
//
data8 0xc99af2eaca4c4571 , 0x00003ffb // log(1/frcpa(1+ 26/2^8))
data8 0xd1581106472fa653 , 0x00003ffb // log(1/frcpa(1+ 27/2^8))
data8 0xd8002560d4355f2e , 0x00003ffb // log(1/frcpa(1+ 28/2^8))
data8 0xdfcb43b4fe508632 , 0x00003ffb // log(1/frcpa(1+ 29/2^8))
data8 0xe67f6dff709d4119 , 0x00003ffb // log(1/frcpa(1+ 30/2^8))
//
data8 0xed393b1c22351280 , 0x00003ffb // log(1/frcpa(1+ 31/2^8))
data8 0xf5192bff087bcc35 , 0x00003ffb // log(1/frcpa(1+ 32/2^8))
data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb // log(1/frcpa(1+ 33/2^8))
data8 0x81559a97f92f9cc7 , 0x00003ffc // log(1/frcpa(1+ 34/2^8))
data8 0x84be72bce90266e8 , 0x00003ffc // log(1/frcpa(1+ 35/2^8))
//
data8 0x88bc74113f23def2 , 0x00003ffc // log(1/frcpa(1+ 36/2^8))
data8 0x8c2ba3edf6799d11 , 0x00003ffc // log(1/frcpa(1+ 37/2^8))
data8 0x8f9dc92f92ea08b1 , 0x00003ffc // log(1/frcpa(1+ 38/2^8))
data8 0x9312e8f36efab5a7 , 0x00003ffc // log(1/frcpa(1+ 39/2^8))
data8 0x968b08643409ceb6 , 0x00003ffc // log(1/frcpa(1+ 40/2^8))
//
data8 0x9a062cba08a1708c , 0x00003ffc // log(1/frcpa(1+ 41/2^8))
// log_table_3 entries T_k = log(1/frcpa(1 + k/2^8)) for k = 42 ... 183.
// Each entry is 16 bytes: significand word, then sign/exponent word.
data8 0x9d845b3abf95485c , 0x00003ffc // log(1/frcpa(1+ 42/2^8))
data8 0xa06fd841bc001bb4 , 0x00003ffc // log(1/frcpa(1+ 43/2^8))
data8 0xa3f3a74652fbe0db , 0x00003ffc // log(1/frcpa(1+ 44/2^8))
data8 0xa77a8fb2336f20f5 , 0x00003ffc // log(1/frcpa(1+ 45/2^8))
//
data8 0xab0497015d28b0a0 , 0x00003ffc // log(1/frcpa(1+ 46/2^8))
data8 0xae91c2be6ba6a615 , 0x00003ffc // log(1/frcpa(1+ 47/2^8))
data8 0xb189d1b99aebb20b , 0x00003ffc // log(1/frcpa(1+ 48/2^8))
data8 0xb51cced5de9c1b2c , 0x00003ffc // log(1/frcpa(1+ 49/2^8))
data8 0xb819bee9e720d42f , 0x00003ffc // log(1/frcpa(1+ 50/2^8))
//
data8 0xbbb2a0947b093a5d , 0x00003ffc // log(1/frcpa(1+ 51/2^8))
data8 0xbf4ec1505811684a , 0x00003ffc // log(1/frcpa(1+ 52/2^8))
data8 0xc2535bacfa8975ff , 0x00003ffc // log(1/frcpa(1+ 53/2^8))
data8 0xc55a3eafad187eb8 , 0x00003ffc // log(1/frcpa(1+ 54/2^8))
data8 0xc8ff2484b2c0da74 , 0x00003ffc // log(1/frcpa(1+ 55/2^8))
//
data8 0xcc0b1a008d53ab76 , 0x00003ffc // log(1/frcpa(1+ 56/2^8))
data8 0xcfb6203844b3209b , 0x00003ffc // log(1/frcpa(1+ 57/2^8))
data8 0xd2c73949a47a19f5 , 0x00003ffc // log(1/frcpa(1+ 58/2^8))
data8 0xd5daae18b49d6695 , 0x00003ffc // log(1/frcpa(1+ 59/2^8))
data8 0xd8f08248cf7e8019 , 0x00003ffc // log(1/frcpa(1+ 60/2^8))
//
data8 0xdca7749f1b3e540e , 0x00003ffc // log(1/frcpa(1+ 61/2^8))
data8 0xdfc28e033aaaf7c7 , 0x00003ffc // log(1/frcpa(1+ 62/2^8))
data8 0xe2e012a5f91d2f55 , 0x00003ffc // log(1/frcpa(1+ 63/2^8))
data8 0xe600064ed9e292a8 , 0x00003ffc // log(1/frcpa(1+ 64/2^8))
data8 0xe9226cce42b39f60 , 0x00003ffc // log(1/frcpa(1+ 65/2^8))
//
data8 0xec4749fd97a28360 , 0x00003ffc // log(1/frcpa(1+ 66/2^8))
data8 0xef6ea1bf57780495 , 0x00003ffc // log(1/frcpa(1+ 67/2^8))
data8 0xf29877ff38809091 , 0x00003ffc // log(1/frcpa(1+ 68/2^8))
data8 0xf5c4d0b245cb89be , 0x00003ffc // log(1/frcpa(1+ 69/2^8))
data8 0xf8f3afd6fcdef3aa , 0x00003ffc // log(1/frcpa(1+ 70/2^8))
//
data8 0xfc2519756be1abc7 , 0x00003ffc // log(1/frcpa(1+ 71/2^8))
data8 0xff59119f503e6832 , 0x00003ffc // log(1/frcpa(1+ 72/2^8))
data8 0x8147ce381ae0e146 , 0x00003ffd // log(1/frcpa(1+ 73/2^8))
data8 0x82e45f06cb1ad0f2 , 0x00003ffd // log(1/frcpa(1+ 74/2^8))
data8 0x842f5c7c573cbaa2 , 0x00003ffd // log(1/frcpa(1+ 75/2^8))
//
data8 0x85ce471968c8893a , 0x00003ffd // log(1/frcpa(1+ 76/2^8))
data8 0x876e8305bc04066d , 0x00003ffd // log(1/frcpa(1+ 77/2^8))
data8 0x891012678031fbb3 , 0x00003ffd // log(1/frcpa(1+ 78/2^8))
data8 0x8a5f1493d766a05f , 0x00003ffd // log(1/frcpa(1+ 79/2^8))
data8 0x8c030c778c56fa00 , 0x00003ffd // log(1/frcpa(1+ 80/2^8))
//
data8 0x8da85df17e31d9ae , 0x00003ffd // log(1/frcpa(1+ 81/2^8))
data8 0x8efa663e7921687e , 0x00003ffd // log(1/frcpa(1+ 82/2^8))
data8 0x90a22b6875c6a1f8 , 0x00003ffd // log(1/frcpa(1+ 83/2^8))
data8 0x91f62cc8f5d24837 , 0x00003ffd // log(1/frcpa(1+ 84/2^8))
data8 0x93a06cfc3857d980 , 0x00003ffd // log(1/frcpa(1+ 85/2^8))
//
data8 0x94f66d5e6fd01ced , 0x00003ffd // log(1/frcpa(1+ 86/2^8))
data8 0x96a330156e6772f2 , 0x00003ffd // log(1/frcpa(1+ 87/2^8))
data8 0x97fb3582754ea25b , 0x00003ffd // log(1/frcpa(1+ 88/2^8))
data8 0x99aa8259aad1bbf2 , 0x00003ffd // log(1/frcpa(1+ 89/2^8))
data8 0x9b0492f6227ae4a8 , 0x00003ffd // log(1/frcpa(1+ 90/2^8))
//
data8 0x9c5f8e199bf3a7a5 , 0x00003ffd // log(1/frcpa(1+ 91/2^8))
data8 0x9e1293b9998c1daa , 0x00003ffd // log(1/frcpa(1+ 92/2^8))
data8 0x9f6fa31e0b41f308 , 0x00003ffd // log(1/frcpa(1+ 93/2^8))
data8 0xa0cda11eaf46390e , 0x00003ffd // log(1/frcpa(1+ 94/2^8))
data8 0xa22c8f029cfa45aa , 0x00003ffd // log(1/frcpa(1+ 95/2^8))
//
data8 0xa3e48badb7856b34 , 0x00003ffd // log(1/frcpa(1+ 96/2^8))
data8 0xa5459a0aa95849f9 , 0x00003ffd // log(1/frcpa(1+ 97/2^8))
data8 0xa6a79c84480cfebd , 0x00003ffd // log(1/frcpa(1+ 98/2^8))
data8 0xa80a946d0fcb3eb2 , 0x00003ffd // log(1/frcpa(1+ 99/2^8))
data8 0xa96e831a3ea7b314 , 0x00003ffd // log(1/frcpa(1+100/2^8))
//
data8 0xaad369e3dc544e3b , 0x00003ffd // log(1/frcpa(1+101/2^8))
data8 0xac92e9588952c815 , 0x00003ffd // log(1/frcpa(1+102/2^8))
data8 0xadfa035aa1ed8fdc , 0x00003ffd // log(1/frcpa(1+103/2^8))
data8 0xaf6219eae1ad6e34 , 0x00003ffd // log(1/frcpa(1+104/2^8))
data8 0xb0cb2e6d8160f753 , 0x00003ffd // log(1/frcpa(1+105/2^8))
//
data8 0xb2354249ad950f72 , 0x00003ffd // log(1/frcpa(1+106/2^8))
data8 0xb3a056e98ef4a3b4 , 0x00003ffd // log(1/frcpa(1+107/2^8))
data8 0xb50c6dba52c6292a , 0x00003ffd // log(1/frcpa(1+108/2^8))
data8 0xb679882c33876165 , 0x00003ffd // log(1/frcpa(1+109/2^8))
data8 0xb78c07429785cedc , 0x00003ffd // log(1/frcpa(1+110/2^8))
//
data8 0xb8faeb8dc4a77d24 , 0x00003ffd // log(1/frcpa(1+111/2^8))
data8 0xba6ad77eb36ae0d6 , 0x00003ffd // log(1/frcpa(1+112/2^8))
data8 0xbbdbcc915e9bee50 , 0x00003ffd // log(1/frcpa(1+113/2^8))
data8 0xbd4dcc44f8cf12ef , 0x00003ffd // log(1/frcpa(1+114/2^8))
data8 0xbec0d81bf5b531fa , 0x00003ffd // log(1/frcpa(1+115/2^8))
//
data8 0xc034f19c139186f4 , 0x00003ffd // log(1/frcpa(1+116/2^8))
data8 0xc14cb69f7c5e55ab , 0x00003ffd // log(1/frcpa(1+117/2^8))
data8 0xc2c2abbb6e5fd56f , 0x00003ffd // log(1/frcpa(1+118/2^8))
data8 0xc439b2c193e6771e , 0x00003ffd // log(1/frcpa(1+119/2^8))
data8 0xc553acb9d5c67733 , 0x00003ffd // log(1/frcpa(1+120/2^8))
//
data8 0xc6cc96e441272441 , 0x00003ffd // log(1/frcpa(1+121/2^8))
data8 0xc8469753eca88c30 , 0x00003ffd // log(1/frcpa(1+122/2^8))
data8 0xc962cf3ce072b05c , 0x00003ffd // log(1/frcpa(1+123/2^8))
data8 0xcadeba8771f694aa , 0x00003ffd // log(1/frcpa(1+124/2^8))
data8 0xcc5bc08d1f72da94 , 0x00003ffd // log(1/frcpa(1+125/2^8))
//
data8 0xcd7a3f99ea035c29 , 0x00003ffd // log(1/frcpa(1+126/2^8))
data8 0xcef93860c8a53c35 , 0x00003ffd // log(1/frcpa(1+127/2^8))
data8 0xd0192f68a7ed23df , 0x00003ffd // log(1/frcpa(1+128/2^8))
data8 0xd19a201127d3c645 , 0x00003ffd // log(1/frcpa(1+129/2^8))
data8 0xd2bb92f4061c172c , 0x00003ffd // log(1/frcpa(1+130/2^8))
//
data8 0xd43e80b2ee8cc8fc , 0x00003ffd // log(1/frcpa(1+131/2^8))
data8 0xd56173601fc4ade4 , 0x00003ffd // log(1/frcpa(1+132/2^8))
data8 0xd6e6637efb54086f , 0x00003ffd // log(1/frcpa(1+133/2^8))
data8 0xd80ad9f58f3c8193 , 0x00003ffd // log(1/frcpa(1+134/2^8))
data8 0xd991d1d31aca41f8 , 0x00003ffd // log(1/frcpa(1+135/2^8))
//
data8 0xdab7d02231484a93 , 0x00003ffd // log(1/frcpa(1+136/2^8))
data8 0xdc40d532cde49a54 , 0x00003ffd // log(1/frcpa(1+137/2^8))
data8 0xdd685f79ed8b265e , 0x00003ffd // log(1/frcpa(1+138/2^8))
data8 0xde9094bbc0e17b1d , 0x00003ffd // log(1/frcpa(1+139/2^8))
data8 0xe01c91b78440c425 , 0x00003ffd // log(1/frcpa(1+140/2^8))
//
data8 0xe14658f26997e729 , 0x00003ffd // log(1/frcpa(1+141/2^8))
data8 0xe270cdc2391e0d23 , 0x00003ffd // log(1/frcpa(1+142/2^8))
data8 0xe3ffce3a2aa64922 , 0x00003ffd // log(1/frcpa(1+143/2^8))
data8 0xe52bdb274ed82887 , 0x00003ffd // log(1/frcpa(1+144/2^8))
data8 0xe6589852e75d7df6 , 0x00003ffd // log(1/frcpa(1+145/2^8))
//
data8 0xe786068c79937a7d , 0x00003ffd // log(1/frcpa(1+146/2^8))
data8 0xe91903adad100911 , 0x00003ffd // log(1/frcpa(1+147/2^8))
data8 0xea481236f7d35bb0 , 0x00003ffd // log(1/frcpa(1+148/2^8))
data8 0xeb77d48c692e6b14 , 0x00003ffd // log(1/frcpa(1+149/2^8))
data8 0xeca84b83d7297b87 , 0x00003ffd // log(1/frcpa(1+150/2^8))
//
data8 0xedd977f4962aa158 , 0x00003ffd // log(1/frcpa(1+151/2^8))
data8 0xef7179a22f257754 , 0x00003ffd // log(1/frcpa(1+152/2^8))
data8 0xf0a450d139366ca7 , 0x00003ffd // log(1/frcpa(1+153/2^8))
data8 0xf1d7e0524ff9ffdb , 0x00003ffd // log(1/frcpa(1+154/2^8))
data8 0xf30c29036a8b6cae , 0x00003ffd // log(1/frcpa(1+155/2^8))
//
data8 0xf4412bc411ea8d92 , 0x00003ffd // log(1/frcpa(1+156/2^8))
data8 0xf576e97564c8619d , 0x00003ffd // log(1/frcpa(1+157/2^8))
data8 0xf6ad62fa1b5f172f , 0x00003ffd // log(1/frcpa(1+158/2^8))
data8 0xf7e499368b55c542 , 0x00003ffd // log(1/frcpa(1+159/2^8))
data8 0xf91c8d10abaffe22 , 0x00003ffd // log(1/frcpa(1+160/2^8))
//
data8 0xfa553f7018c966f3 , 0x00003ffd // log(1/frcpa(1+161/2^8))
data8 0xfb8eb13e185d802c , 0x00003ffd // log(1/frcpa(1+162/2^8))
data8 0xfcc8e3659d9bcbed , 0x00003ffd // log(1/frcpa(1+163/2^8))
data8 0xfe03d6d34d487fd2 , 0x00003ffd // log(1/frcpa(1+164/2^8))
data8 0xff3f8c7581e9f0ae , 0x00003ffd // log(1/frcpa(1+165/2^8))
//
data8 0x803e029e280173ae , 0x00003ffe // log(1/frcpa(1+166/2^8))
data8 0x80dca10cc52d0757 , 0x00003ffe // log(1/frcpa(1+167/2^8))
data8 0x817ba200632755a1 , 0x00003ffe // log(1/frcpa(1+168/2^8))
data8 0x821b05f3b01d6774 , 0x00003ffe // log(1/frcpa(1+169/2^8))
data8 0x82bacd623ff19d06 , 0x00003ffe // log(1/frcpa(1+170/2^8))
//
data8 0x835af8c88e7a8f47 , 0x00003ffe // log(1/frcpa(1+171/2^8))
data8 0x83c5f8299e2b4091 , 0x00003ffe // log(1/frcpa(1+172/2^8))
data8 0x8466cb43f3d87300 , 0x00003ffe // log(1/frcpa(1+173/2^8))
data8 0x850803a67c80ca4b , 0x00003ffe // log(1/frcpa(1+174/2^8))
data8 0x85a9a1d11a23b461 , 0x00003ffe // log(1/frcpa(1+175/2^8))
//
data8 0x864ba644a18e6e05 , 0x00003ffe // log(1/frcpa(1+176/2^8))
data8 0x86ee1182dcc432f7 , 0x00003ffe // log(1/frcpa(1+177/2^8))
data8 0x875a925d7e48c316 , 0x00003ffe // log(1/frcpa(1+178/2^8))
data8 0x87fdaa109d23aef7 , 0x00003ffe // log(1/frcpa(1+179/2^8))
data8 0x88a129ed4becfaf2 , 0x00003ffe // log(1/frcpa(1+180/2^8))
//
data8 0x89451278ecd7f9cf , 0x00003ffe // log(1/frcpa(1+181/2^8))
data8 0x89b29295f8432617 , 0x00003ffe // log(1/frcpa(1+182/2^8))
data8 0x8a572ac5a5496882 , 0x00003ffe // log(1/frcpa(1+183/2^8))
// log_table_3 entries T_k = log(1/frcpa(1 + k/2^8)) for k = 184 ... 240.
// Each entry is 16 bytes: significand word, then sign/exponent word.
data8 0x8afc2d0ce3b2dadf , 0x00003ffe // log(1/frcpa(1+184/2^8))
data8 0x8b6a69c608cfd3af , 0x00003ffe // log(1/frcpa(1+185/2^8))
//
data8 0x8c101e106e899a83 , 0x00003ffe // log(1/frcpa(1+186/2^8))
data8 0x8cb63de258f9d626 , 0x00003ffe // log(1/frcpa(1+187/2^8))
data8 0x8d2539c5bd19e2b1 , 0x00003ffe // log(1/frcpa(1+188/2^8))
data8 0x8dcc0e064b29e6f1 , 0x00003ffe // log(1/frcpa(1+189/2^8))
data8 0x8e734f45d88357ae , 0x00003ffe // log(1/frcpa(1+190/2^8))
//
data8 0x8ee30cef034a20db , 0x00003ffe // log(1/frcpa(1+191/2^8))
data8 0x8f8b0515686d1d06 , 0x00003ffe // log(1/frcpa(1+192/2^8))
data8 0x90336bba039bf32f , 0x00003ffe // log(1/frcpa(1+193/2^8))
data8 0x90a3edd23d1c9d58 , 0x00003ffe // log(1/frcpa(1+194/2^8))
data8 0x914d0de2f5d61b32 , 0x00003ffe // log(1/frcpa(1+195/2^8))
//
data8 0x91be0c20d28173b5 , 0x00003ffe // log(1/frcpa(1+196/2^8))
data8 0x9267e737c06cd34a , 0x00003ffe // log(1/frcpa(1+197/2^8))
data8 0x92d962ae6abb1237 , 0x00003ffe // log(1/frcpa(1+198/2^8))
data8 0x9383fa6afbe2074c , 0x00003ffe // log(1/frcpa(1+199/2^8))
data8 0x942f0421651c1c4e , 0x00003ffe // log(1/frcpa(1+200/2^8))
//
data8 0x94a14a3845bb985e , 0x00003ffe // log(1/frcpa(1+201/2^8))
data8 0x954d133857f861e7 , 0x00003ffe // log(1/frcpa(1+202/2^8))
data8 0x95bfd96468e604c4 , 0x00003ffe // log(1/frcpa(1+203/2^8))
data8 0x9632d31cafafa858 , 0x00003ffe // log(1/frcpa(1+204/2^8))
data8 0x96dfaabd86fa1647 , 0x00003ffe // log(1/frcpa(1+205/2^8))
//
data8 0x9753261fcbb2a594 , 0x00003ffe // log(1/frcpa(1+206/2^8))
data8 0x9800c11b426b996d , 0x00003ffe // log(1/frcpa(1+207/2^8))
data8 0x9874bf4d45ae663c , 0x00003ffe // log(1/frcpa(1+208/2^8))
data8 0x99231f5ee9a74f79 , 0x00003ffe // log(1/frcpa(1+209/2^8))
data8 0x9997a18a56bcad28 , 0x00003ffe // log(1/frcpa(1+210/2^8))
//
data8 0x9a46c873a3267e79 , 0x00003ffe // log(1/frcpa(1+211/2^8))
data8 0x9abbcfc621eb6cb6 , 0x00003ffe // log(1/frcpa(1+212/2^8))
data8 0x9b310cb0d354c990 , 0x00003ffe // log(1/frcpa(1+213/2^8))
data8 0x9be14cf9e1b3515c , 0x00003ffe // log(1/frcpa(1+214/2^8))
data8 0x9c5710b8cbb73a43 , 0x00003ffe // log(1/frcpa(1+215/2^8))
//
data8 0x9ccd0abd301f399c , 0x00003ffe // log(1/frcpa(1+216/2^8))
data8 0x9d7e67f3bdce8888 , 0x00003ffe // log(1/frcpa(1+217/2^8))
data8 0x9df4ea81a99daa01 , 0x00003ffe // log(1/frcpa(1+218/2^8))
data8 0x9e6ba405a54514ba , 0x00003ffe // log(1/frcpa(1+219/2^8))
data8 0x9f1e21c8c7bb62b3 , 0x00003ffe // log(1/frcpa(1+220/2^8))
//
data8 0x9f956593f6b6355c , 0x00003ffe // log(1/frcpa(1+221/2^8))
data8 0xa00ce1092e5498c3 , 0x00003ffe // log(1/frcpa(1+222/2^8))
data8 0xa0c08309c4b912c1 , 0x00003ffe // log(1/frcpa(1+223/2^8))
data8 0xa1388a8c6faa2afa , 0x00003ffe // log(1/frcpa(1+224/2^8))
data8 0xa1b0ca7095b5f985 , 0x00003ffe // log(1/frcpa(1+225/2^8))
//
data8 0xa22942eb47534a00 , 0x00003ffe // log(1/frcpa(1+226/2^8))
data8 0xa2de62326449d0a3 , 0x00003ffe // log(1/frcpa(1+227/2^8))
data8 0xa357690f88bfe345 , 0x00003ffe // log(1/frcpa(1+228/2^8))
data8 0xa3d0a93f45169a4b , 0x00003ffe // log(1/frcpa(1+229/2^8))
data8 0xa44a22f7ffe65f30 , 0x00003ffe // log(1/frcpa(1+230/2^8))
//
data8 0xa500c5e5b4c1aa36 , 0x00003ffe // log(1/frcpa(1+231/2^8))
data8 0xa57ad064eb2ebbc2 , 0x00003ffe // log(1/frcpa(1+232/2^8))
data8 0xa5f5152dedf4384e , 0x00003ffe // log(1/frcpa(1+233/2^8))
data8 0xa66f9478856233ec , 0x00003ffe // log(1/frcpa(1+234/2^8))
data8 0xa6ea4e7cca02c32e , 0x00003ffe // log(1/frcpa(1+235/2^8))
//
data8 0xa765437325341ccf , 0x00003ffe // log(1/frcpa(1+236/2^8))
data8 0xa81e21e6c75b4020 , 0x00003ffe // log(1/frcpa(1+237/2^8))
data8 0xa899ab333fe2b9ca , 0x00003ffe // log(1/frcpa(1+238/2^8))
data8 0xa9157039c51ebe71 , 0x00003ffe // log(1/frcpa(1+239/2^8))
data8 0xa991713433c2b999 , 0x00003ffe // 
log(1/frcpa(1+240/2^-8)) 549// 550data8 0xaa0dae5cbcc048b3 , 0x00003ffe // log(1/frcpa(1+241/2^-8)) 551data8 0xaa8a27ede5eb13ad , 0x00003ffe // log(1/frcpa(1+242/2^-8)) 552data8 0xab06de228a9e3499 , 0x00003ffe // log(1/frcpa(1+243/2^-8)) 553data8 0xab83d135dc633301 , 0x00003ffe // log(1/frcpa(1+244/2^-8)) 554data8 0xac3fb076adc7fe7a , 0x00003ffe // log(1/frcpa(1+245/2^-8)) 555// 556data8 0xacbd3cbbe47988f1 , 0x00003ffe // log(1/frcpa(1+246/2^-8)) 557data8 0xad3b06b1a5dc57c3 , 0x00003ffe // log(1/frcpa(1+247/2^-8)) 558data8 0xadb90e94af887717 , 0x00003ffe // log(1/frcpa(1+248/2^-8)) 559data8 0xae3754a218f7c816 , 0x00003ffe // log(1/frcpa(1+249/2^-8)) 560data8 0xaeb5d9175437afa2 , 0x00003ffe // log(1/frcpa(1+250/2^-8)) 561// 562data8 0xaf349c322e9c7cee , 0x00003ffe // log(1/frcpa(1+251/2^-8)) 563data8 0xafb39e30d1768d1c , 0x00003ffe // log(1/frcpa(1+252/2^-8)) 564data8 0xb032df51c2c93116 , 0x00003ffe // log(1/frcpa(1+253/2^-8)) 565data8 0xb0b25fd3e6035ad9 , 0x00003ffe // log(1/frcpa(1+254/2^-8)) 566data8 0xb1321ff67cba178c , 0x00003ffe // log(1/frcpa(1+255/2^-8)) 567LOCAL_OBJECT_END(log_table_3) 568 569 570.section .text 571GLOBAL_LIBM_ENTRY(acosh) 572 573{ .mfi 574 getf.exp acosh_GR_f8 = f8 575 fclass.m p6,p0 = f8, 0xc3 // Test for x = NaN 576 mov log_GR_comp2 = 0x1003e 577} 578{ .mfi 579 addl NR_table_address = @ltoff(log_table_1), gp 580 fms.s1 log_y = f8, f8, f1 // y = x^2-1 581 nop.i 0 582} 583;; 584 585{ .mfi 586 getf.sig acosh_GR_f8_sig = f8 587 fclass.m p11,p0 = f8, 0x21 // Test for x=+inf 588 mov log_GR_exp_17_ones = 0x1ffff 589} 590{ .mfi 591 ld8 NR_table_address = [NR_table_address] 592 fms.s1 log_w = f8,f1,f1 // w = x - 1 593 nop.i 0 594} 595;; 596 597{ .mfi 598 nop.m 0 599 fcmp.lt.s1 p7,p8 = f8, f1 // Test for x<1.0 600 addl log_GR_comp = 0x10020C,r0 // Upper 21 bits of signif of 1.0005 601} 602{ .mfb 603 mov log_GR_exp_16_ones = 0xffff //BIAS 604(p6) fma.d.s0 f8 = f8,f1,f0 // quietize nan result if x=nan 605(p6) br.ret.spnt b0 // Exit for x=nan 606} 
607;; 608 609{ .mfb 610 //get second table address 611 adds log_table_address2 = 0x40, NR_table_address 612 fcmp.eq.s1 p10,p0 = f8, f1 // Test for x=+1.0 613(p11) br.ret.spnt b0 // Exit for x=+inf 614} 615;; 616 617{ .mfi 618 ldfpd NR1,NR2 = [log_table_address2],16 619 frsqrta.s1 log_y_rs,p0 = log_y // z=1/sqrt(y) 620 nop.i 0 621} 622{ .mfb 623 nop.m 0 624 fma.s1 log_arg = f8,f1,f8 625(p7) br.cond.spnt ACOSH_LESS_ONE // Branch if path 7, x < 1.0 626} 627;; 628 629{ .mfi 630 ldfe log_C4 = [log_table_address2],16 631(p8) fcmp.eq.s0 p6,p0 = f8, f0 // Dummy op sets denorm flag if unorm>=1.0 632 nop.i 0 633} 634{ .mfb 635(p8) cmp.le.unc p13,p0 = log_GR_comp2,acosh_GR_f8 636 nop.f 0 637(p13) br.cond.spnt LOG_COMMON1 // Branch if path 4, x >= 2^63 638} 639;; 640 641{ .mfi 642 ldfe log_C3 = [log_table_address2],16 643(p10) fmerge.s f8 = f0, f0 // Return 0 if x=1.0 644 shr.u acosh_GR_f8_sig = acosh_GR_f8_sig,43 645} 646{ .mib 647 cmp.eq p14,p0 = log_GR_exp_16_ones,acosh_GR_f8 648 nop.i 0 649(p10) br.ret.spnt b0 // Exit for x=1.0 650} 651;; 652 653{ .mfi 654 ldfe log_C2 = [log_table_address2],16 655 frsqrta.s1 acosh_w_rs,p0 = log_w // t=1/sqrt(w) 656 nop.i 0 657} 658{ .mfb 659(p14) cmp.lt.unc p15,p0 = acosh_GR_f8_sig,log_GR_comp 660 nop.f 0 661(p15) br.cond.spnt ACOSH_NEAR_ONE // Branch if path 2, 1.0 < x < 1.0005 662} 663;; 664 665// Here is main path, 1.0005 <= x < 2^63 666/////////////// The first iteration ////////////////////////////////// 667{ .mfi 668 ldfpd acosh_comp,log_P5 = [NR_table_address],16 669 fma.s1 log_y_rs_iter = log_y_rs,log_y,f0 // y*z 670 nop.i 0 671} 672;; 673 674{ .mfi 675 ldfpd log_P4,log_P3 = [NR_table_address],16 676 fnma.s1 log_y_rs_iter = log_y_rs_iter,log_y_rs,NR2 // 3-(y*z)*z 677 nop.i 0 678} 679{ .mfi 680 nop.m 0 681 fma.s1 log_y_rs_iter1 = log_y_rs,NR1,f0 // 0.5*z 682 nop.i 0 683} 684;; 685 686{ .mfi 687 ldfpd log_P2,log_P1 = [NR_table_address],16 688 //(0.5*z)*(3-(y*z)*z) 689 fma.s1 log_y_rs_iter = log_y_rs_iter1,log_y_rs_iter,f0 690 nop.i 0 
691} 692;; 693 694/////////////////////////// The second iteration ///////////////////////////// 695{ .mfi 696 nop.m 0 697 fma.s1 log_y_rs = log_y_rs_iter,log_y,f0 698 nop.i 0 699} 700;; 701 702{ .mfi 703 nop.m 0 704 fnma.s1 log_y_rs = log_y_rs,log_y_rs_iter,NR2 705 nop.i 0 706} 707{ .mfi 708 nop.m 0 709 fma.s1 log_y_rs_iter1 = log_y_rs_iter,NR1,f0 710 nop.i 0 711} 712;; 713 714{ .mfi 715 nop.m 0 716 //(0.5*z)*(3-(y*z)*z) 717 fma.s1 log_y_rs_iter = log_y_rs_iter1,log_y_rs,f0 718 nop.i 0 719} 720{ .mfi 721 nop.m 0 722 //(0.5*z)*(3-(y*z)*z) 723 fma.s1 log_arg_early = log_y_rs_iter1,log_y_rs,f0 724 nop.i 0 725} 726;; 727 728//////////////////////////////////////// The third iteration ///////////////// 729{ .mfi 730 nop.m 0 731 fma.s1 log_y_rs = log_y_rs_iter,log_y,f0 732 nop.i 0 733} 734{ .mfi 735 nop.m 0 736 fma.s1 log_y_rs_iter1 = log_y_rs_iter,NR1,f0 737 nop.i 0 738} 739;; 740 741{ .mfi 742 nop.m 0 743 fma.s1 log_arg_early = log_arg_early,log_y,f8 744 nop.i 0 745} 746;; 747 748{ .mfi 749 nop.m 0 750 fnma.s1 log_y_rs = log_y_rs,log_y_rs_iter,NR2 751 nop.i 0 752} 753{ .mfi 754 nop.m 0 755 fma.s1 log_y_rs_iter1 = log_y_rs_iter1,log_y,f0 756 nop.i 0 757} 758;; 759 760{ .mfi 761 nop.m 0 762 frcpa.s1 log_C,p0 = f1,log_arg_early 763 nop.i 0 764} 765;; 766 767{ .mfi 768 getf.exp log_GR_signexp_f8 = log_arg_early 769 nop.f 0 770 nop.i 0 771} 772;; 773 774{ .mfi 775 getf.sig log_GR_significand_f8 = log_arg_early 776 fma.s1 log_arg = log_y_rs_iter1,log_y_rs,f8 // (0.5*z)*(3-(y*z)*z) 777 adds log_table_address3 = 0x70, NR_table_address 778} 779;; 780 781///////////////////////////////// The end NR iterations ///////////////////// 782{ .mfi 783 ldfe log2 = [NR_table_address],16 784 nop.f 0 785 nop.i 0 786} 787;; 788 789{ .mmi 790 //significant bit destruction 791 and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones 792;; 793 //BIAS subtraction 794 sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones 795 nop.i 0 796} 797;; 798 799{ .mfi 800 setf.sig log_int_Nfloat = 
log_GR_true_exp_f8 801 fms.s1 log_r = log_C,log_arg,f1 // C = frcpa(x); r = C * x - 1 802 extr.u log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits 803} 804;; 805 806{ .mmi 807 //pre-index*16 + index 808 shladd log_table_address3 = log_GR_index,4,log_table_address3 809;; 810 ldfe log_T = [log_table_address3] 811 nop.i 0 812} 813;; 814 815{ .mfi 816 nop.m 0 817 fma.s1 log_rsq = log_r, log_r, f0 //r^2 818 nop.i 0 819} 820{ .mfi 821 nop.m 0 822 fma.s1 log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4 823 nop.i 0 824} 825;; 826 827{ .mfi 828 nop.m 0 829 fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2 830 nop.i 0 831} 832;; 833 834{ .mfi 835 nop.m 0 836 //convert N to the floating-point format log_Nfloat 837 fcvt.xf log_Nfloat = log_int_Nfloat 838 nop.i 0 839} 840;; 841 842{ .mfi 843 nop.m 0 844 fma.s1 log_rcube = log_rsq, log_r, f0 //r^3 845 nop.i 0 846} 847{ .mfi 848 nop.m 0 849 fma.s1 log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r 850 nop.i 0 851} 852;; 853 854{ .mfi 855 nop.m 0 856 //(P5*r + P4)*r^2 + P3*r + P2 857 fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32 858 nop.i 0 859} 860;; 861 862{ .mfi 863 nop.m 0 864 fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T 865 nop.i 0 866} 867{ .mfi 868 nop.m 0 869 //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r 870 fma.s1 log_r2P_r = log_rp_p2, log_rcube, log_rp_p10 871 nop.i 0 872} 873;; 874 875{ .mfb 876 nop.m 0 877 // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*w^3 + P1*r^2 + r 878 fadd.d.s0 f8 = log_T_plus_Nlog2, log_r2P_r 879 br.ret.sptk b0 // Exit main path, path 3: 1.0005 <= x < 2^63 880} 881;; 882 883// Here if path 2, 1.0 < x < 1.0005 884ACOSH_NEAR_ONE: 885// The first NR iteration 886{ .mfi 887 ldfe log_C1 = [log_table_address2],16 888 fma.s1 acosh_w_iter1 = acosh_w_rs,log_w,f0 //t*w 889 nop.i 0 890} 891{ .mfi 892 nop.m 0 893 fma.s1 acosh_w_1 = f8,log_C4,log_C3 //x*C4 + C3 894 nop.i 0 895} 896;; 897 898{ .mfi 899 ldfe log_C0 = [log_table_address2],16 900 fma.s1 acosh_w_iter2 = 
acosh_w_rs,NR1,f0 //t*0.5 901 nop.i 0 902} 903{ .mfi 904 nop.m 0 905 fnma.s1 acosh_w_iter1 = acosh_w_iter1,acosh_w_rs,NR2 //3-t*t*w 906 nop.i 0 907} 908;; 909 910{ .mfi 911 nop.m 0 912 //(3-t*t*w)*t*0.5 913 fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 914 nop.i 0 915} 916{ .mfi 917 nop.m 0 918 fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C2 //(x*C4 + C3)*(x-1) + C2 919 nop.i 0 920} 921;; 922 923// The second NR iteration 924{ .mfi 925 nop.m 0 926 fma.s1 acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w 927 nop.i 0 928} 929{ .mfi 930 nop.m 0 931 //((x*C4 + C3)*(x-1) + C2)*(x-1) + C1 932 fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C1 933 nop.i 0 934} 935;; 936 937{ .mfi 938 nop.m 0 939 fnma.s1 acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 940 nop.i 0 941} 942{ .mfi 943 nop.m 0 944 fma.s1 acosh_w_iter2 = acosh_w_iter2,NR1,f0 945 nop.i 0 946} 947;; 948 949{ .mfi 950 nop.m 0 951 fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 952 nop.i 0 953} 954{ .mfi 955 nop.m 0 956 //(((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0 957 fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C0 958 nop.i 0 959} 960;; 961 962//The third NR iteration 963{ .mfi 964 nop.m 0 965 fma.s1 acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w 966 nop.i 0 967} 968;; 969 970{ .mfi 971 nop.m 0 972 fnma.s1 acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 973 nop.i 0 974} 975{ .mfi 976 nop.m 0 977 fma.s1 acosh_w_iter2 = acosh_w_iter2,NR1,f0 978 nop.i 0 979} 980;; 981 982{ .mfi 983 nop.m 0 984 fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 985 nop.i 0 986} 987;; 988 989{ .mfi 990 nop.m 0 991 fma.s1 acosh_w_sqrt = acosh_w_iter2,log_w,f0 992 nop.i 0 993} 994;; 995 996{ .mfb 997 nop.m 0 998 fma.d.s0 f8 = acosh_w_1,acosh_w_sqrt,f0 999 br.ret.sptk b0 // Exit path 2, 1.0 < x < 1.0005 1000} 1001;; 1002 1003// Here if path 4, x >= 2^63 1004LOG_COMMON1: 1005{ .mfi 1006 ldfpd acosh_comp,log_P5 = [NR_table_address],16 1007 frcpa.s1 log_C,p0 = f1,log_arg 1008 nop.i 0 1009} 1010;; 1011 1012{ .mmi 1013 getf.exp log_GR_signexp_f8 = log_arg 1014 
ldfpd log_P4,log_P3 = [NR_table_address],16 1015 nop.i 0 1016} 1017;; 1018 1019{ .mmi 1020 getf.sig log_GR_significand_f8 = log_arg 1021 ldfpd log_P2,log_P1 = [NR_table_address],16 1022 nop.i 0 1023} 1024;; 1025 1026{ .mfi 1027 adds log_table_address3 = 0x70, NR_table_address 1028 nop.f 0 1029 //significant bit destruction 1030 and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones 1031} 1032;; 1033 1034{ .mmf 1035 ldfe log2 = [NR_table_address],16 1036 //BIAS subtraction 1037 sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones 1038 fms.s1 log_r = log_C,log_arg,f1 // C = frcpa(x); r = C * x - 1 1039} 1040;; 1041 1042{ .mfi 1043 setf.sig log_int_Nfloat = log_GR_true_exp_f8 1044 nop.f 0 1045 extr.u log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits 1046} 1047;; 1048 1049{ .mmi 1050 //pre-index*16 + index 1051 shladd log_table_address3 = log_GR_index,4,log_table_address3 1052;; 1053 ldfe log_T = [log_table_address3] 1054 nop.i 0 1055} 1056;; 1057 1058{ .mfi 1059 nop.m 0 1060 fma.s1 log_rsq = log_r, log_r, f0 //r^2 1061 nop.i 0 1062} 1063{ .mfi 1064 nop.m 0 1065 fma.s1 log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4 1066 nop.i 0 1067} 1068;; 1069 1070{ .mfi 1071 nop.m 0 1072 fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2 1073 nop.i 0 1074} 1075;; 1076 1077{ .mfi 1078 nop.m 0 1079 fma.s1 log_rcube = log_rsq, log_r, f0 //r^3 1080 nop.i 0 1081} 1082{ .mfi 1083 nop.m 0 1084 fma.s1 log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r 1085 nop.i 0 1086} 1087;; 1088 1089{ .mfi 1090 nop.m 0 1091 //convert N to the floating-point format log_Nfloat 1092 fcvt.xf log_Nfloat = log_int_Nfloat 1093 nop.i 0 1094} 1095{ .mfi 1096 nop.m 0 1097 //(P5*r + P4)*r^2 + P3*r + P2 1098 fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32 1099 nop.i 0 1100} 1101;; 1102 1103{ .mfi 1104 nop.m 0 1105 fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T 1106 nop.i 0 1107} 1108{ .mfi 1109 nop.m 0 1110 //((P5*r + P4)*r^2 + P3*r + P2)*w^3 + P1*r^2 + r 1111 fma.s1 log_r2P_r = 
log_rp_p2, log_rcube, log_rp_p10 // operands of the fma.s1 log_r2P_r opened at the end of the previous line
      nop.i 0
}
;;

{ .mfb
      nop.m 0
      // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fadd.d.s0 f8 = log_T_plus_Nlog2, log_r2P_r
      br.ret.sptk b0 // Exit path 4, x >= 2^63
}
;;

// Here if path 7, x < 1.0
// Domain error: keep the original argument in f10, build the value to be
// reported in f8, load the error tag and branch to the shared error stub.
ACOSH_LESS_ONE:
{ .mfi
      // New register frame: 1 input, 3 locals, 4 outputs, 0 rotating
      alloc r32 = ar.pfs,1,3,4,0
      // Copy x to f10; __libm_error_region stores f10 as Parameter 1
      fmerge.s f10 = f8,f8
      nop.i 0
}
;;

{ .mfb
      mov acosh_GR_tag = 136           // Error tag for acosh(x), x < 1.0
      // 0/0 through frcpa in status field s0; presumably yields the default
      // QNaN and raises the invalid flag -- confirm against the ISA manual
      frcpa.s0 f8,p0 = f0,f0
      br.cond.sptk __libm_error_region
}
;;

GLOBAL_LIBM_END(acosh)
libm_alias_double_other (acosh, acosh)


// __libm_error_region
// ==============================================================
// Local stub that hands a failed call over to __libm_error_support.
//
// On entry (as set up by ACOSH_LESS_ONE above):
//   f10 = original argument x
//   f8  = value to be reported as the result
//   the error tag has already been moved into acosh_GR_tag
//   (presumably the tag out-register read by __libm_error_support --
//    the alias is defined outside this part of the file)
//
// Stack frame (64 bytes, offsets relative to the new sp):
//   sp+16 : Parameter 1 (x, from f10)
//   sp+32 : Parameter 2 (f1 is stored here)
//   sp+48 : Parameter 3 (result, from f8; reloaded into f8 after the call)
//
// ar.pfs, gp and b0 are saved in GR_SAVE_PFS, GR_SAVE_GP and GR_SAVE_B0
// across the call and restored before returning through b0.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

{ .mfi
      // No stop bit before the sp update in the next bundle, so this reads
      // the old sp: old sp - 32 == new sp + 32
      add   GR_Parameter_Y=-32,sp              // Parameter 2 address
      nop.f 0
.save   ar.pfs,GR_SAVE_PFS
      mov   GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
      add   sp=-64,sp                          // Create new 64-byte stack frame
      nop.f 0
      mov   GR_SAVE_GP=gp                      // Save gp
};;

{ .mmi
      // Post-increment by 16 leaves GR_Parameter_Y at sp+48
      stfd  [GR_Parameter_Y] = f1,16           // STORE Parameter 2 on stack
      add   GR_Parameter_X = 16,sp             // Parameter 1 address
.save   b0, GR_SAVE_B0
      mov   GR_SAVE_B0=b0                      // Save b0
};;

.body
{ .mib
      stfd  [GR_Parameter_X] = f10             // STORE Parameter 1 on stack
      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
      nop.b 0
}
{ .mib
      stfd  [GR_Parameter_Y] = f8              // STORE Parameter 3 on stack
      // Same instruction group as the store above, which still uses sp+48;
      // afterwards GR_Parameter_Y points back at the Parameter 2 slot (sp+32)
      add   GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk b0=__libm_error_support#    // Call error handling function
};;

{ .mmi
      // Recompute the Parameter 3 address from sp after the call
      add   GR_Parameter_RESULT = 48,sp
      nop.m 0
      nop.i 0
};;

{ .mmi
      ldfd  f8 = [GR_Parameter_RESULT]         // Get return result off stack
.restore sp
      add   sp = 64,sp                         // Restore stack pointer
      mov   b0 = GR_SAVE_B0                    // Restore return address
};;

{ .mib
      mov   gp = GR_SAVE_GP                    // Restore gp
      mov   ar.pfs = GR_SAVE_PFS               // Restore ar.pfs
      br.ret.sptk b0                           // Return
};;

LOCAL_LIBM_END(__libm_error_region)


// External error-handling routine called from __libm_error_region
.type   __libm_error_support#,@function
.global __libm_error_support#
