1.file "libm_lgammal.s" 2 3 4// Copyright (c) 2002 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 25// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, 28// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code,and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 
38// 39//********************************************************************* 40// 41// History: 42// 03/28/02 Original version 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 08/21/02 Added support of SIGN(GAMMA(x)) calculation 45// 09/26/02 Algorithm description improved 46// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero 47// 02/10/03 Reordered header: .section, .global, .proc, .align 48// 03/31/05 Reformatted delimiters between data tables 49// 50//********************************************************************* 51// 52// Function: __libm_lgammal(long double x, int* signgam, int szsigngam) 53// computes the principal value of the logarithm of the GAMMA function 54// of x. Signum of GAMMA(x) is stored to memory starting at the address 55// specified by the signgam. 56// 57//********************************************************************* 58// 59// Resources Used: 60// 61// Floating-Point Registers: f8 (Input and Return Value) 62// f9-f15 63// f32-f127 64// 65// General Purpose Registers: 66// r2, r3, r8-r11, r14-r31 67// r32-r65 68// r66-r69 (Used to pass arguments to error handling routine) 69// 70// Predicate Registers: p6-p15 71// 72//********************************************************************* 73// 74// IEEE Special Conditions: 75// 76// __libm_lgammal(+inf) = +inf 77// __libm_lgammal(-inf) = QNaN 78// __libm_lgammal(+/-0) = +inf 79// __libm_lgammal(x<0, x - integer) = QNaN 80// __libm_lgammal(SNaN) = QNaN 81// __libm_lgammal(QNaN) = QNaN 82// 83//********************************************************************* 84// 85// ALGORITHM DESCRIPTION 86// 87// Below we suppose that there is log(z) function which takes an long 88// double argument and returns result as a pair of long double numbers 89// lnHi and lnLo (such that sum lnHi + lnLo provides ~80 correct bits 90// of significand). Algorithm description for such log(z) function 91// see below. 
// Also, in this algorithm description we use the following notational
// conventions:
// a) pair A = (Ahi, Alo) means number A represented as sum of Ahi and Alo
// b) C = A + B = (Ahi, Alo) + (Bhi, Blo) means multi-precision addition.
//    The result would be C = (Chi, Clo). Notice, that Clo shouldn't be
//    equal to Alo + Blo
// c) D = A*B = (Ahi, Alo)*(Bhi, Blo) = (Dhi, Dlo) multi-precision
//    multiplication.
//
// So, lgammal has the following computational paths:
// 1) |x| < 0.5
//    P = A1*|x| + A2*|x|^2 + ... + A22*|x|^22
//    A1, A2, A3 represented as a sum of two double precision
//    numbers and multi-precision computations are used for 3 higher
//    terms of the polynomial. We get polynomial as a sum of two
//    double extended numbers: P = (Phi, Plo)
//    1.1) x > 0
//         lgammal(x) = P - log(|x|) = (Phi, Plo) - (lnHi(|x|), lnLo(|x|))
//    1.2) x < 0
//         lgammal(x) = -P - log(|x|) - log(sin(Pi*x)/(Pi*x))
//         P and log(|x|) are computed by the same way as in 1.1;
//         - log(sin(Pi*x)/(Pi*x)) is approximated by a polynomial Plnsin.
//         Plnsin:= fLnSin2*|x|^2 + fLnSin4*|x|^4 + ... + fLnSin36*|x|^36
//         The first coefficient of Plnsin is represented as sum of two
//         double precision numbers (fLnSin2, fLnSin2L). Multi-precision
//         computations for higher two terms of Plnsin are used.
//         So, the final result is reconstructed by the following formula
//         lgammal(x) = (-(Phi, Plo) - (lnHi(|x|), lnLo(|x|))) -
//                      - (PlnsinHi,PlnsinLo)
//
// 2)    0.5 <= x <   0.75  -> t = x - 0.625
//     -0.75 <  x <= -0.5   -> t = x + 0.625
//      2.25 <= x <   4.0   -> t = x/2 - 1.5
//       4.0 <= x <   8.0   -> t = x/4 - 1.5
//      -0.5 <  x <= -0.40625 -> t = x + 0.5
//      -2.6005859375 < x <= -2.5 -> t = x + 2.5
//      1.3125 <= x < 1.5625 -> t = x - LOC_MIN, where LOC_MIN is point in
//                              which lgammal has local minimum. Exact
//                              value can be found in the table below,
//                              approximate value is ~1.46
//
//    lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
//    P25(t) = A0 + A1*t + ... + A25*t^25 = (Phi, Plo) + t^4*P21(t),
//    where
//    (Phi, Plo) is sum of four highest terms of the polynomial P25(t):
//    (Phi, Plo) = ((A0, A0L) + (A1, A1L)*t) + t^2 *((A2, A2L) + (A3, A3L)*t),
//    (Ai, AiL) - coefficients represented as pairs of DP numbers.
//
//    P21(t) = (PolC(t)*t^8 + PolD(t))*t^8 + PolE(t),
//    where
//    PolC(t) = C21*t^5 + C20*t^4 + ... + C16,
//    C21 = A25, C20 = A24, ..., C16 = A20
//
//    PolD(t) = D7*t^7 + D6*t^6 + ... + D0,
//    D7 = A19, D6 = A18, ..., D0 = A12
//
//    PolE(t) = E7*t^7 + E6*t^6 + ... + E0,
//    E7 = A11, E6 = A10, ..., E0 = A4
//
//    Cis and Dis are represented as double precision numbers,
//    Eis are represented as double extended numbers.
//
// 3) 0.75   <= x < 1.3125 -> t = x - 1.0
//    1.5625 <= x < 2.25   -> t = x - 2.0
//    lgammal(x) is approximated by the polynomial of 25th degree: P25(t)
//    P25(t) = A1*t + ... + A25*t^25, and computations are carried out
//    by similar way as in the previous case
//
// 4) 10.0 < x <= Overflow Bound ("positive Stirling" range)
//    lgammal(x) is approximated using Stirling's formula:
//    lgammal(x) ~ ((x*(lnHi(x) - 1, lnLo(x))) - 0.5*(lnHi(x), lnLo(x))) +
//                 + ((Chi, Clo) + S(1/x))
//    where
//    C = (Chi, Clo) - pair of double precision numbers representing constant
//    0.5*ln(2*Pi);
//    S(1/x) = 1/x * (B2 + B4*(1/x)^2 + ... + B20*(1/x)^18), B2, ..., B20 are
//    Bernoulli numbers. S is computed in native precision and then added to
//    Clo;
//    lnHi(x) - 1 is computed in native precision and the multiprecision
//    multiplication (x, 0) *(lnHi(x) - 1, lnLo(x)) is used.
//
// 5) -INF < x <= -2^63, any negative integer < 0
//    All numbers in this range are integers -> error handler is called
//
// 6) -2^63 < x <= -0.75 ("negative Stirling" range), x is "far" from root,
//    lgammal(-t) for positive t is approximated using the following formula:
//    lgammal(-t) = -lgammal(t)-log(t)-log(|dT|)+log(sin(Pi*|dT|)/(Pi*|dT|))
//    where dT = -t -round_to_nearest_integer(-t)
//    Last item is approximated by the same polynomial as described in 1.2.
//    We split the whole range into three subranges due to different ways of
//    approximation of the first terms.
//    6.1) -2^63 < x < -6.0 ("negative Stirling" range)
//       lgammal(t) is approximated exactly as in #4. The only difference that
//       for -13.0 < x < -6.0 subrange instead of Bernoulli numbers we use their
//       minimax approximation on this range.
//       log(t), log(|dT|) are approximated by the log routine mentioned above.
//    6.2) -6.0 < x <= -0.75, |x + 1|> 2^(-7)
//       log(t), log(|dT|) are approximated by the log routine mentioned above,
//       lgammal(t) is approximated by polynomials of the 25th degree similar
//       to ones from #2. Arguments z of the polynomials are as follows
//       a) 0.75 <= t < 1.0 - 2^(-7),  z = 2*t - 1.5
//       b) 1.0 - 2^(-7) < t < 2.0,    z = t - 1.5
//       c) 2.0 < t < 3.0,             z = t/2 - 1.5
//       d) 3.0 < t < 4.0,             z = t/2 - 1.5. Notice, that range reduction is
//          the same as in case c) but the set of coefficients is different
//       e) 4.0 < t < 6.0,             z = t/4 - 1.5
//    6.3) |x + 1| <= 2^(-7)
//       log(1 + (x-1)) is approximated by Taylor series,
//       log(sin(Pi*|dT|)/(Pi*|dT|)) is still approximated by polynomial but
//       it has just 4th degree.
//       log(|dT|) is approximated by the log routine mentioned above.
//       lgammal(-x) is approximated by polynomial of 8th degree from (-x + 1).
//
// 7) -20.0 < x < -2.0, x falls in root "neighbourhood".
//    "Neighbourhood" means that |lgammal(x)| < epsilon, where epsilon is
//    different for every root (and it is stored in the table), but typically
//    it is ~ 0.15. There are 35 roots significant from "double extended"
//    point of view. We split all the roots into two subsets: "left" and "right"
//    roots. Considering [-(N+1), -N] range we call root as "left" one if it
//    lies closer to -(N+1) and "right" otherwise. There is no "left" root in
//    the [-20, -19] range (it exists, but is insignificant for double extended
//    precision). To determine if x falls in root "neighbourhood" we store
//    significands of all the 35 roots as well as epsilon values (expressed
//    by the left and right bound).
//    In these ranges we approximate lgammal(x) by polynomial series of 19th
//    degree:
//    lgammal(x) = P19(t) = A0 + A1*t + ...+ A19*t^19, where t = x - EDP_Root,
//    EDP_Root is the exact value of the corresponding root rounded to double
//    extended precision. So, we have 35 different polynomials which make our
//    table rather big. We may hope that x falls in root "neighbourhood"
//    quite rarely -> there might be no need in frequent use of different
//    polynomials.
//    A0, A1, A2, A3 are represented as pairs of double precision numbers,
//    A4, A5 are long doubles, and to decrease the size of the table we
//    keep the rest of coefficients in just double precision
//
//*********************************************************************
// Algorithm for log(X) = (lnHi(X), lnLo(X))
//
//   ALGORITHM
//
//   Here we use a table lookup method. The basic idea is that in
//   order to compute logl(Arg) for an argument Arg in [1,2), we
//   construct a value G such that G*Arg is close to 1 and that
//   logl(1/G) is obtainable easily from a table of values calculated
//   beforehand. Thus
//
//      logl(Arg) = logl(1/G) + logl(G*Arg)
//                = logl(1/G) + logl(1 + (G*Arg - 1))
//
//   Because |G*Arg - 1| is small, the second term on the right hand
//   side can be approximated by a short polynomial. We elaborate
//   this method in four steps.
//
//   Step 0: Initialization
//
//   We need to calculate logl( X ). Obtain N, S_hi such that
//
//      X = 2^N * S_hi exactly
//
//   where S_hi in [1,2)
//
//   Step 1: Argument Reduction
//
//   Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
//
//      G := G_1 * G_2 * G_3
//      r := (G * S_hi - 1)
//
//   These G_j's have the property that the product is exactly
//   representable and that |r| < 2^(-12) as a result.
//
//   Step 2: Approximation
//
//
//   logl(1 + r) is approximated by a short polynomial poly(r).
//
//   Step 3: Reconstruction
//
//
//   Finally, logl( X ) is given by
//
//   logl( X )   =   logl( 2^N * S_hi )
//             ~=~   N*logl(2) + logl(1/G) + logl(1 + r)
//             ~=~   N*logl(2) + logl(1/G) + poly(r).
//
//   IMPLEMENTATION
//
//   Step 0. Initialization
//   ----------------------
//
//   Z := X
//   N := unbiased exponent of Z
//   S_hi := 2^(-N) * Z
//
//   Step 1. Argument Reduction
//   --------------------------
//
//   Let
//
//      Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
//
//   We obtain G_1, G_2, G_3 by the following steps.
//
//
//      Define   X_0 := 1.d_1 d_2 ... d_14. This is extracted
//               from S_hi.
//
//      Define   A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
//               to lsb = 2^(-4).
//
//      Define   index_1 := [ d_1 d_2 d_3 d_4 ].
//
//      Fetch    Z_1 := (1/A_1) rounded UP in fixed point with
//               fixed point lsb = 2^(-15).
//               Z_1 looks like z_0.z_1 z_2 ... z_15
//               Note that the fetching is done using index_1.
//               A_1 is actually not needed in the implementation
//               and is used here only to explain how is the value
//               Z_1 defined.
312// 313// Fetch G_1 := (1/A_1) truncated to 21 sig. bits. 314// floating pt. Again, fetching is done using index_1. A_1 315// explains how G_1 is defined. 316// 317// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14) 318// = 1.0 0 0 0 d_5 ... d_14 319// This is accomplished by integer multiplication. 320// It is proved that X_1 indeed always begin 321// with 1.0000 in fixed point. 322// 323// 324// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 325// truncated to lsb = 2^(-8). Similar to A_1, 326// A_2 is not needed in actual implementation. It 327// helps explain how some of the values are defined. 328// 329// Define index_2 := [ d_5 d_6 d_7 d_8 ]. 330// 331// Fetch Z_2 := (1/A_2) rounded UP in fixed point with 332// fixed point lsb = 2^(-15). Fetch done using index_2. 333// Z_2 looks like z_0.z_1 z_2 ... z_15 334// 335// Fetch G_2 := (1/A_2) truncated to 21 sig. bits. 336// floating pt. 337// 338// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14) 339// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14 340// This is accomplished by integer multiplication. 341// It is proved that X_2 indeed always begin 342// with 1.00000000 in fixed point. 343// 344// 345// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1. 346// This is 2^(-14) + X_2 truncated to lsb = 2^(-13). 347// 348// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ]. 349// 350// Fetch G_3 := (1/A_3) truncated to 21 sig. bits. 351// floating pt. Fetch is done using index_3. 352// 353// Compute G := G_1 * G_2 * G_3. 354// 355// This is done exactly since each of G_j only has 21 sig. bits. 356// 357// Compute 358// 359// r := (G*S_hi - 1) 360// 361// 362// Step 2. Approximation 363// --------------------- 364// 365// This step computes an approximation to logl( 1 + r ) where r is the 366// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13); 367// thus logl(1+r) can be approximated by a short polynomial: 368// 369// logl(1+r) ~=~ poly = r + Q1 r^2 + ... 
//                                            + Q4 r^5
//
//
//   Step 3. Reconstruction
//   ----------------------
//
//   This step computes the desired result of logl(X):
//
//      logl(X)  =   logl( 2^N * S_hi )
//               =   N*logl(2) + logl( S_hi )
//               =   N*logl(2) + logl(1/G) +
//                   logl(1 + G*S_hi - 1 )
//
//   logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
//   log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
//   single-precision numbers and the low parts are double precision
//   numbers. These have the property that
//
//      N*log2_hi + SUM ( log1byGj_hi )
//
//   is computable exactly in double-extended precision (64 sig. bits).
//   Finally
//
//      lnHi(X) := N*log2_hi + SUM ( log1byGj_hi )
//      lnLo(X) := poly_hi + [ poly_lo +
//              ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
//
//
//*********************************************************************
// General Purpose Registers
//
// Symbolic names for the general registers used by this file. Several
// names deliberately share one physical register (e.g. rExp8/rX0Dx on r25,
// rExp2tom7/rZ625 on r42, rBernulliPtr/rLnSinTmpPtr on r56); such aliases
// must never be live at the same time.
//
// scratch registers
rPolDataPtr              = r2
rLnSinDataPtr            = r3
rExpX                    = r8
rSignifX                 = r9
rDelta                   = r10
rSignExpX                = r11
// NOTE(review): GR_ad_z_1 used to be bound to r14 at this point and is bound
// again to r30 a few lines below, with no instruction in between. With an
// assembler that accepts `=` redefinition the later binding (r30) is the
// one every use sees, so the r14 binding was dead. It is kept here only as
// a comment - confirm that nothing else expects r14 under this name.
// GR_ad_z_1             = r14
r17Ones                  = r15
GR_Index1                = r16
rSignif1andQ             = r17
GR_X_0                   = r18
GR_X_1                   = r19
GR_X_2                   = r20
GR_Z_1                   = r21
GR_Z_2                   = r22
GR_N                     = r23
rExpHalf                 = r24
rExp8                    = r25
rX0Dx                    = r25
GR_ad_tbl_1              = r26
GR_ad_tbl_2              = r27
GR_ad_tbl_3              = r28
GR_ad_q                  = r29
GR_ad_z_1                = r30
GR_ad_z_2                = r31
// stacked registers
rPFS_SAVED               = r32
GR_ad_z_3                = r33
rSgnGamAddr              = r34
rSgnGamSize              = r35
rLogDataPtr              = r36
rZ1offsett               = r37
rTmpPtr                  = r38
rTmpPtr2                 = r39
rTmpPtr3                 = r40
rExp2                    = r41
rExp2tom7                = r42
rZ625                    = r42
rExpOne                  = r43
rNegSingularity          = r44
rXint                    = r45
rTbl1Addr                = r46
rTbl2Addr                = r47
rTbl3Addr                = r48
rZ2Addr                  = r49
rRootsAddr               = r50
rRootsBndAddr            = r51
rRoot                    = r52
rRightBound              = r53
rLeftBound               = r54
rSignifDx                = r55
rBernulliPtr             = r56
rLnSinTmpPtr             = r56
rIndex1Dx                = r57
rIndexPol                = r58
GR_Index3                = r59
GR_Index2                = r60
rSgnGam                  = r61
rXRnd                    = r62

GR_SAVE_B0               = r63
GR_SAVE_GP               = r64
GR_SAVE_PFS              = r65
// output parameters when calling error handling routine
GR_Parameter_X           = r66
GR_Parameter_Y           = r67
GR_Parameter_RESULT      = r68
GR_Parameter_TAG         = r69

//********************************************************************
// Floating Point Registers
// CAUTION: due to the lack of registers there exist (below in the code)
// sometimes "unconventional" use of declared registers
//
// Many names below are aliases of the same physical register (for
// example fB2/fLnDeltaL/fXSqr on f9, or fTwo/fMOne on f100). Aliased
// names must not be live simultaneously.
//
fAbsX                    = f6
fDelX4                   = f6
fSignifX                 = f7
// macros for error handling routine
FR_X                     = f10 // first argument
FR_Y                     = f1  // second argument (lgammal has just one)
FR_RESULT                = f8  // result

// First 7 Bernoulli numbers
fB2                      = f9
fLnDeltaL                = f9
fXSqr                    = f9
fB4                      = f10
fX4                      = f10
fB6                      = f11
fX6                      = f11
fB8                      = f12
fXSqrL                   = f12
fB10                     = f13
fRes7H                   = f13
fB12                     = f14
fRes7L                   = f14
fB14                     = f15

// stack registers
// Polynomial coefficients: A0, ..., A25
fA0                      = f32
fA0L                     = f33
fInvXL                   = f33
fA1                      = f34
fA1L                     = f35
fA2                      = f36
fA2L                     = f37
fA3                      = f38
fA3L                     = f39
fA4                      = f40
fA4L                     = f41
fRes6H                   = f41
fA5                      = f42
fB2L                     = f42
fA5L                     = f43
fMinNegStir              = f43
fRes6L                   = f43
fA6                      = f44
fMaxNegStir              = f44
fA7                      = f45
fLnDeltaH                = f45
fA8                      = f46
fBrnL                    = f46
fA9                      = f47
fBrnH                    = f47
fA10                     = f48
fRes5L                   = f48
fA11                     = f49
fRes5H                   = f49
fA12                     = f50
fDx6                     = f50
fA13                     = f51
fDx8                     = f51
fA14                     = f52
fDx4                     = f52
fA15                     = f53
fYL                      = f53
fh3Dx                    = f53
fA16                     = f54
fYH                      = f54
fH3Dx                    = f54
fA17                     = f55
fResLnDxL                = f55
fG3Dx                    = f55
fA18                     = f56
fResLnDxH                = f56
fh2Dx                    = f56
fA19                     = f57
fFloatNDx                = f57
fA20                     = f58
fPolyHiDx                = f58
fhDx                     = f58
fA21                     = f59
fRDxCub                  = f59
fHDx                     = f59
fA22                     = f60
fRDxSq                   = f60
fGDx                     = f60
fA23                     = f61
fPolyLoDx                = f61
fInvX3                   = f61
fA24                     = f62
fRDx                     = f62
fInvX8                   = f62
fA25                     = f63
fInvX4                   = f63
fPol                     = f64
fPolL                    = f65
// Coefficients of ln(sin(Pi*x)/(Pi*x))
fLnSin2                  = f66
fLnSin2L                 = f67
fLnSin4                  = f68
fLnSin6                  = f69
fLnSin8                  = f70
fLnSin10                 = f71
fLnSin12                 = f72
fLnSin14                 = f73
fLnSin16                 = f74
fLnSin18                 = f75
fDelX8                   = f75
fLnSin20                 = f76
fLnSin22                 = f77
fDelX6                   = f77
fLnSin24                 = f78
fLnSin26                 = f79
fLnSin28                 = f80
fLnSin30                 = f81
fhDelX                   = f81
fLnSin32                 = f82
fLnSin34                 = f83
fLnSin36                 = f84
fXint                    = f85
fDxSqr                   = f85
fRes3L                   = f86
fRes3H                   = f87
fRes4H                   = f88
fRes4L                   = f89
fResH                    = f90
fResL                    = f91
fDx                      = f92
FR_MHalf                 = f93
fRes1H                   = f94
fRes1L                   = f95
fRes2H                   = f96
fRes2L                   = f97
FR_FracX                 = f98
fRcpX                    = f99
fLnSinH                  = f99
fTwo                     = f100
fMOne                    = f100
FR_G                     = f101
FR_H                     = f102
FR_h                     = f103
FR_G2                    = f104
FR_H2                    = f105
FR_poly_lo               = f106
FR_poly_hi               = f107
FR_h2                    = f108
FR_rsq                   = f109
FR_r                     = f110
FR_log2_hi               = f111
FR_log2_lo               = f112
fFloatN                  = f113
FR_Q4                    = f114
FR_G3                    = f115
FR_H3                    = f116
FR_h3                    = f117
FR_Q3                    = f118
FR_Q2                    = f119
FR_Q1                    = f120
fThirteen                = f121
fSix                     = f121
FR_rcub                  = f121
// Last three Bernoulli numbers
fB16                     = f122
fB18                     = f123
fB20                     = f124
fInvX                    = f125
fLnSinL                  = f125
fDxSqrL                  = f126
fFltIntX                 = f126
fRoot                    = f127
fNormDx                  = f127

// Data tables
//==============================================================
RODATA
// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
.align 16
// Roots of lgammal on the negative axis and the bounds of the
// "neighbourhood" around each of them (see item 7 of the algorithm
// description). The object holds four consecutive sub-tables of 18
// 16-byte entries each: right roots, right-root bounds, left roots
// (17 real entries + 1 pad) and left-root bounds (17 entries).
// 18 right + 17 left = the 35 roots mentioned in the description.
LOCAL_OBJECT_START(lgammal_right_roots_data)
// List of all right roots themselves
// (each entry: 64-bit significand, then a second 64-bit word that
//  presumably carries sign and exponent of the double-extended value)
data8 0x9D3FE4B007C360AB, 0x0000C000 // Range [-3, -2]
data8 0xC9306DE4F2CD7BEE, 0x0000C000 // Range [-4, -3]
data8 0x814273C2CCAC0618, 0x0000C001 // Range [-5, -4]
data8 0xA04352BF85B6C865, 0x0000C001 // Range [-6, -5]
data8 0xC00B592C4BE4676C, 0x0000C001 // Range [-7, -6]
data8 0xE0019FEF6FF0F5BF, 0x0000C001 // Range [-8, -7]
data8 0x80001A01459FC9F6, 0x0000C002 // Range [-9, -8]
data8 0x900002E3BB47D86D, 0x0000C002 // Range [-10, -9]
data8 0xA0000049F93BB992, 0x0000C002 // Range [-11, -10]
data8 0xB0000006B9915316, 0x0000C002 // Range [-12, -11]
data8 0xC00000008F76C773, 0x0000C002 // Range [-13, -12]
data8 0xD00000000B09230A, 0x0000C002 // Range [-14, -13]
data8 0xE000000000C9CBA5, 0x0000C002 // Range [-15, -14]
data8 0xF0000000000D73FA, 0x0000C002 // Range [-16, -15]
data8 0x8000000000006BA0, 0x0000C003 // Range [-17, -16]
data8 0x8800000000000655, 0x0000C003 // Range [-18, -17]
data8 0x900000000000005A, 0x0000C003 // Range [-19, -18]
data8 0x9800000000000005, 0x0000C003 // Range [-20, -19]
// List of bounds of ranges with special polynomial approximation near root
// Only significands of bounds are actually stored
data8 0xA000000000000000, 0x9800000000000000 // Bounds for root on [-3, -2]
data8 0xCAB88035C5EFBB41, 0xC7E05E31F4B02115 // Bounds for root on [-4, -3]
data8 0x817831B899735C72, 0x8114633941B8053A // Bounds for root on [-5, -4]
data8 0xA04E8B34C6AA9476, 0xA039B4A42978197B // Bounds for root on [-6, -5]
data8 0xC00D3D5E588A78A9, 0xC009BA25F7E858A6 // Bounds for root on [-7, -6]
data8 0xE001E54202991EB4, 0xE001648416CE897F // Bounds for root on [-8, -7]
data8 0x80001E56D13A6B9F, 0x8000164A3BAD888A // Bounds for root on [-9, -8]
data8 0x9000035F0529272A, 0x9000027A0E3D94F0 // Bounds for root on [-10, -9]
data8 0xA00000564D705880, 0xA000003F67EA0CC7 // Bounds for root on [-11, -10]
data8 0xB0000007D87EE0EF, 0xB0000005C3A122A5 // Bounds for root on [-12, -11]
data8 0xC0000000A75FE8B1, 0xC00000007AF818AC // Bounds for root on [-13, -12]
data8 0xD00000000CDFFE36, 0xD000000009758BBF // Bounds for root on [-14, -13]
data8 0xE000000000EB6D96, 0xE000000000ACF7B2 // Bounds for root on [-15, -14]
data8 0xF0000000000FB1F9, 0xF0000000000B87FB // Bounds for root on [-16, -15]
data8 0x8000000000007D90, 0x8000000000005C40 // Bounds for root on [-17, -16]
data8 0x8800000000000763, 0x880000000000056D // Bounds for root on [-18, -17]
data8 0x9000000000000069, 0x900000000000004D // Bounds for root on [-19, -18]
data8 0x9800000000000006, 0x9800000000000005 // Bounds for root on [-20, -19]
// List of all left roots themselves
data8 0xAFDA0850DEC8065E, 0x0000C000 // Range [-3, -2]
data8 0xFD238AA3E17F285C, 0x0000C000 // Range [-4, -3]
data8 0x9FBABBD37757E6A2, 0x0000C001 // Range [-5, -4]
data8 0xBFF497AC8FA06AFC, 0x0000C001 // Range [-6, -5]
data8 0xDFFE5FBB5C377FE8, 0x0000C001 // Range [-7, -6]
data8 0xFFFFCBFC0ACE7879, 0x0000C001 // Range [-8, -7]
data8 0x8FFFFD1C425E8100, 0x0000C002 // Range [-9, -8]
data8 0x9FFFFFB606BDFDCD, 0x0000C002 // Range [-10, -9]
data8 0xAFFFFFF9466E9F1B, 0x0000C002 // Range [-11, -10]
data8 0xBFFFFFFF70893874, 0x0000C002 // Range [-12, -11]
data8 0xCFFFFFFFF4F6DCF6, 0x0000C002 // Range [-13, -12]
data8 0xDFFFFFFFFF36345B, 0x0000C002 // Range [-14, -13]
data8 0xEFFFFFFFFFF28C06, 0x0000C002 // Range [-15, -14]
data8 0xFFFFFFFFFFFF28C0, 0x0000C002 // Range [-16, -15]
data8 0x87FFFFFFFFFFF9AB, 0x0000C003 // Range [-17, -16]
data8 0x8FFFFFFFFFFFFFA6, 0x0000C003 // Range [-18, -17]
data8 0x97FFFFFFFFFFFFFB, 0x0000C003 // Range [-19, -18]
data8 0x0000000000000000, 0x00000000 // pad to keep logic in the main path
// List of bounds of ranges with special polynomial approximation near root
// Only significands of bounds are actually stored
data8 0xB235880944CC758E, 0xADD2F1A9FBE76C8B // Bounds for root on [-3, -2]
data8 0xFD8E7844F307B07C, 0xFCA655C2152BDE4D // Bounds for root on [-4, -3]
data8 0x9FC4D876EE546967, 0x9FAEE4AF68BC4292 // Bounds for root on [-5, -4]
data8 0xBFF641FFBFCC44F1, 0xBFF2A47919F4BA89 // Bounds for root on [-6, -5]
data8 0xDFFE9C803DEFDD59, 0xDFFE18932EB723FE // Bounds for root on [-7, -6]
data8 0xFFFFD393FA47AFC3, 0xFFFFC317CF638AE1 // Bounds for root on [-8, -7]
data8 0x8FFFFD8840279925, 0x8FFFFC9DCECEEE92 // Bounds for root on [-9, -8]
data8 0x9FFFFFC0D34E2AF8, 0x9FFFFFA9619AA3B7 // Bounds for root on [-10, -9]
data8 0xAFFFFFFA41C18246, 0xAFFFFFF82025A23C // Bounds for root on [-11, -10]
data8 0xBFFFFFFF857ACB4E, 0xBFFFFFFF58032378 // Bounds for root on [-12, -11]
data8 0xCFFFFFFFF6934AB8, 0xCFFFFFFFF313EF0A // Bounds for root on [-13, -12]
data8 0xDFFFFFFFFF53A9E9, 0xDFFFFFFFFF13B5A5 // Bounds for root on [-14, -13]
data8 0xEFFFFFFFFFF482CB, 0xEFFFFFFFFFF03F4F // Bounds for root on [-15, -14]
data8 0xFFFFFFFFFFFF482D, 0xFFFFFFFFFFFF03F5 // Bounds for root on [-16, -15]
data8 0x87FFFFFFFFFFFA98, 0x87FFFFFFFFFFF896 // Bounds for root on [-17, -16]
data8 0x8FFFFFFFFFFFFFB3, 0x8FFFFFFFFFFFFF97 // Bounds for root on [-18, -17]
data8 0x97FFFFFFFFFFFFFC, 0x97FFFFFFFFFFFFFB // Bounds for root on [-19, -18]
LOCAL_OBJECT_END(lgammal_right_roots_data)

LOCAL_OBJECT_START(lgammal_0_Half_data)
// Polynomial coefficients for the lgammal(x), 0.0 < |x| < 0.5
// A1..A3 are stored first (in the order A3, A2, A1) as pairs of double
// precision numbers; A4..A22 follow as 16-byte double-extended entries.
data8 0xBFD9A4D55BEAB2D6, 0xBC8AA3C097746D1F //A3
data8 0x3FEA51A6625307D3, 0x3C7180E7BD2D0DCC //A2
data8 0xBFE2788CFC6FB618, 0xBC9E9346C4692BCC //A1
data8 0x8A8991563EC1BD13, 0x00003FFD //A4
data8 0xD45CE0BD52C27EF2, 0x0000BFFC //A5
data8 0xADA06587FA2BBD47, 0x00003FFC //A6
data8 0x9381D0ED2194902A, 0x0000BFFC //A7
data8 0x80859B3CF92D4192, 0x00003FFC //A8
data8 0xE4033517C622A946, 0x0000BFFB //A9
data8 0xCD00CE67A51FC82A, 0x00003FFB //A10
data8 0xBA44E2A96C3B5700, 0x0000BFFB //A11
data8 0xAAAD008FA46DBD99, 0x00003FFB //A12
data8 0x9D604AC65A41153D, 0x0000BFFB //A13
data8 0x917CECB864B5A861, 0x00003FFB //A14
data8 0x85A4810EB730FDE4, 0x0000BFFB //A15
data8 0xEF2761C38BD21F77, 0x00003FFA //A16
data8 0xC913043A128367DA, 0x0000BFFA //A17
data8 0x96A29B71FF7AFFAA, 0x00003FFA //A18
data8 0xBB9FFA1A5FE649BB, 0x0000BFF9 //A19
data8 0xB17982CD2DAA0EE3, 0x00003FF8 //A20
data8 0xDE1DDCBFFB9453F0, 0x0000BFF6 //A21
data8 0x87FBF5D7ACD9FA9D, 0x00003FF4 //A22
LOCAL_OBJECT_END(lgammal_0_Half_data)

LOCAL_OBJECT_START(Constants_Q)
// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
// (one 16-byte entry per constant, in exactly that order)
data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
LOCAL_OBJECT_END(Constants_Q)

LOCAL_OBJECT_START(Constants_Z_1)
// Z1 - 16 bit fixed
// 16 entries, one per value of index_1 (see Step 1 of the log algorithm)
data4 0x00008000
data4 0x00007879
data4 0x000071C8
data4 0x00006BCB
data4 0x00006667
data4 0x00006187
data4 0x00005D18
data4 0x0000590C
data4 0x00005556
data4 0x000051EC
data4 0x00004EC5
data4 0x00004BDB
data4 0x00004925
data4 0x0000469F
data4 0x00004445
data4 0x00004211
LOCAL_OBJECT_END(Constants_Z_1)

LOCAL_OBJECT_START(Constants_G_H_h1)
// G1 and H1 - IEEE single and h1 - IEEE double
// 16 rows; each row is G1 (data4), H1 (data4), h1 (two data4 words)
data4 0x3F800000,0x00000000,0x00000000,0x00000000
data4 0x3F70F0F0,0x3D785196,0x617D741C,0x3DA163A6
data4 0x3F638E38,0x3DF13843,0xCBD3D5BB,0x3E2C55E6
data4 0x3F579430,0x3E2FF9A0,0xD86EA5E7,0xBE3EB0BF
data4 0x3F4CCCC8,0x3E647FD6,0x86B12760,0x3E2E6A8C
data4 0x3F430C30,0x3E8B3AE7,0x5C0739BA,0x3E47574C
data4 0x3F3A2E88,0x3EA30C68,0x13E8AF2F,0x3E20E30F
data4 0x3F321640,0x3EB9CEC8,0xF2C630BD,0xBE42885B
data4 0x3F2AAAA8,0x3ECF9927,0x97E577C6,0x3E497F34
data4 0x3F23D708,0x3EE47FC5,0xA6B0A5AB,0x3E3E6A6E
data4 0x3F1D89D8,0x3EF8947D,0xD328D9BE,0xBDF43E3C
data4 0x3F17B420,0x3F05F3A1,0x0ADB090A,0x3E4094C3
data4 0x3F124920,0x3F0F4303,0xFC1FE510,0xBE28FBB2
data4 0x3F0D3DC8,0x3F183EBF,0x10FDE3FA,0x3E3A7895
data4 0x3F088888,0x3F20EC80,0x7CC8C98F,0x3E508CE5
data4 0x3F042108,0x3F29516A,0xA223106C,0xBE534874
LOCAL_OBJECT_END(Constants_G_H_h1)
LOCAL_OBJECT_START(Constants_Z_2)
// Z2 - 16 bit fixed
// 16 entries, one per value of index_2 (see Step 1 of the log algorithm)
data4 0x00008000
data4 0x00007F81
data4 0x00007F02
data4 0x00007E85
data4 0x00007E08
data4 0x00007D8D
data4 0x00007D12
data4 0x00007C98
data4 0x00007C20
data4 0x00007BA8
data4 0x00007B31
data4 0x00007ABB
data4 0x00007A45
data4 0x000079D1
data4 0x0000795D
data4 0x000078EB
LOCAL_OBJECT_END(Constants_Z_2)

LOCAL_OBJECT_START(Constants_G_H_h2)
// G2 and H2 - IEEE single and h2 - IEEE double
// 16 rows; each row is G2 (data4), H2 (data4), h2 (two data4 words)
data4 0x3F800000,0x00000000,0x00000000,0x00000000
data4 0x3F7F00F8,0x3B7F875D,0x22C42273,0x3DB5A116
data4 0x3F7E03F8,0x3BFF015B,0x21F86ED3,0x3DE620CF
data4 0x3F7D08E0,0x3C3EE393,0x484F34ED,0xBDAFA07E
data4 0x3F7C0FC0,0x3C7E0586,0x3860BCF6,0xBDFE07F0
data4 0x3F7B1880,0x3C9E75D2,0xA78093D6,0x3DEA370F
data4 0x3F7A2328,0x3CBDC97A,0x72A753D0,0x3DFF5791
data4 0x3F792FB0,0x3CDCFE47,0xA7EF896B,0x3DFEBE6C
data4 0x3F783E08,0x3CFC15D0,0x409ECB43,0x3E0CF156
data4 0x3F774E38,0x3D0D874D,0xFFEF71DF,0xBE0B6F97
data4 0x3F766038,0x3D1CF49B,0x5D59EEE8,0xBE080483
data4 0x3F757400,0x3D2C531D,0xA9192A74,0x3E1F91E9
data4 0x3F748988,0x3D3BA322,0xBF72A8CD,0xBE139A06
data4 0x3F73A0D0,0x3D4AE46F,0xF8FBA6CF,0x3E1D9202
data4 0x3F72B9D0,0x3D5A1756,0xBA796223,0xBE1DCCC4
data4 0x3F71D488,0x3D693B9D,0xB6B7C239,0xBE049391
LOCAL_OBJECT_END(Constants_G_H_h2)

LOCAL_OBJECT_START(Constants_G_H_h3)
// G3 and H3 - IEEE single and h3 - IEEE double
// 32 rows (index_3 is five bits wide); each row is G3, H3, h3
data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
LOCAL_OBJECT_END(Constants_G_H_h3)

LOCAL_OBJECT_START(lgammal_data)
// Positive overflow value
data8 0xB8D54C8BFFFDEBF4, 0x00007FF1
LOCAL_OBJECT_END(lgammal_data)

LOCAL_OBJECT_START(lgammal_Stirling)
// Coefficients needed for Stirling's formula
data8 0x3FED67F1C864BEB4 // High part of 0.5*ln(2*Pi)
data8 0x3C94D252F2400510 // Low part of 0.5*ln(2*Pi)
//
// Bernoulli numbers used in Stirling's formula for -2^63 < x < -13.0
//(B1H, B1L) = 8.3333333333333333333262747254e-02
data8 0x3FB5555555555555, 0x3C55555555555555
data8 0xB60B60B60B60B60B, 0x0000BFF6 //B2 = -2.7777777777777777777777777778e-03
data8 0xD00D00D00D00D00D, 0x00003FF4 //B3 = 7.9365079365079365079365079365e-04
data8 0x9C09C09C09C09C0A, 0x0000BFF4 //B4 = -5.9523809523809523809523809524e-04
data8 0xDCA8F158C7F91AB8, 0x00003FF4 //B5 = 8.4175084175084175084175084175e-04
data8 0xFB5586CCC9E3E410, 0x0000BFF5 //B6 = -1.9175269175269175269175269175e-03
data8 0xD20D20D20D20D20D, 0x00003FF7 //B7 = 6.4102564102564102564102564103e-03
data8 0xF21436587A9CBEE1, 0x0000BFF9 //B8 = -2.9550653594771241830065359477e-02
data8 0xB7F4B1C0F033FFD1, 0x00003FFC //B9 = 1.7964437236883057316493849002e-01
data8 0xB23B3808C0F9CF6E, 0x0000BFFF //B10 = -1.3924322169059011164274322169e+00
// Polynomial coefficients for Stirling's formula, -13.0 < x < -6.0
data8 0x3FB5555555555555, 0x3C4D75060289C58B //A0
data8 0xB60B60B60B0F0876, 0x0000BFF6 //A1
data8 0xD00D00CE54B1256C, 0x00003FF4 //A2
data8 0x9C09BF46B58F75E1, 0x0000BFF4 //A3
data8 0xDCA8483BC91ACC6D, 0x00003FF4 //A4
data8 0xFB3965C939CC9FEE, 0x0000BFF5 //A5
data8 0xD0723ADE3F0BC401, 0x00003FF7 //A6
data8 0xE1ED7434E81F0B73, 0x0000BFF9 //A7
data8 0x8069C6982F993283, 0x00003FFC //A8
data8 0xC271F65BFA5BEE3F, 0x0000BFFD //A9
LOCAL_OBJECT_END(lgammal_Stirling)

LOCAL_OBJECT_START(lgammal_lnsin_data)
// polynomial approximation of -ln(sin(Pi*x)/(Pi*x)), 0 < x <= 0.5
// A2 is a pair of doubles, A4..A16 are 16-byte double-extended entries,
// A18..A36 are single 8-byte doubles.
data8 0x3FFA51A6625307D3, 0x3C81873332FAF94C //A2
data8 0x8A8991563EC241C3, 0x00003FFE //A4
data8 0xADA06588061805DF, 0x00003FFD //A6
data8 0x80859B57C338D0F7, 0x00003FFD //A8
data8 0xCD00F1C2D78754BD, 0x00003FFC //A10
data8 0xAAB56B1D3A1F4655, 0x00003FFC //A12
data8 0x924B6F2FBBED12B1, 0x00003FFC //A14
data8 0x80008E58765F43FC, 0x00003FFC //A16
data8 0x3FBC718EC115E429 //A18
data8 0x3FB99CE544FE183E //A20
data8 0x3FB7251C09EAAD89 //A22
data8 0x3FB64A970733628C //A24
data8 0x3FAC92D6802A3498 //A26
data8 0x3FC47E1165261586 //A28
data8 0xBFCA1BAA434750D4 //A30
data8 0x3FE460001C4D5961 //A32
data8 0xBFE6F06A3E4908AD //A34
data8 0x3FE300889EBB203A //A36
LOCAL_OBJECT_END(lgammal_lnsin_data)
938LOCAL_OBJECT_START(lgammal_half_3Q_data) 939// Polynomial coefficients for the lgammal(x), 0.5 <= x < 0.75 (the trailing comment on each row names the value(s) stored on that row; an 'L' suffix presumably marks the low part of a high/low pair - confirm) 940data8 0xBFF7A648EE90C62E, 0x3C713F326857E066 // A3, A0L 941data8 0xBFF73E4B8BA780AE, 0xBCA953BC788877EF // A1, A1L 942data8 0x403774DCD58D0291, 0xC0415254D5AE6623 // D0, D1 943data8 0x40B07213855CBFB0, 0xC0B8855E25D2D229 // C20, C21 944data8 0x3FFB359F85FF5000, 0x3C9BAECE6EF9EF3A // A2, A2L 945data8 0x3FD717D498A3A8CC, 0xBC9088E101CFEDFA // A0, A3L 946data8 0xAFEF36CC5AEC3FF0, 0x00004002 // E6 947data8 0xABE2054E1C34E791, 0x00004001 // E4 948data8 0xB39343637B2900D1, 0x00004000 // E2 949data8 0xD74FB710D53F58F6, 0x00003FFF // E0 950data8 0x4070655963BA4256, 0xC078DA9D263C4EA3 // D6, D7 951data8 0x405CD2B6A9B90978, 0xC065B3B9F4F4F171 // D4, D5 952data8 0x4049BC2204CF61FF, 0xC05337227E0BA152 // D2, D3 953data8 0x4095509A50C07A96, 0xC0A0747949D2FB45 // C18, C19 954data8 0x4082ECCBAD709414, 0xC08CD02FB088A702 // C16, C17 955data8 0xFFE4B2A61B508DD5, 0x0000C002 // E7 956data8 0xF461ADB8AE17E0A5, 0x0000C001 // E5 957data8 0xF5BE8B0B90325F20, 0x0000C000 // E3 958data8 0x877B275F3FB78DCA, 0x0000C000 // E1 959LOCAL_OBJECT_END(lgammal_half_3Q_data) 960 961LOCAL_OBJECT_START(lgammal_half_3Q_neg_data) 962// Polynomial coefficients for the lgammal(x), -0.75 < x <= -0.5 963data8 0xC014836EFD94899C, 0x3C9835679663B44F // A3, A0L 964data8 0xBFF276C7B4FB1875, 0xBC92D3D9FA29A1C0 // A1, A1L 965data8 0x40C5178F24E1A435, 0xC0D9DE84FBC5D76A // D0, D1 966data8 0x41D4D1B236BF6E93, 0xC1EBB0445CE58550 // C20, C21 967data8 0x4015718CD67F63D3, 0x3CC5354B6F04B59C // A2, A2L 968data8 0x3FF554493087E1ED, 0xBCB72715E37B02B9 // A0, A3L 969data8 0xE4AC7E915FA72229, 0x00004009 // E6 970data8 0xA28244206395FCC6, 0x00004007 // E4 971data8 0xFB045F19C07B2544, 0x00004004 // E2 972data8 0xE5C8A6E6A9BA7D7B, 0x00004002 // E0 973data8 0x4143943B55BF5118, 0xC158AC05EA675406 // D6, D7 974data8 0x4118F6833D19717C, 0xC12F51A6F375CC80 // D4, D5 975data8 0x40F00C209483481C, 0xC103F1DABF750259 // D2, D3 
976data8 0x4191038F2D8F9E40, 0xC1A413066DA8AE4A // C18, C19 977data8 0x4170B537EDD833DE, 0xC1857E79424C61CE // C16, C17 978data8 0x8941D8AB4855DB73, 0x0000C00B // E7 979data8 0xBB822B131BD2E813, 0x0000C008 // E5 980data8 0x852B4C03B83D2D4F, 0x0000C006 // E3 981data8 0xC754CA7E2DDC0F1F, 0x0000C003 // E1 982LOCAL_OBJECT_END(lgammal_half_3Q_neg_data) 983 984LOCAL_OBJECT_START(lgammal_2Q_4_data) 985// Polynomial coefficients for the lgammal(x), 2.25 <= |x| < 4.0 986data8 0xBFCA4D55BEAB2D6F, 0x3C7ABC9DA14141F5 // A3, A0L 987data8 0x3FFD8773039049E7, 0x3C66CB7957A95BA4 // A1, A1L 988data8 0x3F45C3CC79E91E7D, 0xBF3A8E5005937E97 // D0, D1 989data8 0x3EC951E35E1C9203, 0xBEB030A90026C5DF // C20, C21 990data8 0x3FE94699894C1F4C, 0x3C91884D21D123F1 // A2, A2L 991data8 0x3FE62E42FEFA39EF, 0xBC66480CEB70870F // A0, A3L 992data8 0xF1C2EAFF0B3A7579, 0x00003FF5 // E6 993data8 0xB36AF863926B55A3, 0x00003FF7 // E4 994data8 0x9620656185BB44CA, 0x00003FF9 // E2 995data8 0xA264558FB0906AFF, 0x00003FFB // E0 996data8 0x3F03D59E9666C961, 0xBEF91115893D84A6 // D6, D7 997data8 0x3F19333611C46225, 0xBF0F89EB7D029870 // D4, D5 998data8 0x3F3055A96B347AFE, 0xBF243B5153E178A8 // D2, D3 999data8 0x3ED9A4AEF30C4BB2, 0xBED388138B1CEFF2 // C18, C19 1000data8 0x3EEF7945A3C3A254, 0xBEE36F32A938EF11 // C16, C17 1001data8 0x9028923F47C82118, 0x0000BFF5 // E7 1002data8 0xCE0DAAFB6DC93B22, 0x0000BFF6 // E5 1003data8 0xA0D0983B34AC4C8D, 0x0000BFF8 // E3 1004data8 0x94D6C50FEB8B0CE7, 0x0000BFFA // E1 1005LOCAL_OBJECT_END(lgammal_2Q_4_data) 1006 1007LOCAL_OBJECT_START(lgammal_4_8_data) 1008// Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 8.0 1009data8 0xBFD6626BC9B31B54, 0x3CAA53C82493A92B // A3, A0L 1010data8 0x401B4C420A50AD7C, 0x3C8C6E9929F789A3 // A1, A1L 1011data8 0x3F49410427E928C2, 0xBF3E312678F8C146 // D0, D1 1012data8 0x3ED51065F7CD5848, 0xBED052782A03312F // C20, C21 1013data8 0x3FF735973273D5EC, 0x3C831DFC65BF8CCF // A2, A2L 1014data8 0x401326643C4479C9, 0xBC6FA0498C5548A6 // A0, A3L 
1015data8 0x9382D8B3CD4EB7E3, 0x00003FF6 // E6 1016data8 0xE9F92CAD8A85CBCD, 0x00003FF7 // E4 1017data8 0xD58389FE38258CEC, 0x00003FF9 // E2 1018data8 0x81310136363AE8AA, 0x00003FFC // E0 1019data8 0x3F04F0AE38E78570, 0xBEF9E2144BB8F03C // D6, D7 1020data8 0x3F1B5E992A6CBC2A, 0xBF10F3F400113911 // D4, D5 1021data8 0x3F323EE00AAB7DEE, 0xBF2640FDFA9FB637 // D2, D3 1022data8 0x3ED2143EBAFF067A, 0xBEBBDEB92D6FF35D // C18, C19 1023data8 0x3EF173A42B69AAA4, 0xBEE78B9951A2EAA5 // C16, C17 1024data8 0xAB3CCAC6344E52AA, 0x0000BFF5 // E7 1025data8 0x81ACCB8915B16508, 0x0000BFF7 // E5 1026data8 0xDA62C7221102C426, 0x0000BFF8 // E3 1027data8 0xDF1BD44C4083580A, 0x0000BFFA // E1 1028LOCAL_OBJECT_END(lgammal_4_8_data) 1029 1030LOCAL_OBJECT_START(lgammal_loc_min_data) 1031// Polynomial coefficients for the lgammal(x), 1.3125 <= x < 1.5625 1032data8 0xBB16C31AB5F1FB71, 0x00003FFF // xMin - point of local minimum 1033data8 0xBFC2E4278DC6BC23, 0xBC683DA8DDCA9650 // A3, A0L 1034data8 0x3BD4DB7D0CA61D5F, 0x386E719EDD01D801 // A1, A1L 1035data8 0x3F4CC72638E1D93F, 0xBF4228EC9953CCB9 // D0, D1 1036data8 0x3ED222F97A04613E,0xBED3DDD58095CB6C // C20, C21 1037data8 0x3FDEF72BC8EE38AB, 0x3C863AFF3FC48940 // A2, A2L 1038data8 0xBFBF19B9BCC38A41, 0xBC7425F1BFFC1442// A0, A3L 1039data8 0x941890032BEB34C3, 0x00003FF6 // E6 1040data8 0xC7E701591CE534BC, 0x00003FF7 // E4 1041data8 0x93373CBD05138DD4, 0x00003FF9 // E2 1042data8 0x845A14A6A81C05D6, 0x00003FFB // E0 1043data8 0x3F0F6C4DF6D47A13, 0xBF045DCDB5B49E19 // D6, D7 1044data8 0x3F22E23345DDE59C, 0xBF1851159AFB1735 // D4, D5 1045data8 0x3F37101EA4022B78, 0xBF2D721E6323AF13 // D2, D3 1046data8 0x3EE691EBE82DF09D, 0xBEDD42550961F730 // C18, C19 1047data8 0x3EFA793EDE99AD85, 0xBEF14000108E70BE // C16, C17 1048data8 0xB7CBC033ACE0C99C, 0x0000BFF5 // E7 1049data8 0xF178D1F7B1A45E27, 0x0000BFF6 // E5 1050data8 0xA8FCFCA8106F471C, 0x0000BFF8 // E3 1051data8 0x864D46FA898A9AD2, 0x0000BFFA // E1 1052LOCAL_OBJECT_END(lgammal_loc_min_data) 1053 
1054LOCAL_OBJECT_START(lgammal_03Q_1Q_data) 1055// Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.3125 1056data8 0x3FD151322AC7D848, 0x3C7184DE0DB7B4EE // A4, A2L 1057data8 0x3FD9A4D55BEAB2D6, 0x3C9E934AAB10845F // A3, A1L 1058data8 0x3FB111289C381259, 0x3FAFFFCFB32AE18D // D2, D3 1059data8 0x3FB3B1D9E0E3E00D, 0x3FB2496F0D3768DF // D0, D1 1060data8 0xBA461972C057D439, 0x00003FFB // E6 1061data8 0x3FEA51A6625307D3, 0x3C76ABC886A72DA2 // A2, A4L 1062data8 0x3FA8EFE46B32A70E, 0x3F8F31B3559576B6 // C17, C20 1063data8 0xE403383700387D85, 0x00003FFB // E4 1064data8 0x9381D0EE74BF7251, 0x00003FFC // E2 1065data8 0x3FAA2177A6D28177, 0x3FA4895E65FBD995 // C18, C19 1066data8 0x3FAAED2C77DBEE5D, 0x3FA94CA59385512C // D6, D7 1067data8 0x3FAE1F522E8A5941, 0x3FAC785EF56DD87E // D4, D5 1068data8 0x3FB556AD5FA56F0A, 0x3FA81F416E87C783 // E7, C16 1069data8 0xCD00F1C2DC2C9F1E, 0x00003FFB // E5 1070data8 0x3FE2788CFC6FB618, 0x3C8E52519B5B17CB // A1, A3L 1071data8 0x80859B57C3E7F241, 0x00003FFC // E3 1072data8 0xADA065880615F401, 0x00003FFC // E1 1073data8 0xD45CE0BD530AB50E, 0x00003FFC // E0 1074LOCAL_OBJECT_END(lgammal_03Q_1Q_data) 1075 1076LOCAL_OBJECT_START(lgammal_13Q_2Q_data) 1077// Polynomial coefficients for the lgammal(x), 1.5625 <= |x| < 2.25 1078data8 0x3F951322AC7D8483, 0x3C71873D88C6539D // A4, A2L 1079data8 0xBFB13E001A557606, 0x3C56CB907018A101 // A3, A1L 1080data8 0xBEC11B2EC1E7F6FC, 0x3EB0064ED9824CC7 // D2, D3 1081data8 0xBEE3CBC963EC103A, 0x3ED2597A330C107D // D0, D1 1082data8 0xBC6F2DEBDFE66F38, 0x0000BFF0 // E6 1083data8 0x3FD4A34CC4A60FA6, 0x3C3AFC9BF775E8A0 // A2, A4L 1084data8 0x3E48B0C542F85B32, 0xBE347F12EAF787AB // C17, C20 1085data8 0xE9FEA63B6984FA1E, 0x0000BFF2 // E4 1086data8 0x9C562E15FC703BBF, 0x0000BFF5 // E2 1087data8 0xBE3C12A50AB0355E, 0xBE1C941626AE4717 // C18, C19 1088data8 0xBE7AFA8714342BC4,0x3E69A12D2B7761CB // D6, D7 1089data8 0xBE9E25EF1D526730, 0x3E8C762291889B99 // D4, D5 1090data8 0x3EF580DCEE754733, 0xBE57C811D070549C // E7, 
C16 1091data8 0xD093D878BE209C98, 0x00003FF1 // E5 1092data8 0x3FDB0EE6072093CE, 0xBC6024B9E81281C4 // A1, A3L 1093data8 0x859B57C31CB77D96, 0x00003FF4 // E3 1094data8 0xBD6EB756DB617E8D, 0x00003FF6 // E1 1095data8 0xF2027E10C7AF8C38, 0x0000BFF7 // E0 1096LOCAL_OBJECT_END(lgammal_13Q_2Q_data) 1097 1098LOCAL_OBJECT_START(lgammal_8_10_data) 1099// Polynomial coefficients for the lgammal(x), 8.0 <= |x| < 10.0 1100// Multi Precision terms 1101data8 0x40312008A3A23E5C, 0x3CE020B4F2E4083A //A1 1102data8 0x4025358E82FCB70C, 0x3CD4A5A74AF7B99C //A0 1103// Native precision terms 1104data8 0xF0AA239FFBC616D2, 0x00004000 //A2 1105data8 0x96A8EA798FE57D66, 0x0000BFFF //A3 1106data8 0x8D501B7E3B9B9BDB, 0x00003FFE //A4 1107data8 0x9EE062401F4B1DC2, 0x0000BFFD //A5 1108data8 0xC63FD8CD31E93431, 0x00003FFC //A6 1109data8 0x8461101709C23C30, 0x0000BFFC //A7 1110data8 0xB96D7EA7EF3648B2, 0x00003FFB //A8 1111data8 0x86886759D2ACC906, 0x0000BFFB //A9 1112data8 0xC894B6E28265B183, 0x00003FFA //A10 1113data8 0x98C4348CAD821662, 0x0000BFFA //A11 1114data8 0xEC9B092226A94DF2, 0x00003FF9 //A12 1115data8 0xB9F169FF9B98CDDC, 0x0000BFF9 //A13 1116data8 0x9A3A32BB040894D3, 0x00003FF9 //A14 1117data8 0xF9504CCC1003B3C3, 0x0000BFF8 //A15 1118LOCAL_OBJECT_END(lgammal_8_10_data) 1119 1120LOCAL_OBJECT_START(lgammal_03Q_6_data) 1121// Polynomial coefficients for the lgammal(x), 0.75 <= |x| < 1.0 1122data8 0xBFBC47DCA479E295, 0xBC607E6C1A379D55 //A3 1123data8 0x3FCA051C372609ED, 0x3C7B02D73EB7D831 //A0 1124data8 0xBFE15FAFA86B04DB, 0xBC3F52EE4A8945B5 //A1 1125data8 0x3FD455C4FF28F0BF, 0x3C75F8C6C99F30BB //A2 1126data8 0xD2CF04CD934F03E1, 0x00003FFA //A4 1127data8 0xDB4ED667E29256E1, 0x0000BFF9 //A5 1128data8 0xF155A33A5B6021BF, 0x00003FF8 //A6 1129data8 0x895E9B9D386E0338, 0x0000BFF8 //A7 1130data8 0xA001BE94B937112E, 0x00003FF7 //A8 1131data8 0xBD82846E490ED048, 0x0000BFF6 //A9 1132data8 0xE358D24EC30DBB5D, 0x00003FF5 //A10 1133data8 0x89C4F3652446B78B, 0x0000BFF5 //A11 1134data8 0xA86043E10280193D, 
0x00003FF4 //A12 1135data8 0xCF3A2FBA61EB7682, 0x0000BFF3 //A13 1136data8 0x3F300900CC9200EC //A14 1137data8 0xBF23F42264B94AE8 //A15 1138data8 0x3F18EEF29895FE73 //A16 1139data8 0xBF0F3C4563E3EDFB //A17 1140data8 0x3F0387DBBC385056 //A18 1141data8 0xBEF81B4004F92900 //A19 1142data8 0x3EECA6692A9A5B81 //A20 1143data8 0xBEDF61A0059C15D3 //A21 1144data8 0x3ECDA9F40DCA0111 //A22 1145data8 0xBEB60FE788217BAF //A23 1146data8 0x3E9661D795DFC8C6 //A24 1147data8 0xBE66C7756A4EDEE5 //A25 1148// Polynomial coefficients for the lgammal(x), 1.0 <= |x| < 2.0 1149data8 0xBFC1AE55B180726B, 0xBC7DE1BC478453F5 //A3 1150data8 0xBFBEEB95B094C191, 0xBC53456FF6F1C9D9 //A0 1151data8 0x3FA2AED059BD608A, 0x3C0B65CC647D557F //A1 1152data8 0x3FDDE9E64DF22EF2, 0x3C8993939A8BA8E4 //A2 1153data8 0xF07C206D6B100CFF, 0x00003FFA //A4 1154data8 0xED2CEA9BA52FE7FB, 0x0000BFF9 //A5 1155data8 0xFCE51CED52DF3602, 0x00003FF8 //A6 1156data8 0x8D45D27872326619, 0x0000BFF8 //A7 1157data8 0xA2B78D6BCEBE27F7, 0x00003FF7 //A8 1158data8 0xBF6DC0996A895B6F, 0x0000BFF6 //A9 1159data8 0xE4B9AD335AF82D79, 0x00003FF5 //A10 1160data8 0x8A451880195362A1, 0x0000BFF5 //A11 1161data8 0xA8BE35E63089A7A9, 0x00003FF4 //A12 1162data8 0xCF7FA175FA11C40C, 0x0000BFF3 //A13 1163data8 0x3F300C282FAA3B02 //A14 1164data8 0xBF23F6AEBDA68B80 //A15 1165data8 0x3F18F6860E2224DD //A16 1166data8 0xBF0F542B3CE32F28 //A17 1167data8 0x3F039436218C9BF8 //A18 1168data8 0xBEF8AE6307677AEC //A19 1169data8 0x3EF0B55527B3A211 //A20 1170data8 0xBEE576AC995E7605 //A21 1171data8 0x3ED102DDC1365D2D //A22 1172data8 0xBEC442184F97EA54 //A23 1173data8 0x3ED4D2283DFE5FC6 //A24 1174data8 0xBECB9219A9B46787 //A25 1175// Polynomial coefficients for the lgammal(x), 2.0 <= |x| < 3.0 1176data8 0xBFCA4D55BEAB2D6F, 0xBC66F80E5BFD5AF5 //A3 1177data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B347E3D //A0 1178data8 0x3FFD8773039049E7, 0x3C66CB9007C426EA //A1 1179data8 0x3FE94699894C1F4C, 0x3C918726EB111663 //A2 1180data8 0xA264558FB0906209, 0x00003FFB //A4 1181data8 
0x94D6C50FEB902ADC, 0x0000BFFA //A5 1182data8 0x9620656184243D17, 0x00003FF9 //A6 1183data8 0xA0D0983B8BCA910B, 0x0000BFF8 //A7 1184data8 0xB36AF8559B222BD3, 0x00003FF7 //A8 1185data8 0xCE0DACB3260AE6E5, 0x0000BFF6 //A9 1186data8 0xF1C2C0BF0437C7DB, 0x00003FF5 //A10 1187data8 0x902A2F2F3AB74A92, 0x0000BFF5 //A11 1188data8 0xAE05009B1B2C6E4C, 0x00003FF4 //A12 1189data8 0xD5B71F6456D7D4CB, 0x0000BFF3 //A13 1190data8 0x3F2F0351D71BC9C6 //A14 1191data8 0xBF2B53BC56A3B793 //A15 1192data8 0xBF18B12DC6F6B861 //A16 1193data8 0xBF43EE6EB5215C2F //A17 1194data8 0xBF5474787CDD455E //A18 1195data8 0xBF642B503C9C060A //A19 1196data8 0xBF6E07D1AA254AA3 //A20 1197data8 0xBF71C785443AAEE8 //A21 1198data8 0xBF6F67BF81B71052 //A22 1199data8 0xBF63E4BCCF4FFABF //A23 1200data8 0xBF50067F8C671D5A //A24 1201data8 0xBF29C770D680A5AC //A25 1202// Polynomial coefficients for the lgammal(x), 4.0 <= |x| < 6.0 1203data8 0xBFD6626BC9B31B54, 0xBC85AABE08680902 //A3 1204data8 0x401326643C4479C9, 0x3CAA53C26F31E364 //A0 1205data8 0x401B4C420A50AD7C, 0x3C8C76D55E57DD8D //A1 1206data8 0x3FF735973273D5EC, 0x3C83A0B78E09188A //A2 1207data8 0x81310136363AAB6D, 0x00003FFC //A4 1208data8 0xDF1BD44C4075C0E6, 0x0000BFFA //A5 1209data8 0xD58389FE38D8D664, 0x00003FF9 //A6 1210data8 0xDA62C7221D5B5F87, 0x0000BFF8 //A7 1211data8 0xE9F92CAD0263E157, 0x00003FF7 //A8 1212data8 0x81ACCB8606C165FE, 0x0000BFF7 //A9 1213data8 0x9382D8D263D1C2A3, 0x00003FF6 //A10 1214data8 0xAB3CCBA4C853B12C, 0x0000BFF5 //A11 1215data8 0xCA0818BBCCC59296, 0x00003FF4 //A12 1216data8 0xF18912691CBB5BD0, 0x0000BFF3 //A13 1217data8 0x3F323EF5D8330339 //A14 1218data8 0xBF2641132EA571F7 //A15 1219data8 0x3F1B5D9576175CA9 //A16 1220data8 0xBF10F56A689C623D //A17 1221data8 0x3F04CACA9141A18D //A18 1222data8 0xBEFA307AC9B4E85D //A19 1223data8 0x3EF4B625939FBE32 //A20 1224data8 0xBECEE6AC1420F86F //A21 1225data8 0xBE9A95AE2E485964 //A22 1226data8 0xBF039EF47F8C09BB //A23 1227data8 0xBF05345957F7B7A9 //A24 1228data8 0xBEF85AE6385D4CCC //A25 
1229// Polynomial coefficients for the lgammal(x), 3.0 <= |x| < 4.0 1230data8 0xBFCA4D55BEAB2D6F, 0xBC667B20FF46C6A8 //A3 1231data8 0x3FE62E42FEFA39EF, 0x3C7ABC9E3B398012 //A0 1232data8 0x3FFD8773039049E7, 0x3C66CB9070238D77 //A1 1233data8 0x3FE94699894C1F4C, 0x3C91873D8839B1CD //A2 1234data8 0xA264558FB0906D7E, 0x00003FFB //A4 1235data8 0x94D6C50FEB8AFD72, 0x0000BFFA //A5 1236data8 0x9620656185B68F14, 0x00003FF9 //A6 1237data8 0xA0D0983B34B7088A, 0x0000BFF8 //A7 1238data8 0xB36AF863964AA440, 0x00003FF7 //A8 1239data8 0xCE0DAAFB5497AFB8, 0x0000BFF6 //A9 1240data8 0xF1C2EAFA79CC2864, 0x00003FF5 //A10 1241data8 0x9028922A839572B8, 0x0000BFF5 //A11 1242data8 0xAE1E62F870BA0278, 0x00003FF4 //A12 1243data8 0xD4726F681E2ABA29, 0x0000BFF3 //A13 1244data8 0x3F30559B9A02FADF //A14 1245data8 0xBF243ADEB1266CAE //A15 1246data8 0x3F19303B6F552603 //A16 1247data8 0xBF0F768C288EC643 //A17 1248data8 0x3F039D5356C21DE1 //A18 1249data8 0xBEF81BCA8168E6BE //A19 1250data8 0x3EEC74A53A06AD54 //A20 1251data8 0xBEDED52D1A5DACDF //A21 1252data8 0x3ECCB4C2C7087342 //A22 1253data8 0xBEB4F1FAFDFF5C2F //A23 1254data8 0x3E94C80B52D58904 //A24 1255data8 0xBE64A328CBE92A27 //A25 1256LOCAL_OBJECT_END(lgammal_03Q_6_data) 1257 1258LOCAL_OBJECT_START(lgammal_1pEps_data) 1259// Polynomial coefficients for the lgammal(x), 1 - 2^(-7) <= |x| < 1 + 2^(-7) 1260data8 0x93C467E37DB0C7A5, 0x00003FFE //A1 1261data8 0xD28D3312983E9919, 0x00003FFE //A2 1262data8 0xCD26AADF559A47E3, 0x00003FFD //A3 1263data8 0x8A8991563EC22E81, 0x00003FFD //A4 1264data8 0x3FCA8B9C168D52FE //A5 1265data8 0x3FC5B40CB0696370 //A6 1266data8 0x3FC270AC2229A65D //A7 1267data8 0x3FC0110AF10FCBFC //A8 1268// Polynomial coefficients for the log1p(x), - 2^(-7) <= |x| < 2^(-7) 1269data8 0x3FBC71C71C71C71C //P8 1270data8 0xBFC0000000000000 //P7 1271data8 0x3FC2492492492492 //P6 1272data8 0xBFC5555555555555 //P5 1273data8 0x3FC999999999999A //P4 1274data8 0xBFD0000000000000 //P3 1275data8 0x3FD5555555555555 //P2 1276data8 0xBFE0000000000000 
//P1 1277// short version of "lnsin" polynomial 1278data8 0xD28D3312983E9918, 0x00003FFF //A2 1279data8 0x8A8991563EC241B6, 0x00003FFE //A4 1280data8 0xADA06588061830A5, 0x00003FFD //A6 1281data8 0x80859B57C31CB746, 0x00003FFD //A8 1282LOCAL_OBJECT_END(lgammal_1pEps_data) 1283 1284LOCAL_OBJECT_START(lgammal_neg2andHalf_data) 1285// Polynomial coefficients for the lgammal(x), -2.005859375 <= x < -2.5 (NOTE(review): bounds as written are inverted, since -2.005859375 > -2.5; presumably the range is between -2.5 and -2.005859375 - confirm) 1286data8 0xBF927781D4BB093A, 0xBC511D86D85B7045 // A3, A0L 1287data8 0x3FF1A68793DEFC15, 0x3C9852AE2DA7DEEF // A1, A1L 1288data8 0x408555562D45FAFD, 0xBF972CDAFE5FEFAD // D0, D1 1289data8 0xC18682331EF492A5, 0xC1845E3E0D29606B // C20, C21 1290data8 0x4013141822E16979, 0x3CCF8718B6E75F6C // A2, A2L 1291data8 0xBFACCBF9F5ED0F15, 0xBBDD1AEB73297401 // A0, A3L 1292data8 0xCCCDB17423046445, 0x00004006 // E6 1293data8 0x800514E230A3A452, 0x00004005 // E4 1294data8 0xAAE9A48EC162E76F, 0x00004003 // E2 1295data8 0x81D4F88B3F3EA0FC, 0x00004002 // E0 1296data8 0x40CF3F3E35238DA0, 0xC0F8B340945F1A7E // D6, D7 1297data8 0x40BF89EC0BD609C6, 0xC095897242AEFEE2 // D4, D5 1298data8 0x40A2482FF01DBC5C, 0xC02095E275FDCF62 // D2, D3 1299data8 0xC1641354F2312A6A, 0xC17B3657F85258E9 // C18, C19 1300data8 0xC11F964E9ECBE2C9, 0xC146D7A90F70696C // C16, C17 1301data8 0xE7AECDE6AF8EA816, 0x0000BFEF // E7 1302data8 0xD711252FEBBE1091, 0x0000BFEB // E5 1303data8 0xE648BD10F8C43391, 0x0000BFEF // E3 1304data8 0x948A1E78AA00A98D, 0x0000BFF4 // E1 1305LOCAL_OBJECT_END(lgammal_neg2andHalf_data) 1306 1307LOCAL_OBJECT_START(lgammal_near_neg_half_data) 1308// Polynomial coefficients for the lgammal(x), -0.5 < x < -0.40625 1309data8 0xBFC1AE55B180726C, 0x3C8053CD734E6A1D // A3, A0L 1310data8 0x3FA2AED059BD608A, 0x3C0CD3D2CDBA17F4 // A1, A1L 1311data8 0x40855554DBCD1E1E, 0x3F96C51AC2BEE9E1 // D0, D1 1312data8 0xC18682331EF4927D, 0x41845E3E0D295DFC // C20, C21 1313data8 0x4011DE9E64DF22EF, 0x3CA692B70DAD6B7B // A2, A2L 1314data8 0x3FF43F89A3F0EDD6, 0xBC4955AED0FA087D // A0, A3L 1315data8 
0xCCCD3F1DF4A2C1DD, 0x00004006 // E6 1316data8 0x80028ADE33C7FCD9, 0x00004005 // E4 1317data8 0xAACA474E485507EF, 0x00004003 // E2 1318data8 0x80F07C206D6B0ECD, 0x00004002 // E0 1319data8 0x40CF3F3E33E83056, 0x40F8B340944633D9 // D6, D7 1320data8 0x40BF89EC059931F0, 0x409589723307AD20 // D4, D5 1321data8 0x40A2482FD0054824, 0x402095CE7F19D011 // D2, D3 1322data8 0xC1641354F2313614, 0x417B3657F8525354 // C18, C19 1323data8 0xC11F964E9ECFD21C, 0x4146D7A90F701836 // C16, C17 1324data8 0x86A9C01F0EA11E5A, 0x0000BFF5 // E7 1325data8 0xBF6D8469142881C0, 0x0000BFF6 // E5 1326data8 0x8D45D277BA8255F1, 0x0000BFF8 // E3 1327data8 0xED2CEA9BA528BCC3, 0x0000BFF9 // E1 1328LOCAL_OBJECT_END(lgammal_near_neg_half_data) 1329 1330//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1331////////////// POLYNOMIAL COEFFICIENTS FOR "NEAR ROOTS" RANGES ///////////// 1332////////////// THIS PART OF THE TABLE SHOULD BE ACCESSED VERY RARELY ///////////// 1333//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
1334LOCAL_OBJECT_START(lgammal_right_roots_polynomial_data) 1335// Polynomial coefficients for right root on [-3, -2] 1336// Lgammal is approximated by polynomial within [-.056244 ; .158208 ] range 1337data8 0xBBBD5E9DCD11030B, 0xB867411D9FF87DD4 //A0 1338data8 0x3FF83FE966AF535E, 0x3CAA21235B8A769A //A1 1339data8 0x40136EEBB002F55C, 0x3CC3959A6029838E //A2 1340data8 0xB4A5302C53C2BEDD, 0x00003FFF //A3 1341data8 0x8B8C6BE504F2DA1C, 0x00004002 //A4 1342data8 0xB99CFF02593B4D98, 0x00004001 //A5 1343data8 0x4038D32F682AA1CF //A6 1344data8 0x403809F04EE6C5B5 //A7 1345data8 0x40548EAA81634CEE //A8 1346data8 0x4059297ADB6BC03D //A9 1347data8 0x407286FB8EC5C9DA //A10 1348data8 0x407A92E05B744CFB //A11 1349data8 0x4091A9D4144258CD //A12 1350data8 0x409C4D01D24F367E //A13 1351data8 0x40B1871B9A426A83 //A14 1352data8 0x40BE51C48BD9A583 //A15 1353data8 0x40D2140D0C6153E7 //A16 1354data8 0x40E0FB2C989CE4A3 //A17 1355data8 0x40E52739AB005641 //A18 1356data8 0x41161E3E6DDF503A //A19 1357// Polynomial coefficients for right root on [-4, -3] 1358// Lgammal is approximated by polynomial within [-.172797 ; .171573 ] range 1359data8 0x3C172712B248E42E, 0x38CB8D17801A5D67 //A0 1360data8 0x401F20A65F2FAC54, 0x3CCB9EA1817A824E //A1 1361data8 0x4039D4D2977150EF, 0x3CDA42E149B6276A //A2 1362data8 0xE089B8926AE2D9CB, 0x00004005 //A3 1363data8 0x933901EBBB586C37, 0x00004008 //A4 1364data8 0xCCD319BED1CFA1CD, 0x0000400A //A5 1365data8 0x40D293C3F78D3C37 //A6 1366data8 0x40FBB97AA0B6DD02 //A7 1367data8 0x41251EA3345E5EB9 //A8 1368data8 0x415057F65C92E7B0 //A9 1369data8 0x41799C865241B505 //A10 1370data8 0x41A445209EFE896B //A11 1371data8 0x41D02D21880C953B //A12 1372data8 0x41F9FFDE8C63E16D //A13 1373data8 0x422504DC8302D2BE //A14 1374data8 0x425111BF18C95414 //A15 1375data8 0x427BCBE74A2B8EF7 //A16 1376data8 0x42A7256F59B286F7 //A17 1377data8 0x42D462D1586DE61F //A18 1378data8 0x42FBB1228D6C5118 //A19 1379// Polynomial coefficients for right root on [-5, -4] 1380// Lgammal is approximated by 
polynomial within [-.163171 ; .161988 ] range 1381data8 0x3C5840FBAFDEE5BB, 0x38CAC0336E8C490A //A0 1382data8 0x403ACA5CF4921642, 0x3CCEDCDDA5491E56 //A1 1383data8 0x40744415CD813F8E, 0x3CFBFEBC17E39146 //A2 1384data8 0xAACD88D954E3E1BD, 0x0000400B //A3 1385data8 0xCB68C710D75ED802, 0x0000400F //A4 1386data8 0x8130F5AB997277AC, 0x00004014 //A5 1387data8 0x41855E3DBF99EBA7 //A6 1388data8 0x41CD14FE49C49FC2 //A7 1389data8 0x421433DCE281F07D //A8 1390data8 0x425C8399C7A92B6F //A9 1391data8 0x42A45FBE67840F1A //A10 1392data8 0x42ED68D75F9E6C98 //A11 1393data8 0x433567291C27E5BE //A12 1394data8 0x437F5ED7A9D9FD28 //A13 1395data8 0x43C720A65C8AB711 //A14 1396data8 0x441120A6C1D40B9B //A15 1397data8 0x44596F561F2D1CBE //A16 1398data8 0x44A3507DA81D5C01 //A17 1399data8 0x44EF06A31E39EEDF //A18 1400data8 0x45333774C99F523F //A19 1401// Polynomial coefficients for right root on [-6, -5] 1402// Lgammal is approximated by polynomial within [-.156450 ; .156126 ] range 1403data8 0x3C71B82D6B2B3304, 0x3917186E3C0DC231 //A0 1404data8 0x405ED72E0829AE02, 0x3C960C25157980EB //A1 1405data8 0x40BCECC32EC22F9B, 0x3D5D8335A32F019C //A2 1406data8 0x929EC2B1FB931F17, 0x00004012 //A3 1407data8 0xD112EF96D37316DE, 0x00004018 //A4 1408data8 0x9F00BB9BB13416AB, 0x0000401F //A5 1409data8 0x425F7D8D5BDCB223 //A6 1410data8 0x42C9A8D00C776CC6 //A7 1411data8 0x433557FD8C481424 //A8 1412data8 0x43A209221A953EF0 //A9 1413data8 0x440EDC98D5618AB7 //A10 1414data8 0x447AABD25E367378 //A11 1415data8 0x44E73DE20CC3B288 //A12 1416data8 0x455465257B4E0BD8 //A13 1417data8 0x45C2011532085353 //A14 1418data8 0x462FEE4CC191945B //A15 1419data8 0x469C63AEEFEF0A7F //A16 1420data8 0x4709D045390A3810 //A17 1421data8 0x4778D360873C9F64 //A18 1422data8 0x47E26965BE9A682A //A19 1423// Polynomial coefficients for right root on [-7, -6] 1424// Lgammal is approximated by polynomial within [-.154582 ; .154521 ] range 1425data8 0x3C75F103A1B00A48, 0x391C041C190C726D //A0 1426data8 0x40869DE49E3AF2AA, 0x3D1C17E1F813063B 
//A1 1427data8 0x410FCE23484CFD10, 0x3DB6F38C2F11DAB9 //A2 1428data8 0xEF281D1E1BE2055A, 0x00004019 //A3 1429data8 0xFCE3DA92AC55DFF8, 0x00004022 //A4 1430data8 0x8E9EA838A20BD58E, 0x0000402C //A5 1431data8 0x4354F21E2FB9E0C9 //A6 1432data8 0x43E9500994CD4F09 //A7 1433data8 0x447F3A2C23C033DF //A8 1434data8 0x45139152656606D8 //A9 1435data8 0x45A8D45F8D3BF2E8 //A10 1436data8 0x463FD32110E5BFE5 //A11 1437data8 0x46D490B3BDBAE0BE //A12 1438data8 0x476AC3CAD905DD23 //A13 1439data8 0x48018558217AD473 //A14 1440data8 0x48970AF371D30585 //A15 1441data8 0x492E6273A8BEFFE3 //A16 1442data8 0x49C47CC9AE3F1073 //A17 1443data8 0x4A5D38E8C35EFF45 //A18 1444data8 0x4AF0123E89694CD8 //A19 1445// Polynomial coefficients for right root on [-8, -7] 1446// Lgammal is approximated by polynomial within [-.154217 ; .154208 ] range 1447data8 0xBCD2507D818DDD68, 0xB97F6940EA2871A0 //A0 1448data8 0x40B3B407AA387BCB, 0x3D6320238F2C43D1 //A1 1449data8 0x41683E85DAAFBAC7, 0x3E148D085958EA3A //A2 1450data8 0x9F2A95AF1E10A548, 0x00004022 //A3 1451data8 0x92F21522F482300E, 0x0000402E //A4 1452data8 0x90B51AB03A1F244D, 0x0000403A //A5 1453data8 0x44628E1C70EF534F //A6 1454data8 0x452393E2BC32D244 //A7 1455data8 0x45E5164141F4BA0B //A8 1456data8 0x46A712B3A8AF5808 //A9 1457data8 0x47698FD36CEDD0F2 //A10 1458data8 0x482C9AE6BBAA3637 //A11 1459data8 0x48F023821857C8E9 //A12 1460data8 0x49B2569053FC106F //A13 1461data8 0x4A74F646D5C1604B //A14 1462data8 0x4B3811CF5ABA4934 //A15 1463data8 0x4BFBB5DD6C84E233 //A16 1464data8 0x4CC05021086F637B //A17 1465data8 0x4D8450A345B0FB49 //A18 1466data8 0x4E43825848865DB2 //A19 1467// Polynomial coefficients for right root on [-9, -8] 1468// Lgammal is approximated by polynomial within [-.154160 ; .154158 ] range 1469data8 0x3CDF4358564F2B46, 0x397969BEE6042F81 //A0 1470data8 0x40E3B088FED67721, 0x3D82787BA937EE85 //A1 1471data8 0x41C83A3893550EF4, 0x3E542ED57E244DA8 //A2 1472data8 0x9F003C6DC56E0B8E, 0x0000402B //A3 1473data8 0x92BDF64A3213A699, 0x0000403A //A4 
1474data8 0x9074F503AAD417AF, 0x00004049 //A5 1475data8 0x4582843E1313C8CD //A6 1476data8 0x467387BD6A7826C1 //A7 1477data8 0x4765074E788CF440 //A8 1478data8 0x4857004DD9D1E09D //A9 1479data8 0x4949792ED7530EAF //A10 1480data8 0x4A3C7F089A292ED3 //A11 1481data8 0x4B30125BF0AABB86 //A12 1482data8 0x4C224175195E307E //A13 1483data8 0x4D14DC4C8B32C08D //A14 1484data8 0x4E07F1DB2786197E //A15 1485data8 0x4EFB8EA1C336DACB //A16 1486data8 0x4FF03797EACD0F23 //A17 1487data8 0x50E4304A8E68A730 //A18 1488data8 0x51D3618FB2EC9F93 //A19 1489// Polynomial coefficients for right root on [-10, -9] 1490// Lgammal is approximated by polynomial within [-.154152 ; .154152 ] range 1491data8 0x3D42F34DA97ECF0C, 0x39FD1256F345B0D0 //A0 1492data8 0x4116261203919787, 0x3DC12D44055588EB //A1 1493data8 0x422EA8F32FB7FE99, 0x3ED849CE4E7B2D77 //A2 1494data8 0xE25BAF73477A57B5, 0x00004034 //A3 1495data8 0xEB021FD10060504A, 0x00004046 //A4 1496data8 0x8220A208EE206C5F, 0x00004059 //A5 1497data8 0x46B2C3903EC9DA14 //A6 1498data8 0x47D64393744B9C67 //A7 1499data8 0x48FAF79CCDC604DD //A8 1500data8 0x4A20975DB8061EBA //A9 1501data8 0x4B44AB9CBB38DB21 //A10 1502data8 0x4C6A032F60094FE9 //A11 1503data8 0x4D908103927634B4 //A12 1504data8 0x4EB516CA21D30861 //A13 1505data8 0x4FDB1BF12C58D318 //A14 1506data8 0x510180AAE094A553 //A15 1507data8 0x5226A8F2A2D45D57 //A16 1508data8 0x534E00B6B0C8B809 //A17 1509data8 0x5475022FE21215B2 //A18 1510data8 0x5596B02BF6C5E19B //A19 1511// Polynomial coefficients for right root on [-11, -10] 1512// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1513data8 0x3D7AA9C2E2B1029C, 0x3A15FB37578544DB //A0 1514data8 0x414BAF825A0C91D4, 0x3DFB9DA2CE398747 //A1 1515data8 0x4297F3EC8AE0AF03, 0x3F34208B55FB8781 //A2 1516data8 0xDD0C97D3197F56DE, 0x0000403E //A3 1517data8 0x8F6F3AF7A5499674, 0x00004054 //A4 1518data8 0xC68DA1AF6D878EEB, 0x00004069 //A5 1519data8 0x47F1E4E1E2197CE0 //A6 1520data8 0x494A8A28E597C3EB //A7 1521data8 0x4AA4175D0D35D705 //A8 
1522data8 0x4BFEE6F0AF69E814 //A9 1523data8 0x4D580FE7B3DBB3C6 //A10 1524data8 0x4EB2ECE60E4608AF //A11 1525data8 0x500E04BE3E2B4F24 //A12 1526data8 0x5167F9450F0FB8FD //A13 1527data8 0x52C342BDE747603F //A14 1528data8 0x541F1699D557268C //A15 1529data8 0x557927C5F079864E //A16 1530data8 0x56D4D10FEEDB030C //A17 1531data8 0x5832385DF86AD28A //A18 1532data8 0x598898914B4D6523 //A19 1533// Polynomial coefficients for right root on [-12, -11] 1534// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1535data8 0xBD96F61647C58B03, 0xBA3ABB0C2A6C755B //A0 1536data8 0x418308A82714B70D, 0x3E1088FC6A104C39 //A1 1537data8 0x4306A493DD613C39, 0x3FB2341ECBF85741 //A2 1538data8 0x8FA8FE98339474AB, 0x00004049 //A3 1539data8 0x802CCDF570BA7942, 0x00004062 //A4 1540data8 0xF3F748AF11A32890, 0x0000407A //A5 1541data8 0x493E3B567EF178CF //A6 1542data8 0x4ACED38F651BA362 //A7 1543data8 0x4C600B357337F946 //A8 1544data8 0x4DF0F71A52B54CCF //A9 1545data8 0x4F8229F3B9FA2C70 //A10 1546data8 0x5113A4C4979B770E //A11 1547data8 0x52A56BC367F298D5 //A12 1548data8 0x543785CF31842DC0 //A13 1549data8 0x55C9FC37E3E40896 //A14 1550data8 0x575CD5D1BA556C82 //A15 1551data8 0x58F00A7AD99A9E08 //A16 1552data8 0x5A824088688B008D //A17 1553data8 0x5C15F75EF7E08EBD //A18 1554data8 0x5DA462EA902F0C90 //A19 1555// Polynomial coefficients for right root on [-13, -12] 1556// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1557data8 0x3DC3191752ACFC9D, 0x3A26CB6629532DBF //A0 1558data8 0x41BC8CFC051191BD, 0x3E68A84DA4E62AF2 //A1 1559data8 0x43797926294A0148, 0x400F345FF3723CFF //A2 1560data8 0xF26D2AF700B82625, 0x00004053 //A3 1561data8 0xA238B24A4B1F7B15, 0x00004070 //A4 1562data8 0xE793B5C0A41A264F, 0x0000408C //A5 1563data8 0x4A9585BDDACE863D //A6 1564data8 0x4C6075953448088A //A7 1565data8 0x4E29B2F38D1FC670 //A8 1566data8 0x4FF4619B079C440F //A9 1567data8 0x51C05DAE118D8AD9 //A10 1568data8 0x538A8C7F87326AD4 //A11 1569data8 0x5555B6937588DAB3 //A12 
1570data8 0x5721E1F8B6E6A7DB //A13 1571data8 0x58EDA1D7A77DD6E5 //A14 1572data8 0x5AB8A9616B7DC9ED //A15 1573data8 0x5C84942AA209ED17 //A16 1574data8 0x5E518FC34C6F54EF //A17 1575data8 0x601FB3F17BCCD9A0 //A18 1576data8 0x61E61128D512FE97 //A19 1577// Polynomial coefficients for right root on [-14, -13] 1578// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1579data8 0xBE170D646421B3F5, 0xBAAD95F79FCB5097 //A0 1580data8 0x41F7328CBFCD9AC7, 0x3E743B8B1E8AEDB1 //A1 1581data8 0x43F0D0FA2DBDA237, 0x40A0422D6A227B55 //A2 1582data8 0x82082DF2D32686CC, 0x0000405F //A3 1583data8 0x8D64EE9B42E68B43, 0x0000407F //A4 1584data8 0xA3FFD82E08C5F1F1, 0x0000409F //A5 1585data8 0x4BF8C49D99123454 //A6 1586data8 0x4DFEC79DDF11342F //A7 1587data8 0x50038615A892F6BD //A8 1588data8 0x520929453DB32EF1 //A9 1589data8 0x54106A7808189A7F //A10 1590data8 0x5615A302D03C207B //A11 1591data8 0x581CC175AA736F5E //A12 1592data8 0x5A233E071147C017 //A13 1593data8 0x5C29E81917243F22 //A14 1594data8 0x5E3184B0B5AC4707 //A15 1595data8 0x6037C11DE62D8388 //A16 1596data8 0x6240787C4B1C9D6C //A17 1597data8 0x6448289235E80977 //A18 1598data8 0x664B5352C6C3449E //A19 1599// Polynomial coefficients for right root on [-15, -14] 1600// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1601data8 0x3E562C2E34A9207D, 0x3ADC00DA3DFF7A83 //A0 1602data8 0x42344C3B2F0D90AB, 0x3EB8A2E979F24536 //A1 1603data8 0x4469BFFF28B50D07, 0x41181E3D05C1C294 //A2 1604data8 0xAE38F64DCB24D9F8, 0x0000406A //A3 1605data8 0xA5C3F52C1B350702, 0x0000408E //A4 1606data8 0xA83BC857BCD67A1B, 0x000040B2 //A5 1607data8 0x4D663B4727B4D80A //A6 1608data8 0x4FA82C965B0F7788 //A7 1609data8 0x51EAD58C02908D95 //A8 1610data8 0x542E427970E073D8 //A9 1611data8 0x56714644C558A818 //A10 1612data8 0x58B3EC2040C77BAE //A11 1613data8 0x5AF72AE6A83D45B1 //A12 1614data8 0x5D3B214F611F5D12 //A13 1615data8 0x5F7FF5E49C54E92A //A14 1616data8 0x61C2E917AB765FB2 //A15 1617data8 0x64066FD70907B4C1 //A16 
1618data8 0x664B3998D60D0F9B //A17 1619data8 0x689178710782FA8B //A18 1620data8 0x6AD14A66C1C7BEC3 //A19 1621// Polynomial coefficients for right root on [-16, -15] 1622// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1623data8 0xBE6D7E7192615BAE, 0xBB0137677D7CC719 //A0 1624data8 0x4273077763F6628C, 0x3F09250FB8FC8EC9 //A1 1625data8 0x44E6A1BF095B1AB3, 0x4178D5A74F6CB3B3 //A2 1626data8 0x8F8E0D5060FCC76E, 0x00004076 //A3 1627data8 0x800CC1DCFF092A63, 0x0000409E //A4 1628data8 0xF3AB0BA9D14CDA72, 0x000040C5 //A5 1629data8 0x4EDE3000A2F6D54F //A6 1630data8 0x515EC613B9C8E241 //A7 1631data8 0x53E003309FEEEA96 //A8 1632data8 0x5660ED908D7C9A90 //A9 1633data8 0x58E21E9B517B1A50 //A10 1634data8 0x5B639745E4374EE2 //A11 1635data8 0x5DE55BB626B2075D //A12 1636data8 0x606772B7506BA747 //A13 1637data8 0x62E9E581AB2E057B //A14 1638data8 0x656CBAD1CF85D396 //A15 1639data8 0x67EFF4EBD7989872 //A16 1640data8 0x6A722D2B19B7E2F9 //A17 1641data8 0x6CF5DEB3073B0743 //A18 1642data8 0x6F744AC11550B93A //A19 1643// Polynomial coefficients for right root on [-17, -16] 1644// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1645data8 0xBEDCC6291188207E, 0xBB872E3FDD48F5B7 //A0 1646data8 0x42B3076EE7525EF9, 0x3F6687A5038CA81C //A1 1647data8 0x4566A1AAD96EBCB5, 0x421F0FEDFBF548D2 //A2 1648data8 0x8F8D4D3DE9850DBA, 0x00004082 //A3 1649data8 0x800BDD6DA2CE1859, 0x000040AE //A4 1650data8 0xF3A8EC4C9CDC1CE5, 0x000040D9 //A5 1651data8 0x505E2FAFDB812628 //A6 1652data8 0x531EC5B3A7508719 //A7 1653data8 0x55E002F77E99B628 //A8 1654data8 0x58A0ED4C9B4DAE54 //A9 1655data8 0x5B621E4A8240F90C //A10 1656data8 0x5E2396E5C8849814 //A11 1657data8 0x60E55B43D8C5CE71 //A12 1658data8 0x63A7722F5D45D01D //A13 1659data8 0x6669E4E010DCE45A //A14 1660data8 0x692CBA120D5E78F6 //A15 1661data8 0x6BEFF4045350B22E //A16 1662data8 0x6EB22C9807C21819 //A17 1663data8 0x7175DE20D04617C4 //A18 1664data8 0x74344AB87C6D655F //A19 1665// Polynomial coefficients for right 
root on [-18, -17] 1666// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1667data8 0xBF28AEEE7B61D77C, 0xBBDBBB5FC57ABF79 //A0 1668data8 0x42F436F56B3B8A0C, 0x3FA43EE3C5C576E9 //A1 1669data8 0x45E98A22535D115D, 0x42984678BE78CC48 //A2 1670data8 0xAC176F3775E6FCFC, 0x0000408E //A3 1671data8 0xA3114F53A9FEB922, 0x000040BE //A4 1672data8 0xA4D168A8334ABF41, 0x000040EE //A5 1673data8 0x51E5B0E7EC7182BB //A6 1674data8 0x54E77D67B876EAB6 //A7 1675data8 0x57E9F7C30C09C4B6 //A8 1676data8 0x5AED29B0488614CA //A9 1677data8 0x5DF09486F87E79F9 //A10 1678data8 0x60F30B199979654E //A11 1679data8 0x63F60E02C7DCCC5F //A12 1680data8 0x66F9B8A00EB01684 //A13 1681data8 0x69FE2D3ED0700044 //A14 1682data8 0x6D01C8363C7DCC84 //A15 1683data8 0x700502B29C2F06E3 //A16 1684data8 0x730962B4500F4A61 //A17 1685data8 0x76103C6ED099192A //A18 1686data8 0x79100C7132CFD6E3 //A19 1687// Polynomial coefficients for right root on [-19, -18] 1688// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1689data8 0x3F3C19A53328A0C3, 0x3BE04ADC3FBE1458 //A0 1690data8 0x4336C16C16C16C19, 0x3FE58CE3AC4A7C28 //A1 1691data8 0x46702E85C0898B70, 0x432C922E412CEC6E //A2 1692data8 0xF57B99A1C034335D, 0x0000409A //A3 1693data8 0x82EC9634223DF909, 0x000040CF //A4 1694data8 0x94F66D7557E2EA60, 0x00004103 //A5 1695data8 0x5376118B79AE34D0 //A6 1696data8 0x56BAE7106D52E548 //A7 1697data8 0x5A00BD48CC8E25AB //A8 1698data8 0x5D4529722821B493 //A9 1699data8 0x608B1654AF31BBC1 //A10 1700data8 0x63D182CC98AEA859 //A11 1701data8 0x6716D43D5EEB05E8 //A12 1702data8 0x6A5DF884FC172E1C //A13 1703data8 0x6DA3CA7EBB97976B //A14 1704data8 0x70EA416D0BE6D2EF //A15 1705data8 0x743176C31EBB65F2 //A16 1706data8 0x7777C401A8715CF9 //A17 1707data8 0x7AC1110C6D350440 //A18 1708data8 0x7E02D0971CF84865 //A19 1709// Polynomial coefficients for right root on [-20, -19] 1710// Lgammal is approximated by polynomial within [-.154151 ; .154151 ] range 1711data8 0xBFAB767F9BE21803, 0xBC5ACEF5BB1BD8B5 
//A0 1712data8 0x4379999999999999, 0x4029241C7F5914C8 //A1 1713data8 0x46F47AE147AE147A, 0x43AC2979B64B9D7E //A2 1714data8 0xAEC33E1F67152993, 0x000040A7 //A3 1715data8 0xD1B71758E219616F, 0x000040DF //A4 1716data8 0x8637BD05AF6CF468, 0x00004118 //A5 1717data8 0x55065E9F80F293DE //A6 1718data8 0x588EADA78C44EE66 //A7 1719data8 0x5C15798EE22DEF09 //A8 1720data8 0x5F9E8ABFD644FA63 //A9 1721data8 0x6325FD7FE29BD7CD //A10 1722data8 0x66AFFC5C57E1F802 //A11 1723data8 0x6A3774CD7D5C0181 //A12 1724data8 0x6DC152724DE2A6FE //A13 1725data8 0x7149BB138EB3D0C2 //A14 1726data8 0x74D32FF8A70896C2 //A15 1727data8 0x785D3749F9C72BD7 //A16 1728data8 0x7BE5CCF65EBC4E40 //A17 1729data8 0x7F641A891B5FC652 //A18 1730data8 0x7FEFFFFFFFFFFFFF //A19 1731LOCAL_OBJECT_END(lgammal_right_roots_polynomial_data) 1732 1733LOCAL_OBJECT_START(lgammal_left_roots_polynomial_data) 1734// Polynomial coefficients for left root on [-3, -2] 1735// Lgammal is approximated by polynomial within [.084641 ; -.059553 ] range 1736data8 0xBC0844590979B82E, 0xB8BC7CE8CE2ECC3B //A0 1737data8 0xBFFEA12DA904B18C, 0xBC91A6B2BAD5EF6E //A1 1738data8 0x4023267F3C265A51, 0x3CD7055481D03AED //A2 1739data8 0xA0C2D618645F8E00, 0x0000C003 //A3 1740data8 0xFA8256664F8CD2BE, 0x00004004 //A4 1741data8 0xC2C422C103F57158, 0x0000C006 //A5 1742data8 0x4084373F7CC70AF5 //A6 1743data8 0xC0A12239BDD6BB95 //A7 1744data8 0x40BDBA65E2709397 //A8 1745data8 0xC0DA2D2504DFB085 //A9 1746data8 0x40F758173CA5BF3C //A10 1747data8 0xC11506C65C267E72 //A11 1748data8 0x413318EE3A6B05FC //A12 1749data8 0xC1517767F247DA98 //A13 1750data8 0x41701237B4754D73 //A14 1751data8 0xC18DB8A03BC5C3D8 //A15 1752data8 0x41AB80953AC14A07 //A16 1753data8 0xC1C9B7B76638D0A4 //A17 1754data8 0x41EA727E3033E2D9 //A18 1755data8 0xC20812C297729142 //A19 1756// 1757// Polynomial coefficients for left root on [-4, -3] 1758// Lgammal is approximated by polynomial within [.147147 ; -.145158 ] range 1759data8 0xBC3130AE5C4F54DB, 0xB8ED23294C13398A //A0 1760data8 
0xC034B99D966C5646, 0xBCE2E5FE3BC3DBB9 //A1 1761data8 0x406F76DEAE0436BD, 0x3D14974DDEC057BD //A2 1762data8 0xE929ACEA5979BE96, 0x0000C00A //A3 1763data8 0xF47C14F8A0D52771, 0x0000400E //A4 1764data8 0x88B7BC036937481C, 0x0000C013 //A5 1765data8 0x4173E8F3AB9FC266 //A6 1766data8 0xC1B7DBBE062FB11B //A7 1767data8 0x41FD2F76DE7A47A7 //A8 1768data8 0xC242225FE53B124D //A9 1769data8 0x4286D12AE2FBFA30 //A10 1770data8 0xC2CCFFC267A3C4C0 //A11 1771data8 0x431294E10008E014 //A12 1772data8 0xC357FAC8C9A2DF6A //A13 1773data8 0x439F2190AB9FAE01 //A14 1774data8 0xC3E44C1D8E8C67C3 //A15 1775data8 0x442A8901105D5A38 //A16 1776data8 0xC471C4421E908C3A //A17 1777data8 0x44B92CD4D59D6D17 //A18 1778data8 0xC4FB3A078B5247FA //A19 1779// Polynomial coefficients for left root on [-5, -4] 1780// Lgammal is approximated by polynomial within [.155671 ; -.155300 ] range 1781data8 0xBC57BF3C6E8A94C1, 0xB902FB666934AC9E //A0 1782data8 0xC05D224A3EF9E41F, 0xBCF6F5713913E440 //A1 1783data8 0x40BB533C678A3955, 0x3D688E53E3C72538 //A2 1784data8 0x869FBFF732E99B84, 0x0000C012 //A3 1785data8 0xBA9537AD61392DEC, 0x00004018 //A4 1786data8 0x89EAE8B1DEA06B05, 0x0000C01F //A5 1787data8 0x425A8C5C53458D3C //A6 1788data8 0xC2C5068B3ED6509B //A7 1789data8 0x4330FFA575E99B4E //A8 1790data8 0xC39BEC12DDDF7669 //A9 1791data8 0x44073825725F74F9 //A10 1792data8 0xC47380EBCA299047 //A11 1793data8 0x44E084DD9B666437 //A12 1794data8 0xC54C2DA6BF787ACF //A13 1795data8 0x45B82D65C8D6FA42 //A14 1796data8 0xC624D62113FE950A //A15 1797data8 0x469200CC19B45016 //A16 1798data8 0xC6FFDDC6DD938E2E //A17 1799data8 0x476DD7C07184B9F9 //A18 1800data8 0xC7D554A30085C052 //A19 1801// Polynomial coefficients for left root on [-6, -5] 1802// Lgammal is approximated by polynomial within [.157425 ; -.157360 ] range 1803data8 0x3C9E20A87C8B79F1, 0x39488BE34B2427DB //A0 1804data8 0xC08661F6A43A5E12, 0xBD3D912526D759CC //A1 1805data8 0x410F79DCB794F270, 0x3DB9BEE7CD3C1BF5 //A2 1806data8 0xEB7404450D0005DB, 0x0000C019 //A3 1807data8 
0xF7AE9846DFE4D4AB, 0x00004022 //A4 1808data8 0x8AF535855A95B6DA, 0x0000C02C //A5 1809data8 0x43544D54E9FE240E //A6 1810data8 0xC3E8684E40CE6CFC //A7 1811data8 0x447DF44C1D803454 //A8 1812data8 0xC512AC305439B2BA //A9 1813data8 0x45A79226AF79211A //A10 1814data8 0xC63E0DFF7244893A //A11 1815data8 0x46D35216C3A83AF3 //A12 1816data8 0xC76903BE0C390E28 //A13 1817data8 0x48004A4DECFA4FD5 //A14 1818data8 0xC8954FBD243DB8BE //A15 1819data8 0x492BF3A31EB18DDA //A16 1820data8 0xC9C2C6A864521F3A //A17 1821data8 0x4A5AB127C62E8DA1 //A18 1822data8 0xCAECF60EF3183C57 //A19 1823// Polynomial coefficients for left root on [-7, -6] 1824// Lgammal is approximated by polynomial within [.157749 ; -.157739 ] range 1825data8 0x3CC9B9E8B8D551D6, 0x3961813C8E1E10DB //A0 1826data8 0xC0B3ABF7A5CEA91F, 0xBD55638D4BCB4CC4 //A1 1827data8 0x4168349A25504236, 0x3E0287ECE50CCF76 //A2 1828data8 0x9EC8ED6E4C219E67, 0x0000C022 //A3 1829data8 0x9279EB1B799A3FF3, 0x0000402E //A4 1830data8 0x90213EF8D9A5DBCF, 0x0000C03A //A5 1831data8 0x4462775E857FB71C //A6 1832data8 0xC52377E70B45FDBF //A7 1833data8 0x45E4F3D28EDA8C28 //A8 1834data8 0xC6A6E85571BD2D0B //A9 1835data8 0x47695BB17E74DF74 //A10 1836data8 0xC82C5AC0ED6A662F //A11 1837data8 0x48EFF8159441C2E3 //A12 1838data8 0xC9B22602C1B68AE5 //A13 1839data8 0x4A74BA8CE7B34100 //A14 1840data8 0xCB37C7E208482E4B //A15 1841data8 0x4BFB5A1D57352265 //A16 1842data8 0xCCC01CB3021212FF //A17 1843data8 0x4D841613AC3431D1 //A18 1844data8 0xCE431C9E9EE43AD9 //A19 1845// Polynomial coefficients for left root on [-8, -7] 1846// Lgammal is approximated by polynomial within [.157799 ; -.157798 ] range 1847data8 0xBCF9C7A33AD9478C, 0xB995B0470F11E5ED //A0 1848data8 0xC0E3AF76FE4C2F8B, 0xBD8DBCD503250511 //A1 1849data8 0x41C838E76CAAF0D5, 0x3E5D79F5E2E069C3 //A2 1850data8 0x9EF345992B262CE0, 0x0000C02B //A3 1851data8 0x92AE0292985FD559, 0x0000403A //A4 1852data8 0x90615420C08F7D8C, 0x0000C049 //A5 1853data8 0x45828139342CEEB7 //A6 1854data8 0xC67384066C31E2D3 //A7 
1855data8 0x476502BC4DAC2C35 //A8 1856data8 0xC856FAADFF22ADC6 //A9 1857data8 0x49497243255AB3CE //A10 1858data8 0xCA3C768489520F6B //A11 1859data8 0x4B300D1EA47AF838 //A12 1860data8 0xCC223B0508AC620E //A13 1861data8 0x4D14D46583338CD8 //A14 1862data8 0xCE07E7A87AA068E4 //A15 1863data8 0x4EFB811AD2F8BEAB //A16 1864data8 0xCFF0351B51508523 //A17 1865data8 0x50E4364CCBF53100 //A18 1866data8 0xD1D33CFD0BF96FA6 //A19 1867// Polynomial coefficients for left root on [-9, -8] 1868// Lgammal is approximated by polynomial within [.157806 ; -.157806 ] range 1869data8 0x3D333E4438B1B9D4, 0x39E7B956B83964C1 //A0 1870data8 0xC11625EDFC63DCD8, 0xBDCF39625709EFAC //A1 1871data8 0x422EA8C150480F16, 0x3EC16ED908AB7EDD //A2 1872data8 0xE2598725E2E11646, 0x0000C034 //A3 1873data8 0xEAFF2346DE3EBC98, 0x00004046 //A4 1874data8 0x821E90DE12A0F05F, 0x0000C059 //A5 1875data8 0x46B2C334AE5366FE //A6 1876data8 0xC7D64314B43191B6 //A7 1877data8 0x48FAF6ED5899E01B //A8 1878data8 0xCA2096E4472AF37D //A9 1879data8 0x4B44AAF49FB7E4C8 //A10 1880data8 0xCC6A02469F2BD920 //A11 1881data8 0x4D9080626D2EFC07 //A12 1882data8 0xCEB515EDCF0695F7 //A13 1883data8 0x4FDB1AC69BF36960 //A14 1884data8 0xD1017F8274339270 //A15 1885data8 0x5226A684961BAE2F //A16 1886data8 0xD34E085C088404A5 //A17 1887data8 0x547511892FF8960E //A18 1888data8 0xD5968FA3B1ED67A9 //A19 1889// Polynomial coefficients for left root on [-10, -9] 1890// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 1891data8 0xBD355818A2B42BA2, 0xB9B7320B6A0D61EA //A0 1892data8 0xC14BAF7DA5F3770E, 0xBDE64AF9A868F719 //A1 1893data8 0x4297F3E8791F9CD3, 0x3F2A553E59B4835E //A2 1894data8 0xDD0C5F7E551BD13C, 0x0000C03E //A3 1895data8 0x8F6F0A3B2EB08BBB, 0x00004054 //A4 1896data8 0xC68D4D5AD230BA08, 0x0000C069 //A5 1897data8 0x47F1E4D8C35D1A3E //A6 1898data8 0xC94A8A191DB0A466 //A7 1899data8 0x4AA4174F65FE6AE8 //A8 1900data8 0xCBFEE6D90F94E9DD //A9 1901data8 0x4D580FD3438BE16C //A10 1902data8 0xCEB2ECD456D50224 //A11 1903data8 
0x500E049F7FE64546 //A12 1904data8 0xD167F92D9600F378 //A13 1905data8 0x52C342AE2B43261A //A14 1906data8 0xD41F15DEEDA4B67E //A15 1907data8 0x55792638748AFB7D //A16 1908data8 0xD6D4D760074F6E6B //A17 1909data8 0x5832469D58ED3FA9 //A18 1910data8 0xD988769F3DC76642 //A19 1911// Polynomial coefficients for left root on [-11, -10] 1912// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 1913data8 0xBDA050601F39778A, 0xBA0D4D1CE53E8241 //A0 1914data8 0xC18308A7D8EA4039, 0xBE370C379D3EAD41 //A1 1915data8 0x4306A49380644E6C, 0x3FBBB143C0E7B5C8 //A2 1916data8 0x8FA8FB233E4AA6D2, 0x0000C049 //A3 1917data8 0x802CC9D8AEAC207D, 0x00004062 //A4 1918data8 0xF3F73EE651A37A13, 0x0000C07A //A5 1919data8 0x493E3B550A7B9568 //A6 1920data8 0xCACED38DAA060929 //A7 1921data8 0x4C600B346BAB3BC6 //A8 1922data8 0xCDF0F719193E3D26 //A9 1923data8 0x4F8229F24528B151 //A10 1924data8 0xD113A4C2D32FBBE2 //A11 1925data8 0x52A56BC13DC4474D //A12 1926data8 0xD43785CFAF5E3CE3 //A13 1927data8 0x55C9FC3EA5941202 //A14 1928data8 0xD75CD545A3341AF5 //A15 1929data8 0x58F009911F77C282 //A16 1930data8 0xDA8246294D210BEC //A17 1931data8 0x5C1608AAC32C3A8E //A18 1932data8 0xDDA446E570A397D5 //A19 1933// Polynomial coefficients for left root on [-12, -11] 1934// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 1935data8 0x3DEACBB3081C502E, 0x3A8AA6F01DEDF745 //A0 1936data8 0xC1BC8CFBFB0A9912, 0xBE6556B6504A2AE6 //A1 1937data8 0x43797926206941D7, 0x40289A9644C2A216 //A2 1938data8 0xF26D2A78446D0839, 0x0000C053 //A3 1939data8 0xA238B1D937FFED38, 0x00004070 //A4 1940data8 0xE793B4F6DE470538, 0x0000C08C //A5 1941data8 0x4A9585BDC44DC45D //A6 1942data8 0xCC60759520342C47 //A7 1943data8 0x4E29B2F3694C0404 //A8 1944data8 0xCFF4619AE7B6BBAB //A9 1945data8 0x51C05DADF52B89E8 //A10 1946data8 0xD38A8C7F48819A4A //A11 1947data8 0x5555B6932D687860 //A12 1948data8 0xD721E1FACB6C1B5B //A13 1949data8 0x58EDA1E2677C8F91 //A14 1950data8 0xDAB8A8EC523C1F71 //A15 1951data8 
0x5C84930133F30411 //A16 1952data8 0xDE51952FDFD1EC49 //A17 1953data8 0x601FCCEC1BBD25F1 //A18 1954data8 0xE1E5F2D76B610920 //A19 1955// Polynomial coefficients for left root on [-13, -12] 1956// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 1957data8 0xBE01612F373268ED, 0xBA97B7A18CDF103B //A0 1958data8 0xC1F7328CBF7A4FAC, 0xBE89A25A6952F481 //A1 1959data8 0x43F0D0FA2DBDA237, 0x40A0422EC1CE6084 //A2 1960data8 0x82082DF2D32686C5, 0x0000C05F //A3 1961data8 0x8D64EE9B42E68B36, 0x0000407F //A4 1962data8 0xA3FFD82E08C630C9, 0x0000C09F //A5 1963data8 0x4BF8C49D99123466 //A6 1964data8 0xCDFEC79DDF1119ED //A7 1965data8 0x50038615A892D242 //A8 1966data8 0xD20929453DC8B537 //A9 1967data8 0x54106A78083BA1EE //A10 1968data8 0xD615A302C69E27B2 //A11 1969data8 0x581CC175870FF16F //A12 1970data8 0xDA233E0979E12B74 //A13 1971data8 0x5C29E822BC568C80 //A14 1972data8 0xDE31845DB5340FBC //A15 1973data8 0x6037BFC6D498D5F9 //A16 1974data8 0xE2407D92CD613E82 //A17 1975data8 0x64483B9B62367EB7 //A18 1976data8 0xE64B2DC830E8A799 //A19 1977// Polynomial coefficients for left root on [-14, -13] 1978// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 1979data8 0x3E563D0B930B371F, 0x3AE779957E14F012 //A0 1980data8 0xC2344C3B2F083767, 0xBEC0B7769AA3DD66 //A1 1981data8 0x4469BFFF28B50D07, 0x41181E3F13ED2401 //A2 1982data8 0xAE38F64DCB24D9EE, 0x0000C06A //A3 1983data8 0xA5C3F52C1B3506F2, 0x0000408E //A4 1984data8 0xA83BC857BCD6BA92, 0x0000C0B2 //A5 1985data8 0x4D663B4727B4D81A //A6 1986data8 0xCFA82C965B0F62E9 //A7 1987data8 0x51EAD58C02905B71 //A8 1988data8 0xD42E427970FA56AD //A9 1989data8 0x56714644C57D8476 //A10 1990data8 0xD8B3EC2037EC95F2 //A11 1991data8 0x5AF72AE68BBA5B3D //A12 1992data8 0xDD3B2152C67AA6B7 //A13 1993data8 0x5F7FF5F082861B8B //A14 1994data8 0xE1C2E8BE125A5B7A //A15 1995data8 0x64066E92FE9EBE7D //A16 1996data8 0xE64B4201CDF9F138 //A17 1997data8 0x689186351E58AA88 //A18 1998data8 0xEAD132A585DFC60A //A19 1999// Polynomial
coefficients for left root on [-15, -14] 2000// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 2001data8 0xBE6D7DDE12700AC1, 0xBB1E025BF1667FB5 //A0 2002data8 0xC273077763F60AD5, 0xBF2A1698184C7A9A //A1 2003data8 0x44E6A1BF095B1AB3, 0x4178D5AE8A4A2874 //A2 2004data8 0x8F8E0D5060FCC767, 0x0000C076 //A3 2005data8 0x800CC1DCFF092A57, 0x0000409E //A4 2006data8 0xF3AB0BA9D14D37D1, 0x0000C0C5 //A5 2007data8 0x4EDE3000A2F6D565 //A6 2008data8 0xD15EC613B9C8C800 //A7 2009data8 0x53E003309FEECCAA //A8 2010data8 0xD660ED908D8B15C4 //A9 2011data8 0x58E21E9B51A1C4AE //A10 2012data8 0xDB639745DB82210D //A11 2013data8 0x5DE55BB60C68FCF6 //A12 2014data8 0xE06772BA3FCA23C6 //A13 2015data8 0x62E9E58B4F702C31 //A14 2016data8 0xE56CBA49B071ABE2 //A15 2017data8 0x67EFF31E4F2BA36A //A16 2018data8 0xEA7232C8804F32C3 //A17 2019data8 0x6CF5EFEE929A0928 //A18 2020data8 0xEF742EE03EC3E8FF //A19 2021// Polynomial coefficients for left root on [-16, -15] 2022// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 2023data8 0xBEDCC628FEAC7A1B, 0xBB80582C8BEBB198 //A0 2024data8 0xC2B3076EE752595E, 0xBF5388F55AFAE53E //A1 2025data8 0x4566A1AAD96EBCB5, 0x421F0FEFE2444293 //A2 2026data8 0x8F8D4D3DE9850DB2, 0x0000C082 //A3 2027data8 0x800BDD6DA2CE184C, 0x000040AE //A4 2028data8 0xF3A8EC4C9CDC7A43, 0x0000C0D9 //A5 2029data8 0x505E2FAFDB81263F //A6 2030data8 0xD31EC5B3A7506CD9 //A7 2031data8 0x55E002F77E999810 //A8 2032data8 0xD8A0ED4C9B5C2900 //A9 2033data8 0x5B621E4A8267C401 //A10 2034data8 0xDE2396E5BFCFDA7A //A11 2035data8 0x60E55B43BE6F9A79 //A12 2036data8 0xE3A772324C7405FA //A13 2037data8 0x6669E4E9B7E57A2D //A14 2038data8 0xE92CB989F8A8FB37 //A15 2039data8 0x6BEFF2368849A36E //A16 2040data8 0xEEB23234FE191D55 //A17 2041data8 0x7175EF5D1080B105 //A18 2042data8 0xF4342ED7B1B7BE31 //A19 2043// Polynomial coefficients for left root on [-17, -16] 2044// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 2045data8 
0xBF28AEEE7B58C790, 0xBBC4448DE371FA0A //A0 2046data8 0xC2F436F56B3B89B1, 0xBF636755245AC63A //A1 2047data8 0x45E98A22535D115D, 0x4298467DA93DB784 //A2 2048data8 0xAC176F3775E6FCF2, 0x0000C08E //A3 2049data8 0xA3114F53A9FEB908, 0x000040BE //A4 2050data8 0xA4D168A8334AFE5A, 0x0000C0EE //A5 2051data8 0x51E5B0E7EC7182CF //A6 2052data8 0xD4E77D67B876D6B4 //A7 2053data8 0x57E9F7C30C098C83 //A8 2054data8 0xDAED29B0489EF7A7 //A9 2055data8 0x5DF09486F8A524B8 //A10 2056data8 0xE0F30B19910A2393 //A11 2057data8 0x63F60E02AB3109F4 //A12 2058data8 0xE6F9B8A3431854D5 //A13 2059data8 0x69FE2D4A6D94218E //A14 2060data8 0xED01C7E272A73560 //A15 2061data8 0x7005017D82B186B6 //A16 2062data8 0xF3096A81A69BD8AE //A17 2063data8 0x76104951BAD67D5C //A18 2064data8 0xF90FECC99786FD5B //A19 2065// Polynomial coefficients for left root on [-18, -17] 2066// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 2067data8 0x3F3C19A53328E26A, 0x3BE238D7BA036B3B //A0 2068data8 0xC336C16C16C16C13, 0xBFEACE245DEC56F3 //A1 2069data8 0x46702E85C0898B70, 0x432C922B64FD1DA4 //A2 2070data8 0xF57B99A1C0343350, 0x0000C09A //A3 2071data8 0x82EC9634223DF90D, 0x000040CF //A4 2072data8 0x94F66D7557E3237D, 0x0000C103 //A5 2073data8 0x5376118B79AE34D6 //A6 2074data8 0xD6BAE7106D52CE49 //A7 2075data8 0x5A00BD48CC8E11AB //A8 2076data8 0xDD4529722833E2DF //A9 2077data8 0x608B1654AF5F46AF //A10 2078data8 0xE3D182CC90D8723F //A11 2079data8 0x6716D43D46706AA0 //A12 2080data8 0xEA5DF888C5B428D3 //A13 2081data8 0x6DA3CA85888931A6 //A14 2082data8 0xF0EA40EF2AC7E070 //A15 2083data8 0x743175D1A251AFCD //A16 2084data8 0xF777CB6E2B550D73 //A17 2085data8 0x7AC11E468A134A51 //A18 2086data8 0xFE02B6BDD0FC40AA //A19 2087// Polynomial coefficients for left root on [-19, -18] 2088// Lgammal is approximated by polynomial within [.157807 ; -.157807 ] range 2089data8 0xBFAB767F9BE217FC, 0xBC4A5541CE0D8D0D //A0 2090data8 0xC379999999999999, 0xC01A84981B490BE8 //A1 2091data8 0x46F47AE147AE147A, 0x43AC2987BBC466EB 
//A2 2092data8 0xAEC33E1F67152987, 0x0000C0A7 //A3 2093data8 0xD1B71758E2196153, 0x000040DF //A4 2094data8 0x8637BD05AF6D420E, 0x0000C118 //A5 2095data8 0x55065E9F80F293B2 //A6 2096data8 0xD88EADA78C44BFA7 //A7 2097data8 0x5C15798EE22EC6CD //A8 2098data8 0xDF9E8ABFD67895CF //A9 2099data8 0x6325FD7FE13B0DE0 //A10 2100data8 0xE6AFFC5C3DE70858 //A11 2101data8 0x6A3774CE81C70D43 //A12 2102data8 0xEDC1527412D8129F //A13 2103data8 0x7149BABCDA8B7A72 //A14 2104data8 0xF4D330AD49071BB5 //A15 2105data8 0x785D4046F4C5F1FD //A16 2106data8 0xFBE59BFEDBA73FAF //A17 2107data8 0x7F64BEF2B2EC8DA1 //A18 2108data8 0xFFEFFFFFFFFFFFFF //A19 2109LOCAL_OBJECT_END(lgammal_left_roots_polynomial_data) 2110 2111 2112//============================================================== 2113// Code 2114//============================================================== 2115 2116.section .text 2117GLOBAL_LIBM_ENTRY(__libm_lgammal) 2118{ .mfi 2119 getf.exp rSignExpX = f8 2120 // Test x for NaTVal, NaN, +/-0, +/-INF, denormals 2121 fclass.m p6,p0 = f8,0x1EF 2122 addl r17Ones = 0x1FFFF, r0 // exponent mask 2123} 2124{ .mfi 2125 addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp 2126 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. 
in FR) 2127 adds rDelta = 0x3FC, r0 2128} 2129;; 2130{ .mfi 2131 getf.sig rSignifX = f8 2132 fcmp.lt.s1 p15, p14 = f8, f0 2133 shl rDelta = rDelta, 20 // single precision 1.5 2134} 2135{ .mfi 2136 ld8 GR_ad_z_1 = [GR_ad_z_1]// get pointer to Constants_Z_1 2137 fma.s1 fTwo = f1, f1, f1 // 2.0 2138 addl rExp8 = 0x10002, r0 // exponent of 8.0 2139} 2140;; 2141{ .mfi 2142 alloc rPFS_SAVED = ar.pfs, 0, 34, 4, 0 // get some registers 2143 fmerge.s fAbsX = f1, f8 // |x| 2144 and rExpX = rSignExpX, r17Ones // mask sign bit 2145} 2146{ .mib 2147 addl rExpHalf = 0xFFFE, r0 // exponent of 0.5 2148 addl rExp2 = 0x10000, r0 // exponent of 2.0 2149 // branch out if x is NaTVal, NaN, +/-0, +/-INF, or denormalized number 2150(p6) br.cond.spnt lgammal_spec 2151} 2152;; 2153_deno_back_to_main_path: 2154{ .mfi 2155 // Point to Constants_G_H_h1 2156 add rTbl1Addr = 0x040, GR_ad_z_1 2157 frcpa.s1 fRcpX, p0 = f1, f8 // initial approximation of 1/x 2158 extr.u GR_Index1 = rSignifX, 59, 4 2159} 2160{ .mib 2161(p14) cmp.ge.unc p8, p0 = rExpX, rExp8 // p8 = 1 if x >= 8.0 2162 adds rZ625 = 0x3F2, r0 2163(p8) br.cond.spnt lgammal_big_positive // branch out if x >= 8.0 2164} 2165;; 2166{ .mfi 2167 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1 2168 fmerge.se fSignifX = f1, f8 // significand of x 2169 // Get high 15 bits of significand 2170 extr.u GR_X_0 = rSignifX, 49, 15 2171} 2172{ .mib 2173 cmp.lt.unc p9, p0 = rExpX, rExpHalf // p9 = 1 if |x| < 0.5 2174 // set p11 if 2 <= x < 4 2175(p14) cmp.eq.unc p11, p0 = rExpX, rExp2 2176(p9) br.cond.spnt lgammal_0_half // branch out if |x| < 0.5 2177} 2178;; 2179{ .mfi 2180 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 2181 fms.s1 fA5L = f1, f1, f8 // for 0.75 <= x < 1.3125 path 2182 shl rZ625 = rZ625, 20 // single precision 0.625 2183} 2184{ .mib 2185 setf.s FR_MHalf = rDelta 2186 // set p10 if x >= 4.0 2187(p14) cmp.gt.unc p10, p0 = rExpX, rExp2 2188 // branch to special path for 4.0 <= x < 8 2189(p10) br.cond.spnt lgammal_4_8 2190} 2191;; 2192{
.mfi 2193 // for 1.3125 <= x < 1.5625 path 2194 addl rPolDataPtr= @ltoff(lgammal_loc_min_data),gp 2195 // argument of polynomial approximation for 1.5625 <= x < 2.25 2196 fms.s1 fB4 = f8, f1, fTwo 2197 cmp.eq p12, p0 = rExpX, rExpHalf 2198} 2199{ .mib 2200 addl rExpOne = 0xFFFF, r0 // exponent of 1.0 2201 // keep p11 set only if significand of x >= 1.125, i.e. 2.25 <= x < 4 2202(p11) cmp.le p11, p0 = 2, GR_Index1 2203(p11) br.cond.spnt lgammal_2Q_4 2204} 2205;; 2206{ .mfi 2207 // point to xMin for 1.3125 <= x < 1.5625 path 2208 ld8 rPolDataPtr = [rPolDataPtr] 2209 fcvt.xf fFltIntX = fXint // RTN(x) 2210(p14) cmp.eq.unc p13, p7 = rExpX, rExpOne // p13 set if 1.0 <= x < 2.0 2211} 2212{ .mib 2213 setf.s FR_FracX = rZ625 2214 // set p12 if |x| < 0.75 2215(p12) cmp.gt.unc p12, p0 = 8, GR_Index1 2216 // branch out to special path for |x| < 0.75 2217(p12) br.cond.spnt lgammal_half_3Q 2218} 2219;; 2220.pred.rel "mutex", p7, p13 2221{ .mfi 2222 getf.sig rXRnd = fXint // integer part of the input value 2223 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration 2224 // Get bits 30-15 of X_0 * Z_1 2225 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 2226} 2227{ .mib 2228(p7) cmp.eq p6, p0 = rExpX, rExp2 // p6 set if 2.0 <= x < 2.25 2229(p13) cmp.le p6, p0 = 9, GR_Index1 2230 // branch to special path 1.5625 <= x < 2.25 2231(p6) br.cond.spnt lgammal_13Q_2Q 2232} 2233;; 2234// 2235// For performance, don't use result of pmpyshr2.u for 4 cycles. 2236// 2237{ .mfi 2238 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr // Point to G_1 2239 fma.s1 fSix = fTwo, fTwo, fTwo // 6.0 2240 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q 2241} 2242{ .mib 2243 add rTmpPtr3 = -0x50, GR_ad_z_1 2244(p13) cmp.gt p7, p0 = 5, GR_Index1 2245 // branch to special path 0.75 <= x < 1.3125 2246(p7) br.cond.spnt lgammal_03Q_1Q 2247} 2248;; 2249{ .mfi 2250 add rTmpPtr = 8, GR_ad_tbl_1 2251 fma.s1 fRoot = f8, f1, f1 // x + 1 2252 // Absolute value of int arg.
Will be used as index in table with roots 2253 sub rXRnd = r0, rXRnd 2254} 2255{ .mib 2256 ldfe fA5L = [rPolDataPtr], 16 // xMin 2257 addl rNegSingularity = 0x3003E, r0 2258(p14) br.cond.spnt lgammal_loc_min 2259} 2260;; 2261{ .mfi 2262 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1 2263 nop.f 0 2264 add rZ2Addr = 0x140, GR_ad_z_1 // Point to Constants_Z_2 2265} 2266{ .mib 2267 ldfd FR_h = [rTmpPtr] // Load h_1 2268 // If arg is less or equal to -2^63 2269 cmp.geu.unc p8,p0 = rSignExpX, rNegSingularity 2270 // Singularity for x < -2^63 since all such arguments are integers 2271 // branch to special code which deals with singularity 2272(p8) br.cond.spnt lgammal_singularity 2273} 2274;; 2275{ .mfi 2276 ldfe FR_log2_hi = [GR_ad_q], 32 // Load log2_hi 2277 nop.f 0 2278 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 2279} 2280{ .mfi 2281 ldfe FR_log2_lo = [rTmpPtr3], 32 // Load log2_lo 2282 fms.s1 fDx = f8, f1, fFltIntX // x - RTN(x) 2283 // index in table with roots and bounds 2284 adds rXint = -2, rXRnd 2285} 2286;; 2287{ .mfi 2288 ldfe FR_Q4 = [GR_ad_q], 32 // Load Q4 2289 nop.f 0 2290 // set p12 if x may be close to negative root: -19.5 < x < -2.0 2291 cmp.gtu p12, p0 = 18, rXint 2292} 2293{ .mfi 2294 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 2295 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration 2296 // Point to Constants_G_H_h2 2297 add rTbl2Addr = 0x180, GR_ad_z_1 2298} 2299;; 2300{ .mfi 2301 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 2302 // set p9 if x is integer and negative 2303 fcmp.eq.s1 p9, p0 = f8,fFltIntX 2304 // Point to Constants_G_H_h3 2305 add rTbl3Addr = 0x280, GR_ad_z_1 2306} 2307{ .mfi 2308 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 2309 nop.f 0 2310 sub GR_N = rExpX, rExpHalf, 1 2311} 2312;; 2313{ .mfi 2314 ldfe FR_Q3 = [rTmpPtr3], 32 // Load Q3 2315 nop.f 0 2316 // Point to lnsin polynomial coefficients 2317 adds rLnSinDataPtr = 864, rTbl3Addr 2318} 2319{ .mfi 2320 ldfe FR_Q2 = 
[GR_ad_q],32 // Load Q2 2321 nop.f 0 2322 add rTmpPtr = 8, GR_ad_tbl_2 2323} 2324;; 2325{ .mfi 2326 ldfe FR_Q1 = [rTmpPtr3] // Load Q1 2327 fcmp.lt.s1 p0, p15 = fAbsX, fSix // p15 is set when x < -6.0 2328 // point to table with roots and bounds 2329 adds rRootsBndAddr = -1296, GR_ad_z_1 2330} 2331{ .mfb 2332 // Put integer N into rightmost significand 2333 setf.sig fFloatN = GR_N 2334 fma.s1 fThirteen = fSix, fTwo, f1 // 13.0 2335 // Singularity if -2^63 < x < 0 and x is integer 2336 // branch to special code which deals with singularity 2337(p9) br.cond.spnt lgammal_singularity 2338} 2339;; 2340{ .mfi 2341 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2 2342 // y = |x|/2^(exponent(x)) - 1.5 2343 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf 2344 // Get bits 30-15 of X_1 * Z_2 2345 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 2346} 2347{ .mfi 2348 ldfd FR_h2 = [rTmpPtr] // Load h_2 2349 fma.s1 fDxSqr = fDx, fDx, f0 // deltaX^2 2350 adds rTmpPtr3 = 128, rLnSinDataPtr 2351} 2352;; 2353// 2354// For performance, don't use result of pmpyshr2.u for 4 cycles. 
2355// 2356{ .mfi 2357 getf.exp rRoot = fRoot // sign and biased exponent of (x + 1) 2358 nop.f 0 2359 // set p6 if -4 < x <= -2 2360 cmp.eq p6, p0 = rExpX, rExp2 2361} 2362{ .mfi 2363 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16 2364 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration 2365 sub rIndexPol = rExpX, rExpHalf // index of polynomial 2366} 2367;; 2368{ .mfi 2369 ldfe fLnSin4 = [rLnSinDataPtr], 96 2370 // p10 is set if x is potential "right" root 2371 // p11 set for possible "left" root 2372 fcmp.lt.s1 p10, p11 = fDx, f0 2373 shl rIndexPol = rIndexPol, 6 // (i*16)*4 2374} 2375{ .mfi 2376 ldfpd fLnSin18, fLnSin20 = [rTmpPtr3], 16 2377 nop.f 0 2378 mov rExp2tom7 = 0x0fff8 // Exponent of 2^-7 2379} 2380;; 2381{ .mfi 2382 getf.sig rSignifDx = fDx // Get significand of DeltaX = x - RTN(x) 2383 nop.f 0 2384 // set p6 if -4 < x <= -3.0 2385(p6) cmp.le.unc p6, p0 = 0x8, GR_Index1 2386} 2387{ .mfi 2388 ldfpd fLnSin22, fLnSin24 = [rTmpPtr3], 16 2389 nop.f 0 2390 // mask sign bit in the exponent of (x + 1) 2391 and rRoot = rRoot, r17Ones 2392} 2393;; 2394{ .mfi 2395 ldfe fLnSin16 = [rLnSinDataPtr], -80 2396 nop.f 0 2397 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 2398} 2399{ .mfi 2400 ldfpd fLnSin26, fLnSin28 = [rTmpPtr3], 16 2401 nop.f 0 2402 and rXRnd = 1, rXRnd 2403} 2404;; 2405{ .mfi 2406 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 2407 fms.s1 fDxSqrL = fDx, fDx, fDxSqr // low part of deltaX^2 2408 // potential "left" root 2409(p11) adds rRootsBndAddr = 560, rRootsBndAddr 2410} 2411{ .mib 2412 ldfpd fLnSin30, fLnSin32 = [rTmpPtr3], 16 2413 // set p7 if |x+1| < 2^-7 2414 cmp.lt p7, p0 = rRoot, rExp2tom7 2415 // branch to special path for |x+1| < 2^-7 2416(p7) br.cond.spnt _closeToNegOne 2417} 2418;; 2419{ .mfi 2420 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3 2421 fcmp.lt.s1 p14, p0 = fAbsX, fThirteen // set p14 if x > -13.0 2422 // base address of polynomial on range [-6.0, -0.75] 2423 adds rPolDataPtr = 3440, rTbl3Addr 2424}
2425{ .mfi 2426 // (i*16)*4 + (i*16)*8 - offset of polynomial on range [-6.0, -0.75] 2427 shladd rTmpPtr = rIndexPol, 2, rIndexPol 2428 fma.s1 fXSqr = FR_FracX, FR_FracX, f0 // y^2 2429 // point to left "near root" bound 2430(p12) shladd rRootsBndAddr = rXint, 4, rRootsBndAddr 2431} 2432;; 2433{ .mfi 2434 ldfpd fLnSin34, fLnSin36 = [rTmpPtr3], 16 2435 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration 2436 // add special offset if -4 < x <= -3.0 2437(p6) adds rPolDataPtr = 640, rPolDataPtr 2438} 2439{ .mfi 2440 // point to right "near root" bound 2441 adds rTmpPtr2 = 8, rRootsBndAddr 2442 fnma.s1 fMOne = f1, f1, f0 // -1.0 2443 // Point to Bernoulli numbers 2444 adds rBernulliPtr = 544, rTbl3Addr 2445} 2446;; 2447{ .mfi 2448 // left bound of "near root" range 2449(p12) ld8 rLeftBound = [rRootsBndAddr] 2450 fmerge.se fNormDx = f1, fDx // significand of DeltaX 2451 // base address + offset for polynomial coeff. on range [-6.0, -0.75] 2452 add rPolDataPtr = rPolDataPtr, rTmpPtr 2453} 2454{ .mfi 2455 // right bound of "near root" range 2456(p12) ld8 rRightBound = [rTmpPtr2] 2457 fcvt.xf fFloatN = fFloatN 2458 // special "Bernoulli" numbers for Stirling's formula for -13 < x < -6 2459(p14) adds rBernulliPtr = 160, rBernulliPtr 2460} 2461;; 2462{ .mfi 2463 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 2464 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 2465 adds rTmpPtr3 = -160, rTmpPtr3 2466} 2467{ .mfb 2468 adds rTmpPtr = 80, rPolDataPtr 2469 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 2470 // p15 is set if -2^63 < x < -6.0 and x is not an integer 2471 // branch to path with implementation using Stirling's formula for neg.
x 2472(p15) br.cond.spnt _negStirling 2473} 2474;; 2475{ .mfi 2476 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3 2477 fma.s1 fDelX4 = fDxSqr, fDxSqr, f0 // deltaX^4 2478 // Get high 4 bits of signif 2479 extr.u rIndex1Dx = rSignifDx, 59, 4 2480} 2481{ .mfi 2482 ldfe fA5 = [rTmpPtr], -16 // A5 2483 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 2484 adds rLnSinTmpPtr = 16, rLnSinDataPtr 2485} 2486;; 2487{ .mfi 2488 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 2489 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin18 2490 // Get high 15 bits of significand 2491 extr.u rX0Dx = rSignifDx, 49, 15 2492} 2493{ .mfi 2494 ldfe fA4 = [rTmpPtr], 192 // A4 2495 fms.s1 fXSqrL = FR_FracX, FR_FracX, fXSqr // low part of y^2 2496 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1 2497} 2498;; 2499{ .mfi 2500 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 2501 fma.s1 fX4 = fXSqr, fXSqr, f0 // y^4 2502 adds rTmpPtr2 = 32, rTmpPtr 2503} 2504{ .mfi 2505 ldfpd fA18, fA19 = [rTmpPtr], 16 // A18, A19 2506 fma.s1 fLnSin24 = fLnSin24, fDxSqr, fLnSin22 2507 nop.i 0 2508} 2509;; 2510{ .mfi 2511 ldfe fLnSin6 = [rLnSinDataPtr], 32 2512 fma.s1 fLnSin28 = fLnSin28, fDxSqr, fLnSin26 2513 nop.i 0 2514} 2515{ .mfi 2516 ldfe fLnSin8 = [rLnSinTmpPtr], 32 2517 nop.f 0 2518 nop.i 0 2519} 2520;; 2521{ .mfi 2522 ldfpd fA20, fA21 = [rTmpPtr], 16 // A20, A21 2523 fma.s1 fLnSin32 = fLnSin32, fDxSqr, fLnSin30 2524 nop.i 0 2525} 2526{ .mfi 2527 ldfpd fA22, fA23 = [rTmpPtr2], 16 // A22, A23 2528 fma.s1 fB20 = f1, f1, FR_MHalf // 2.5 2529(p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound 2530} 2531;; 2532{ .mfi 2533 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 2534 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 2535 // set p6 if x falls in "near root" range 2536(p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound 2537} 2538{ .mfb 2539 adds rTmpPtr3 = -64, rTmpPtr 2540 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 2541 // branch to special path if x falls in "near root" range 2542(p6) br.cond.spnt _negRoots 2543} 2544;; 2545{ 
.mfi 2546 ldfpd fA24, fA25 = [rTmpPtr2], 16 // A24, A25 2547 fma.s1 fLnSin36 = fLnSin36, fDxSqr, fLnSin34 2548(p11) cmp.eq.unc p7, p0 = 1,rXint // p7 set if -3.0 < x < -2.5 2549} 2550{ .mfi 2551 adds rTmpPtr = -48, rTmpPtr 2552 fma.s1 fLnSin20 = fLnSin20, fDxSqr, fLnSin16 2553 addl rDelta = 0x5338, r0 // significand of -2.605859375 2554} 2555;; 2556{ .mfi 2557 getf.exp GR_N = fDx // Get N = exponent of DeltaX 2558 fma.s1 fX6 = fX4, fXSqr, f0 // y^6 2559 // p7 set if -2.605859375 <= x < -2.5 2560(p7) cmp.gt.unc p7, p0 = rDelta, GR_X_0 2561} 2562{ .mfb 2563 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1 2564 fma.s1 fDelX8 = fDelX4, fDelX4, f0 // deltaX^8 2565 // branch to special path for -2.605859375 <= x < -2.5 2566(p7) br.cond.spnt _neg2andHalf 2567} 2568;; 2569{ .mfi 2570 ldfpd fA14, fA15 = [rTmpPtr3], 16 // A14, A15 2571 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 2572 adds rTmpPtr2 = 128 , rPolDataPtr 2573} 2574{ .mfi 2575 ldfpd fA16, fA17 = [rTmpPtr], 16 // A16, A17 2576 fma.s1 fLnSin28 = fLnSin28, fDelX4, fLnSin24 2577 adds rPolDataPtr = 144 , rPolDataPtr 2578} 2579;; 2580{ .mfi 2581 ldfe fLnSin10 = [rLnSinDataPtr], 32 2582 fma.s1 fRes1H = fA3, FR_FracX, f0 // (A3*y)hi 2583 and GR_N = GR_N, r17Ones // mask sign bit 2584} 2585{ .mfi 2586 ldfe fLnSin12 = [rLnSinTmpPtr] 2587 fma.s1 fDelX6 = fDxSqr, fDelX4, f0 // DeltaX^6 2588 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1 2589} 2590;; 2591{ .mfi 2592 ldfe fA13 = [rPolDataPtr], -32 // A13 2593 fma.s1 fA4 = fA5, FR_FracX, fA4 // A5*y + A4 2594 // Get bits 30-15 of X_0 * Z_1 2595 pmpyshr2.u GR_X_1 = rX0Dx, GR_Z_1, 15 2596} 2597{ .mfi 2598 ldfe fA12 = [rTmpPtr2], -32 // A12 2599 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 2600 sub GR_N = GR_N, rExpHalf, 1 // unbisaed exponent of DeltaX 2601} 2602;; 2603// 2604// For performance, don't use result of pmpyshr2.u for 4 cycles. 
2605// 2606.pred.rel "mutex",p10,p11 2607{ .mfi 2608 ldfe fA11 = [rPolDataPtr], -32 // A11 2609 // High part of log(|x|) = Y_hi = N * log2_hi + H 2610 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H 2611(p10) cmp.eq p8, p9 = rXRnd, r0 2612} 2613{ .mfi 2614 ldfe fA10 = [rTmpPtr2], -32 // A10 2615 fma.s1 fRes6H = fA1, FR_FracX, f0 // (A1*y)hi 2616(p11) cmp.eq p9, p8 = rXRnd, r0 2617} 2618;; 2619{ .mfi 2620 ldfe fA9 = [rPolDataPtr], -32 // A9 2621 fma.s1 fB14 = fLnSin6, fDxSqr, f0 // (LnSin6*deltaX^2)hi 2622 cmp.eq p6, p7 = 4, rSgnGamSize 2623} 2624{ .mfi 2625 ldfe fA8 = [rTmpPtr2], -32 // A8 2626 fma.s1 fA18 = fA19, FR_FracX, fA18 2627 nop.i 0 2628} 2629;; 2630{ .mfi 2631 ldfe fA7 = [rPolDataPtr] // A7 2632 fma.s1 fA23 = fA23, FR_FracX, fA22 2633 nop.i 0 2634} 2635{ .mfi 2636 ldfe fA6 = [rTmpPtr2] // A6 2637 fma.s1 fA21 = fA21, FR_FracX, fA20 2638 nop.i 0 2639} 2640;; 2641{ .mfi 2642 ldfe fLnSin14 = [rLnSinDataPtr] 2643 fms.s1 fRes1L = fA3, FR_FracX, fRes1H // delta((A3*y)hi) 2644 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 2645} 2646{ .mfi 2647 setf.sig fFloatNDx = GR_N 2648 fadd.s1 fPol = fRes1H, fA2 // (A3*y + A2)hi 2649 nop.i 0 2650} 2651;; 2652{ .mfi 2653 ldfps FR_G, FR_H = [GR_ad_tbl_1], 8 // Load G_1, H_1 2654 fma.s1 fRes2H = fA4, fXSqr, f0 // ((A5 + A4*y)*y^2)hi 2655 nop.i 0 2656} 2657{ .mfi 2658 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 2659 fma.s1 fA25 = fA25, FR_FracX, fA24 2660 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 2661} 2662;; 2663.pred.rel "mutex",p8,p9 2664{ .mfi 2665 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 2666 fms.s1 fRes6L = fA1, FR_FracX, fRes6H // delta((A1*y)hi) 2667 // sign of GAMMA(x) is negative 2668(p8) adds rSgnGam = -1, r0 2669} 2670{ .mfi 2671 adds rTmpPtr = 8, GR_ad_tbl_2 2672 fadd.s1 fRes3H = fRes6H, fA0 // (A1*y + A0)hi 2673 // sign of GAMMA(x) is positive 2674(p9) adds rSgnGam = 1, r0 2675} 2676;; 2677{ .mfi 2678 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2] // Load G_2, H_2 2679 // (LnSin6*deltaX^2 + 
LnSin4)hi 2680 fadd.s1 fLnSinH = fB14, fLnSin4 2681 nop.i 0 2682} 2683{ .mfi 2684 ldfd FR_h2 = [rTmpPtr] // Load h_2 2685 fms.s1 fB16 = fLnSin6, fDxSqr, fB14 // delta(LnSin6*deltaX^2) 2686 nop.i 0 2687} 2688;; 2689{ .mfi 2690 ldfd fhDelX = [GR_ad_tbl_1] // Load h_1 2691 fma.s1 fA21 = fA21, fXSqr, fA18 2692 nop.i 0 2693} 2694{ .mfi 2695 nop.m 0 2696 fma.s1 fLnSin36 = fLnSin36, fDelX4, fLnSin32 2697 nop.i 0 2698} 2699;; 2700{ .mfi 2701 nop.m 0 2702 fma.s1 fRes1L = fA3L, FR_FracX, fRes1L // (A3*y)lo 2703 // Get bits 30-15 of X_1 * Z_ 2704 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 2705} 2706{ .mfi 2707 nop.m 0 2708 fsub.s1 fPolL = fA2, fPol 2709 nop.i 0 2710} 2711;; 2712// 2713// For performance, don't use result of pmpyshr2.u for 4 cycles. 2714// 2715{ .mfi 2716 nop.m 0 2717 // delta(((A5 + A4*y)*y^2)hi) 2718 fms.s1 fRes2L = fA4, fXSqr, fRes2H 2719 nop.i 0 2720} 2721{ .mfi 2722 nop.m 0 2723 // (((A5 + A4*y)*y^2) + A3*y + A2)hi 2724 fadd.s1 fRes4H = fRes2H, fPol 2725 nop.i 0 2726} 2727;; 2728{ .mfi 2729 // store signgam if size of variable is 4 bytes 2730(p6) st4 [rSgnGamAddr] = rSgnGam 2731 fma.s1 fRes6L = fA1L, FR_FracX, fRes6L // (A1*y)lo 2732 nop.i 0 2733} 2734{ .mfi 2735 // store signgam if size of variable is 8 bytes 2736(p7) st8 [rSgnGamAddr] = rSgnGam 2737 fsub.s1 fRes3L = fA0, fRes3H 2738 nop.i 0 2739} 2740;; 2741{ .mfi 2742 nop.m 0 2743 fsub.s1 fLnSinL = fLnSin4, fLnSinH 2744 nop.i 0 2745} 2746{ .mfi 2747 nop.m 0 2748 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)hi 2749 fma.s1 fB18 = fLnSinH, fDxSqr, f0 2750 nop.i 0 2751} 2752;; 2753{ .mfi 2754 adds rTmpPtr = 8, rTbl3Addr 2755 fma.s1 fB16 = fLnSin6, fDxSqrL, fB16 // (LnSin6*deltaX^2)lo 2756 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 2757} 2758{ .mfi 2759 nop.m 0 2760 fma.s1 fA25 = fA25, fXSqr, fA23 2761 nop.i 0 2762} 2763;; 2764{ .mfi 2765 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 2766 fadd.s1 fPolL = fPolL, fRes1H 2767 nop.i 0 2768} 2769{ .mfi 2770 shladd rTmpPtr = GR_Index3, 4, 
rTmpPtr // Point to G_3 2771 fadd.s1 fRes1L = fRes1L, fA2L // (A3*y)lo + A2lo 2772 nop.i 0 2773} 2774;; 2775{ .mfi 2776 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3] // Load G_3, H_3 2777 fma.s1 fRes2L = fA4, fXSqrL, fRes2L // ((A5 + A4*y)*y^2)lo 2778 nop.i 0 2779} 2780{ .mfi 2781 ldfd FR_h3 = [rTmpPtr] // Load h_3 2782 fsub.s1 fRes4L = fPol, fRes4H 2783 nop.i 0 2784} 2785;; 2786{ .mfi 2787 nop.m 0 2788 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)hi 2789 fma.s1 fRes7H = fRes4H, fXSqr, f0 2790 nop.i 0 2791} 2792{ .mfi 2793 nop.m 0 2794 fma.s1 fA15 = fA15, FR_FracX, fA14 2795 nop.i 0 2796} 2797;; 2798{ .mfi 2799 nop.m 0 2800 fadd.s1 fRes3L = fRes3L, fRes6H 2801 nop.i 0 2802} 2803{ .mfi 2804 nop.m 0 2805 fadd.s1 fRes6L = fRes6L, fA0L // (A1*y)lo + A0lo 2806 nop.i 0 2807} 2808;; 2809{ .mfi 2810 nop.m 0 2811 fadd.s1 fLnSinL = fLnSinL, fB14 2812 2813 nop.i 0 2814} 2815{ .mfi 2816 nop.m 0 2817 // delta((LnSin6*deltaX^2 + LnSin4)*deltaX^2) 2818 fms.s1 fB20 = fLnSinH, fDxSqr, fB18 2819 nop.i 0 2820} 2821;; 2822{ .mfi 2823 nop.m 0 2824 fadd.s1 fPolL = fPolL, fRes1L // (A3*y + A2)lo 2825 2826 nop.i 0 2827} 2828{ .mfi 2829 nop.m 0 2830 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)hi 2831 fadd.s1 fLnSin6 = fB18, fLnSin2 2832 nop.i 0 2833} 2834;; 2835{ .mfi 2836 nop.m 0 2837 fadd.s1 fRes4L = fRes4L, fRes2H 2838 nop.i 0 2839} 2840{ .mfi 2841 nop.m 0 2842 fma.s1 fA17 = fA17, FR_FracX, fA16 2843 nop.i 0 2844} 2845;; 2846{ .mfi 2847 nop.m 0 2848 // delta(((((A5 + A4*y)*y^2) + A3*y + A2)*y^2) 2849 fms.s1 fRes7L = fRes4H, fXSqr, fRes7H 2850 nop.i 0 2851} 2852{ .mfi 2853 nop.m 0 2854 fadd.s1 fPol = fRes7H, fRes3H 2855 nop.i 0 2856} 2857;; 2858{ .mfi 2859 nop.m 0 2860 fadd.s1 fRes3L = fRes3L, fRes6L // (A1*y + A0)lo 2861 nop.i 0 2862} 2863{ .mfi 2864 nop.m 0 2865 fma.s1 fA25 = fA25, fX4, fA21 2866 nop.i 0 2867} 2868;; 2869{ .mfi 2870 nop.m 0 2871 // (LnSin6*deltaX^2 + LnSin4)lo 2872 fadd.s1 fLnSinL = fLnSinL, fB16 2873 nop.i 0 2874} 2875{ .mfi 2876 nop.m 0 2877 fma.s1 fB20 = fLnSinH, fDxSqrL, fB20 
2878 nop.i 0 2879} 2880;; 2881{ .mfi 2882 nop.m 0 2883 fsub.s1 fLnSin4 = fLnSin2, fLnSin6 2884 nop.i 0 2885} 2886{ .mfi 2887 nop.m 0 2888 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)hi 2889 fma.s1 fLnSinH = fLnSin6, fDxSqr, f0 2890 nop.i 0 2891} 2892;; 2893{ .mfi 2894 nop.m 0 2895 // ((A5 + A4*y)*y^2)lo + (A3*y + A2)lo 2896 fadd.s1 fRes2L = fRes2L, fPolL 2897 nop.i 0 2898} 2899{ .mfi 2900 nop.m 0 2901 fma.s1 fA17 = fA17, fXSqr, fA15 2902 nop.i 0 2903} 2904;; 2905{ .mfi 2906 nop.m 0 2907 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo 2908 fma.s1 fRes7L = fRes4H, fXSqrL, fRes7L 2909 nop.i 0 2910} 2911{ .mfi 2912 nop.m 0 2913 fsub.s1 fPolL = fRes3H, fPol 2914 nop.i 0 2915} 2916;; 2917{ .mfi 2918 nop.m 0 2919 fma.s1 fA13 = fA13, FR_FracX, fA12 2920 nop.i 0 2921} 2922{ .mfi 2923 nop.m 0 2924 fma.s1 fA11 = fA11, FR_FracX, fA10 2925 nop.i 0 2926} 2927;; 2928{ .mfi 2929 nop.m 0 2930 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2)lo 2931 fma.s1 fB20 = fLnSinL, fDxSqr, fB20 2932 nop.i 0 2933} 2934{ .mfi 2935 nop.m 0 2936 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 2937 nop.i 0 2938} 2939;; 2940{ .mfi 2941 nop.m 0 2942 fadd.s1 fLnSin4 = fLnSin4, fB18 2943 nop.i 0 2944} 2945{ .mfi 2946 nop.m 0 2947 fms.s1 fLnSinL = fLnSin6, fDxSqr, fLnSinH 2948 nop.i 0 2949} 2950;; 2951{ .mfi 2952 nop.m 0 2953 // (((A5 + A4*y)*y^2) + A3*y + A2)lo 2954 fadd.s1 fRes4L = fRes4L, fRes2L 2955 nop.i 0 2956} 2957{ .mfi 2958 nop.m 0 2959 fadd.s1 fhDelX = fhDelX, FR_h2 // h = h_1 + h_2 2960 nop.i 0 2961} 2962;; 2963{ .mfi 2964 nop.m 0 2965 fadd.s1 fRes7L = fRes7L, fRes3L 2966 nop.i 0 2967} 2968{ .mfi 2969 nop.m 0 2970 fadd.s1 fPolL = fPolL, fRes7H 2971 nop.i 0 2972} 2973;; 2974{ .mfi 2975 nop.m 0 2976 fcvt.xf fFloatNDx = fFloatNDx 2977 nop.i 0 2978} 2979{ .mfi 2980 nop.m 0 2981 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 2982 nop.i 0 2983} 2984;; 2985{ .mfi 2986 nop.m 0 2987 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 2988 nop.i 0 2989} 2990{ .mfi 2991 nop.m 0 2992 // ((LnSin6*deltaX^2 + 
LnSin4)*deltaX^2)lo + (LnSin2)lo 2993 fadd.s1 fLnSin2L = fLnSin2L, fB20 2994 nop.i 0 2995} 2996;; 2997{ .mfi 2998 nop.m 0 2999 fma.s1 fA25 = fA25, fX4, fA17 3000 nop.i 0 3001} 3002{ .mfi 3003 nop.m 0 3004 fma.s1 fA13 = fA13, fXSqr, fA11 3005 nop.i 0 3006} 3007;; 3008{ .mfi 3009 nop.m 0 3010 fma.s1 fA9 = fA9, FR_FracX, fA8 3011 nop.i 0 3012} 3013{ .mfi 3014 nop.m 0 3015 fma.s1 fA7 = fA7, FR_FracX, fA6 3016 nop.i 0 3017} 3018;; 3019{ .mfi 3020 nop.m 0 3021 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin28 3022 nop.i 0 3023} 3024{ .mfi 3025 nop.m 0 3026 fma.s1 fLnSin14 = fLnSin14, fDxSqr, fLnSin12 3027 nop.i 0 3028} 3029;; 3030{ .mfi 3031 nop.m 0 3032 fma.s1 fLnSin10 = fLnSin10, fDxSqr, fLnSin8 3033 nop.i 0 3034} 3035{ .mfi 3036 nop.m 0 3037 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 3038 nop.i 0 3039} 3040;; 3041{ .mfi 3042 nop.m 0 3043 fms.s1 fRDx = FR_G, fNormDx, f1 // r = G * S_hi - 1 3044 nop.i 0 3045} 3046{ .mfi 3047 nop.m 0 3048 // poly_lo = r * Q4 + Q3 3049 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 3050 nop.i 0 3051} 3052;; 3053{ .mfi 3054 nop.m 0 3055 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r 3056 nop.i 0 3057} 3058{ .mfi 3059 nop.m 0 3060 // ((((A5 + A4*y)*y^2) + A3*y + A2)*y^2)lo + (A1*y + A0)lo 3061 fma.s1 fRes7L = fRes4L, fXSqr, fRes7L 3062 nop.i 0 3063} 3064;; 3065{ .mfi 3066 nop.m 0 3067 fma.s1 fA25 = fA25, fX4, fA13 3068 nop.i 0 3069} 3070{ .mfi 3071 nop.m 0 3072 fma.s1 fA9 = fA9, fXSqr, fA7 3073 nop.i 0 3074} 3075;; 3076{ .mfi 3077 nop.m 0 3078 // h = N * log2_lo + h 3079 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h 3080 nop.i 0 3081} 3082{ .mfi 3083 nop.m 0 3084 fadd.s1 fhDelX = fhDelX, FR_h3 // h = (h_1 + h_2) + h_3 3085 nop.i 0 3086} 3087;; 3088{ .mfi 3089 nop.m 0 3090 fma.s1 fLnSin36 = fLnSin36, fDelX6, fLnSin20 3091 nop.i 0 3092} 3093{ .mfi 3094 nop.m 0 3095 fma.s1 fLnSin14 = fLnSin14, fDelX4, fLnSin10 3096 nop.i 0 3097} 3098;; 3099{ .mfi 3100 nop.m 0 3101 // poly_lo = r * Q4 + Q3 3102 fma.s1 fPolyLoDx = fRDx, FR_Q4, FR_Q3 3103 nop.i 0 3104} 3105{ 
.mfi 3106 nop.m 0 3107 fmpy.s1 fRDxSq = fRDx, fRDx // rsq = r * r 3108 nop.i 0 3109} 3110;; 3111{ .mfi 3112 nop.m 0 3113 // Y_hi = N * log2_hi + H 3114 fma.s1 fResLnDxH = fFloatNDx, FR_log2_hi, FR_H 3115 nop.i 0 3116} 3117{ .mfi 3118 nop.m 0 3119 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 3120 nop.i 0 3121} 3122;; 3123{ .mfi 3124 nop.m 0 3125 fma.s1 fA9 = fA25, fX4, fA9 3126 nop.i 0 3127} 3128{ .mfi 3129 nop.m 0 3130 fadd.s1 fPolL = fPolL, fRes7L 3131 nop.i 0 3132} 3133;; 3134{ .mfi 3135 nop.m 0 3136 fadd.s1 fLnSin4 = fLnSin4, fLnSin2L 3137 nop.i 0 3138} 3139{ .mfi 3140 nop.m 0 3141 // h = N * log2_lo + h 3142 fma.s1 fhDelX = fFloatNDx, FR_log2_lo, fhDelX 3143 nop.i 0 3144} 3145;; 3146{ .mfi 3147 nop.m 0 3148 fma.s1 fLnSin36 = fLnSin36, fDelX8, fLnSin14 3149 nop.i 0 3150} 3151{ .mfi 3152 nop.m 0 3153 // ((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)lo 3154 fma.s1 fLnSinL = fLnSin6, fDxSqrL, fLnSinL 3155 nop.i 0 3156} 3157;; 3158{ .mfi 3159 nop.m 0 3160 // poly_lo = poly_lo * r + Q2 3161 fma.s1 fPolyLoDx = fPolyLoDx, fRDx, FR_Q2 3162 nop.i 0 3163} 3164{ .mfi 3165 nop.m 0 3166 fma.s1 fRDxCub = fRDxSq, fRDx, f0 // rcub = r^3 3167 nop.i 0 3168} 3169;; 3170{ .mfi 3171 nop.m 0 3172 famax.s0 fRes5H = fPol, fResH 3173 nop.i 0 3174} 3175{ .mfi 3176 nop.m 0 3177 // High part of (lgammal(|x|) + log(|x|)) 3178 fadd.s1 fRes1H = fPol, fResH 3179 nop.i 0 3180} 3181;; 3182{ .mfi 3183 nop.m 0 3184 // poly_lo = poly_lo * r + Q2 3185 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 3186 nop.i 0 3187} 3188{ .mfi 3189 nop.m 0 3190 fma.s1 fPolL = fA9, fX6, fPolL // P25lo 3191 nop.i 0 3192} 3193;; 3194 3195{ .mfi 3196 nop.m 0 3197 famin.s0 fRes5L = fPol, fResH 3198 nop.i 0 3199} 3200{ .mfi 3201 nop.m 0 3202 // High part of -(LnSin + log(|DeltaX|)) 3203 fnma.s1 fRes2H = fResLnDxH, f1, fLnSinH 3204 nop.i 0 3205} 3206;; 3207 3208{ .mfi 3209 nop.m 0 3210 // (((LnSin6*deltaX^2 + LnSin4)*deltaX^2 + LnSin2)*DeltaX^2)lo 3211 fma.s1 fLnSinL = fLnSin4, fDxSqr, fLnSinL 3212 nop.i 0 3213} 3214{ .mfi 
3215 nop.m 0 3216 fma.s1 fLnSin36 = fLnSin36, fDelX6, f0 3217 nop.i 0 3218} 3219;; 3220{ .mfi 3221 nop.m 0 3222 // poly_hi = Q1 * rsq + r 3223 fma.s1 fPolyHiDx = FR_Q1, fRDxSq, fRDx 3224 nop.i 0 3225} 3226{ .mfi 3227 nop.m 0 3228 // poly_lo = poly_lo*r^3 + h 3229 fma.s1 fPolyLoDx = fPolyLoDx, fRDxCub, fhDelX 3230 nop.i 0 3231} 3232;; 3233{ .mfi 3234 nop.m 0 3235 fsub.s1 fRes1L = fRes5H, fRes1H 3236 nop.i 0 3237} 3238{ .mfi 3239 nop.m 0 3240 // -(lgammal(|x|) + log(|x|))hi 3241 fnma.s1 fRes1H = fRes1H, f1, f0 3242 3243 nop.i 0 3244} 3245;; 3246{ .mfi 3247 nop.m 0 3248 // poly_hi = Q1 * rsq + r 3249 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 3250 nop.i 0 3251} 3252{ .mfi 3253 nop.m 0 3254 // poly_lo = poly_lo*r^3 + h 3255 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h 3256 nop.i 0 3257} 3258;; 3259{ .mfi 3260 nop.m 0 3261 fms.s1 fRes2L = fResLnDxH, fMOne, fRes2H 3262 nop.i 0 3263} 3264;; 3265{ .mfi 3266 nop.m 0 3267 fma.s1 fLnSinL = fLnSin36, fDxSqr, fLnSinL 3268 nop.i 0 3269} 3270{ .mfi 3271 nop.m 0 3272 // Y_lo = poly_hi + poly_lo 3273 fadd.s1 fResLnDxL = fPolyHiDx, fPolyLoDx 3274 nop.i 0 3275} 3276;; 3277{ .mfi 3278 nop.m 0 3279 fadd.s1 fRes1L = fRes1L, fRes5L 3280 nop.i 0 3281} 3282{ .mfi 3283 nop.m 0 3284 // high part of the final result 3285 fadd.s1 fYH = fRes2H, fRes1H 3286 nop.i 0 3287} 3288;; 3289{ .mfi 3290 nop.m 0 3291 // Y_lo = poly_hi + poly_lo 3292 fadd.s1 fResL = FR_poly_hi, FR_poly_lo 3293 nop.i 0 3294} 3295;; 3296{ .mfi 3297 nop.m 0 3298 famax.s0 fRes4H = fRes2H, fRes1H 3299 nop.i 0 3300} 3301;; 3302{ .mfi 3303 nop.m 0 3304 famin.s0 fRes4L = fRes2H, fRes1H 3305 nop.i 0 3306} 3307;; 3308{ .mfi 3309 nop.m 0 3310 // (LnSin)lo + (log(|DeltaX|))lo 3311 fsub.s1 fLnSinL = fLnSinL, fResLnDxL 3312 nop.i 0 3313} 3314{ .mfi 3315 nop.m 0 3316 fadd.s1 fRes2L = fRes2L, fLnSinH 3317 nop.i 0 3318} 3319;; 3320{ .mfi 3321 nop.m 0 3322 //(lgammal(|x|))lo + (log(|x|))lo 3323 fadd.s1 fPolL = fResL, fPolL 3324 nop.i 0 3325} 3326;; 3327{ .mfi 3328 nop.m 0 3329 fsub.s1 fYL = 
fRes4H, fYH   // operands of the "fsub.s1 fYL =" started on the preceding line
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // Low part of -(LnSin + log(|DeltaX|))
      fadd.s1       fRes2L = fRes2L, fLnSinL
      nop.i         0
}
{ .mfi
      nop.m         0
      // Low part of (lgammal(|x|) + log(|x|))
      fadd.s1       fRes1L = fRes1L, fPolL
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fadd.s1       fYL = fYL, fRes4L
      nop.i         0
}
{ .mfi
      nop.m         0
      fsub.s1       fRes2L = fRes2L, fRes1L
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // low part of the final result
      fadd.s1       fYL = fYL, fRes2L
      nop.i         0
}
;;
{ .mfb
      nop.m         0
      // final result for -6.0 < x <= -0.75, non-integer, "far" from roots
      fma.s0        f8 = fYH, f1, fYL
      // exit here for -6.0 < x <= -0.75, non-integer, "far" from roots
      br.ret.sptk   b0
}
;;

// here if |x+1| < 2^(-7)
//
// _closeToNegOne: special path for arguments close to -1.
//
// Values read here that are set up before this label (outside this part
// of the file):
//   fDx, fDxSqr        deltaX and deltaX^2 (per the comments below,
//                      deltaX is x+1)
//   rSignifDx          significand bits of deltaX, used for table indexing
//   GR_ad_z_1, rTbl1Addr, rZ2Addr, rTbl2Addr, rTbl3Addr
//                      base addresses of the Z_1/G_1, Z_2/G_2 and G_3 tables
//   r17Ones, rExpHalf  exponent mask / exponent bias helper
//   FR_Q1..FR_Q4, FR_log2_hi, FR_log2_lo
//                      constants of the log() evaluation
//   p10, p11           select the sign of GAMMA(x) (+1 / -1)
//   rSgnGamAddr, rSgnGamSize
//                      where, and with which width (4 or 8 bytes), the sign
//                      is stored
//
// Layout of lgammal_1pEps_data as implied by the loads below (byte offsets):
//     0,  16,  32,  48   A1, A2, A3, A4            (ldfe)
//    64,  80             (A5, A6), (A7, A8)        (ldfpd pairs)
//    96, 112, 128, 144   (P8,P7) (P6,P5) (P4,P3) (P2,P1)  (ldfpd pairs)
//   160, 176, 192, 208   LnSin2, LnSin4, LnSin6, LnSin8   (ldfe)
//
// Output: f8 = final result; the sign of GAMMA(x) is stored to
// [rSgnGamAddr]; returns through b0.
.align 32
_closeToNegOne:
{ .mfi
      getf.exp      GR_N = fDx // Get N = exponent of deltaX
      // sign and exponent are taken from f1, the significand from fDx;
      // the result is used as S_hi in "r = G * S_hi - 1" below
      fmerge.se     fAbsX = f1, fDx // Form |deltaX|
      // Get high 4 bits of significand of deltaX
      extr.u        rIndex1Dx = rSignifDx, 59, 4
}
{ .mfi
      // address of the linkage-table entry that holds &lgammal_1pEps_data
      addl          rPolDataPtr= @ltoff(lgammal_1pEps_data),gp
      fma.s1        fA0L = fDxSqr, fDxSqr, f0 // deltaX^4
      // sign of GAMMA is positive if p10 is set to 1
(p10) adds          rSgnGam = 1, r0
}
;;
{ .mfi
      shladd        GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1
      fnma.s1       fResL = fDx, f1, f0 // -(x+1)
      // Get high 15 bits of significand
      extr.u        GR_X_0 = rSignifDx, 49, 15
}
{ .mfi
      // fetch the actual table address from the linkage table
      ld8           rPolDataPtr = [rPolDataPtr]
      nop.f         0
      shladd        GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1
}
;;
{ .mfi
      ld4           GR_Z_1 = [GR_ad_z_1] // Load Z_1
      nop.f         0
      and           GR_N = GR_N, r17Ones // mask sign bit
}
{ .mfi
      adds          rTmpPtr = 8, GR_ad_tbl_1 // address of h_1
      nop.f         0
      // p6: signgam variable is 4 bytes wide, p7: otherwise (stored as 8)
      cmp.eq        p6, p7 = 4, rSgnGamSize
}
;;
{ .mfi
      ldfps         FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
      nop.f         0
      adds          rTmpPtr2 = 96, rPolDataPtr // second stream: P8..P1, LnSin*
}
{ .mfi
      ldfd          FR_h = [rTmpPtr] // Load h_1
      nop.f         0
      // unbiased exponent of deltaX
      // (three-operand sub computes GR_N - rExpHalf - 1)
      sub           GR_N = GR_N, rExpHalf, 1
}
;;
{ .mfi
      adds          rTmpPtr3 = 192, rPolDataPtr // address of LnSin6
      nop.f         0
      // sign of GAMMA is negative if p11 is set to 1
(p11) adds          rSgnGam = -1, r0
}
{ .mfi
      ldfe          fA1 = [rPolDataPtr], 16 // A1
      nop.f         0
      nop.i         0
}
;;
{.mfi
      ldfe          fA2 = [rPolDataPtr], 16 // A2
      nop.f         0
      // Get bits 30-15 of X_0 * Z_1
      pmpyshr2.u    GR_X_1 = GR_X_0,GR_Z_1,15
}
{ .mfi
      ldfpd         fA20, fA19 = [rTmpPtr2], 16 // P8, P7
      nop.f         0
      nop.i         0
}
;;
//
// For performance, don't use result of pmpyshr2.u for 4 cycles.
//
{ .mfi
      ldfe          fA3 = [rPolDataPtr], 16 // A3
      nop.f         0
      nop.i         0
}
{ .mfi
      ldfpd         fA18, fA17 = [rTmpPtr2], 16 // P6, P5
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      ldfe          fA4 = [rPolDataPtr], 16 // A4
      nop.f         0
      nop.i         0
}
{ .mfi
      ldfpd         fA16, fA15 = [rTmpPtr2], 16 // P4, P3
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      // note: A5 is kept in the register named fA5L on this path
      ldfpd         fA5L, fA6 = [rPolDataPtr], 16 // A5, A6
      nop.f         0
      nop.i         0
}
{ .mfi
      ldfpd         fA14, fA13 = [rTmpPtr2], 16 // P2, P1
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      ldfpd         fA7, fA8 = [rPolDataPtr], 16 // A7, A8
      nop.f         0
      extr.u        GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
}
{ .mfi
      ldfe          fLnSin2 = [rTmpPtr2], 16
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      shladd        GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2
      nop.f         0
      shladd        GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2
}
{ .mfi
      // post-increment by 32 skips LnSin6, which is loaded through rTmpPtr3
      ldfe          fLnSin4 = [rTmpPtr2], 32
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      ld4           GR_Z_2 = [GR_ad_z_2] // Load Z_2
      nop.f         0
      adds          rTmpPtr = 8, GR_ad_tbl_2 // address of h_2
}
{ .mfi
      // Put integer N into rightmost significand
      setf.sig      fFloatN = GR_N
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      ldfe          fLnSin6 = [rTmpPtr3]
      nop.f         0
      nop.i         0
}
{ .mfi
      ldfe          fLnSin8 = [rTmpPtr2]
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      ldfps         FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
      nop.f         0
      nop.i         0
}
{ .mfi
      ldfd          FR_h2 = [rTmpPtr] // Load h_2
      nop.f         0
      nop.i         0
}
;;
{ .mfi
      // store signgam if size of variable is 4 bytes
(p6)  st4           [rSgnGamAddr] = rSgnGam
      fma.s1        fResH = fA20, fResL, fA19 //polynomial for log(|x|)
      // Get bits 30-15 of X_1 * Z_2
      pmpyshr2.u    GR_X_2 = GR_X_1,GR_Z_2,15
}
{ .mfi
      // store signgam if size of variable is 8 bytes
(p7)  st8           [rSgnGamAddr] = rSgnGam
      fma.s1        fA2 = fA2, fDx, fA1 // polynomial for lgammal(|x|)
      nop.i         0
}
;;
//
// For performance, don't use result of pmpyshr2.u for 4 cycles.
//
{ .mfi
      nop.m         0
      fma.s1        fA18 = fA18, fResL, fA17 //polynomial for log(|x|)
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fA16 = fA16, fResL, fA15 //polynomial for log(|x|)
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA4 = fA4, fDx, fA3 // polynomial for lgammal(|x|)
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fA14 = fA14, fResL, fA13 //polynomial for log(|x|)
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA6 = fA6, fDx, fA5L // polynomial for lgammal(|x|)
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fPol = fA8, fDx, fA7 // polynomial for lgammal(|x|)
      extr.u        GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
}
;;
{ .mfi
      shladd        GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3
      // low part of lnsin polynomial
      fma.s1        fRes3L = fLnSin4, fDxSqr, fLnSin2
      nop.i         0
}
;;
{ .mfi
      // post-increment leaves GR_ad_tbl_3 pointing at h_3
      ldfps         FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3
      fcvt.xf       fFloatN = fFloatN // N as FP number
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fResH = fResH, fDxSqr, fA18 // High part of log(|x|)
      nop.i         0
}
;;
{ .mfi
      ldfd          FR_h3 = [GR_ad_tbl_3] // Load h_3
      fma.s1        fA4 = fA4, fDxSqr, fA2 // Low part of lgammal(|x|)
      nop.i         0
}
{ .mfi
      nop.m         0
      // high part of lnsin polynomial
      fma.s1        fRes3H = fLnSin8, fDxSqr, fLnSin6
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fmpy.s1       FR_G = FR_G, FR_G2 // G = G_1 * G_2
      nop.i         0
}
{ .mfi
      nop.m         0
      fadd.s1       FR_H = FR_H, FR_H2 // H = H_1 + H_2
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fadd.s1       FR_h = FR_h, FR_h2 // h = h_1 + h_2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA16 = fA16, fDxSqr, fA14 // Low part of log(|x|)
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fPol = fPol, fDxSqr, fA6 // High part of lgammal(|x|)
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // fA0L holds deltaX^4 on this path
      fma.s1        fResH = fResH, fA0L, fA16 // log(|x|)/deltaX^2 - deltaX
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fmpy.s1       FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
      nop.i         0
}
{ .mfi
      nop.m         0
      fadd.s1       FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fadd.s1       FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fResH = fResH, fDxSqr, fResL // log(|x|)
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fPol = fPol, fA0L, fA4 // lgammal(|x|)/|x|
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fms.s1        FR_r = FR_G, fAbsX, f1 // r = G * S_hi - 1
      nop.i         0
}
{ .mfi
      nop.m         0
      // high part of log(deltaX)= Y_hi = N * log2_hi + H
      fma.s1        fRes4H = fFloatN, FR_log2_hi, FR_H
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // h = N * log2_lo + h
      fma.s1        FR_h = fFloatN, FR_log2_lo, FR_h
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      fma.s1        fResH = fPol, fDx, fResH // lgammal(|x|) + log(|x|)
      nop.i         0
}
{ .mfi
      nop.m         0
      // lnsin/deltaX^2
      fma.s1        fRes3H = fRes3H, fA0L, fRes3L
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // poly_lo = r * Q4 + Q3
      fma.s1        FR_poly_lo = FR_r, FR_Q4, FR_Q3
      nop.i         0
}
{ .mfi
      nop.m         0
      fmpy.s1       FR_rsq = FR_r, FR_r // rsq = r * r
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // lnSin - log(|x|) - lgammal(|x|)
      fms.s1        fResH = fRes3H, fDxSqr, fResH
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // poly_lo = poly_lo * r + Q2
      fma.s1        FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // poly_hi = Q1 * rsq + r
      fma.s1        FR_poly_hi = FR_Q1, FR_rsq, FR_r
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // poly_lo = poly_lo*r^3 + h
      fma.s1        FR_poly_lo = FR_poly_lo, FR_rcub, FR_h
      nop.i         0
}
;;

{ .mfi
      nop.m         0
      // low part of log(|deltaX|) = Y_lo = poly_hi + poly_lo
      fadd.s1       fRes4L = FR_poly_hi, FR_poly_lo
      nop.i         0
}
;;
{ .mfi
      nop.m         0
      // subtract the low part of log(|deltaX|) first ...
      fsub.s1       fResH = fResH, fRes4L
      nop.i         0
}
;;
{ .mfb
      nop.m         0
      // final result for |x+1|< 2^(-7) path
      // (... then the high part, rounding once in the user's mode: .s0)
      fsub.s0       f8 = fResH, fRes4H
      // exit for |x+1|< 2^(-7) path
      br.ret.sptk   b0
}
;;


// here if -2^63 < x < -6.0 and x is not an integer
// Also we are going to filter out cases when x falls in
// range which is "close enough" to negative root. This case
// may occur only for -19.5 < x since other roots of lgamma are
// insignificant from double extended point of view (they are closer
// to RTN(x) than one ulp(x)).
3792.align 32 3793_negStirling: 3794{ .mfi 3795 ldfe fLnSin6 = [rLnSinDataPtr], 32 3796 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration 3797 // Get high 4 bits of significand of deltaX 3798 extr.u rIndex1Dx = rSignifDx, 59, 4 3799} 3800{ .mfi 3801 ldfe fLnSin8 = [rTmpPtr3], 32 3802 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 3803(p12) cmp.ltu.unc p6, p0 = rSignifX, rLeftBound 3804} 3805;; 3806{ .mfi 3807 ldfe fLnSin10 = [rLnSinDataPtr], 32 3808 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 3809 // Get high 15 bits of significand 3810 extr.u GR_X_0 = rSignifDx, 49, 15 3811} 3812{ .mfi 3813 shladd GR_ad_z_1 = rIndex1Dx, 2, GR_ad_z_1 // Point to Z_1 3814 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 3815 // set p6 if x falls in "near root" range 3816(p6) cmp.geu.unc p6, p0 = rSignifX, rRightBound 3817} 3818;; 3819{ .mfi 3820 getf.exp GR_N = fDx // Get N = exponent of x 3821 fma.s1 fDx4 = fDxSqr, fDxSqr, f0 // deltaX^4 3822 adds rTmpPtr = 96, rBernulliPtr 3823} 3824{ .mfb 3825 ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1 3826 fma.s1 fLnSin34 = fLnSin34, fDxSqr, fLnSin32 3827 // branch to special path if x falls in "near root" range 3828(p6) br.cond.spnt _negRoots 3829} 3830;; 3831.pred.rel "mutex",p10,p11 3832{ .mfi 3833 ldfe fLnSin12 = [rTmpPtr3] 3834 fma.s1 fLnSin26 = fLnSin26, fDxSqr, fLnSin24 3835(p10) cmp.eq p8, p9 = rXRnd, r0 3836} 3837{ .mfi 3838 ldfe fLnSin14 = [rLnSinDataPtr] 3839 fma.s1 fLnSin30 = fLnSin30, fDxSqr, fLnSin28 3840(p11) cmp.eq p9, p8 = rXRnd, r0 3841} 3842;; 3843{ .mfi 3844 ldfpd fB2, fB2L = [rBernulliPtr], 16 3845 fma.s1 fLnSin18 = fLnSin18, fDxSqr, fLnSin16 3846 shladd GR_ad_tbl_1 = rIndex1Dx, 4, rTbl1Addr // Point to G_1 3847 3848} 3849{ .mfi 3850 ldfe fB14 = [rTmpPtr], 16 3851 fma.s1 fLnSin22 = fLnSin22, fDxSqr, fLnSin20 3852 and GR_N = GR_N, r17Ones // mask sign bit 3853} 3854;; 3855{ .mfi 3856 ldfe fB4 = [rBernulliPtr], 16 3857 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR iteration 3858 // Get bits 30-15 of X_0 * Z_1 
3859 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 3860} 3861{ .mfi 3862 ldfe fB16 = [rTmpPtr], 16 3863 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 3864 adds rTmpPtr2 = 8, GR_ad_tbl_1 3865} 3866;; 3867// 3868// For performance, don't use result of pmpyshr2.u for 4 cycles. 3869// 3870{ .mfi 3871 ldfe fB6 = [rBernulliPtr], 16 3872 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 3873 adds rTmpPtr3 = -48, rTmpPtr 3874} 3875{ .mfi 3876 ldfe fB18 = [rTmpPtr], 16 3877 // High part of the log(|x|) = Y_hi = N * log2_hi + H 3878 fma.s1 fResH = fFloatN, FR_log2_hi, FR_H 3879 sub GR_N = GR_N, rExpHalf, 1 // unbiased exponent of deltaX 3880} 3881;; 3882.pred.rel "mutex",p8,p9 3883{ .mfi 3884 ldfe fB8 = [rBernulliPtr], 16 3885 fma.s1 fLnSin36 = fLnSin36, fDx4, fLnSin34 3886 // sign of GAMMA(x) is negative 3887(p8) adds rSgnGam = -1, r0 3888} 3889{ .mfi 3890 ldfe fB20 = [rTmpPtr], -160 3891 fma.s1 fRes5H = fLnSin4, fDxSqr, f0 3892 // sign of GAMMA(x) is positive 3893(p9) adds rSgnGam = 1, r0 3894 3895} 3896;; 3897{ .mfi 3898 ldfe fB10 = [rBernulliPtr], 16 3899 fma.s1 fLnSin30 = fLnSin30, fDx4, fLnSin26 3900(p14) adds rTmpPtr = -160, rTmpPtr 3901} 3902{ .mfi 3903 ldfe fB12 = [rTmpPtr3], 16 3904 fma.s1 fDx8 = fDx4, fDx4, f0 // deltaX^8 3905 cmp.eq p6, p7 = 4, rSgnGamSize 3906} 3907;; 3908{ .mfi 3909 ldfps fGDx, fHDx = [GR_ad_tbl_1], 8 // Load G_1, H_1 3910 fma.s1 fDx6 = fDx4, fDxSqr, f0 // deltaX^6 3911 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 3912} 3913{ .mfi 3914 ldfd fhDx = [rTmpPtr2] // Load h_1 3915 fma.s1 fLnSin22 = fLnSin22, fDx4, fLnSin18 3916 nop.i 0 3917} 3918;; 3919{ .mfi 3920 // Load two parts of C 3921 ldfpd fRes1H, fRes1L = [rTmpPtr], 16 3922 fma.s1 fRcpX = fInvX, fInvX, f0 // (1/x)^2 3923 shladd GR_ad_tbl_2 = GR_Index2, 4, rTbl2Addr // Point to G_2 3924} 3925{ .mfi 3926 shladd GR_ad_z_2 = GR_Index2, 2, rZ2Addr // Point to Z_2 3927 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h// h = N * log2_lo + h 3928 nop.i 0 3929} 3930;; 3931{ .mfi 3932 ld4 GR_Z_2 = 
[GR_ad_z_2] // Load Z_2 3933 fnma.s1 fInvXL = f8, fInvX, f1 // relative error of 1/x 3934 nop.i 0 3935} 3936{ .mfi 3937 adds rTmpPtr2 = 8, GR_ad_tbl_2 3938 fma.s1 fLnSin8 = fLnSin8, fDxSqr, fLnSin6 3939 nop.i 0 3940} 3941;; 3942{ .mfi 3943 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2 3944 // poly_lo = r * Q4 + Q3 3945 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 3946 nop.i 0 3947} 3948{ .mfi 3949 ldfd fh2Dx = [rTmpPtr2] // Load h_2 3950 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r 3951 nop.i 0 3952} 3953;; 3954{ .mfi 3955 nop.m 0 3956 fma.s1 fA1L = fB2, fInvX, f0 // (B2*(1/x))hi 3957 nop.i 0 3958} 3959{ .mfi 3960 // Put integer N into rightmost significand 3961 setf.sig fFloatNDx = GR_N 3962 fms.s1 fRes4H = fResH, f1, f1 // ln(|x|)hi - 1 3963 nop.i 0 3964} 3965;; 3966{ .mfi 3967 nop.m 0 3968 fadd.s1 fRes2H = fRes5H, fLnSin2//(lnSin4*DeltaX^2 + lnSin2)hi 3969 // Get bits 30-15 of X_1 * Z_2 3970 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 3971} 3972{ .mfi 3973 nop.m 0 3974 fms.s1 fRes5L = fLnSin4, fDxSqr, fRes5H 3975 nop.i 0 3976} 3977;; 3978// 3979// For performance, don't use result of pmpyshr2.u for 4 cycles. 
3980// 3981{ .mfi 3982 nop.m 0 3983 fma.s1 fInvX4 = fRcpX, fRcpX, f0 // (1/x)^4 3984 nop.i 0 3985} 3986{ .mfi 3987 nop.m 0 3988 fma.s1 fB6 = fB6, fRcpX, fB4 3989 nop.i 0 3990} 3991;; 3992{ .mfi 3993 // store signgam if size of variable is 4 bytes 3994(p6) st4 [rSgnGamAddr] = rSgnGam 3995 fma.s1 fB18 = fB18, fRcpX, fB16 3996 nop.i 0 3997} 3998{ .mfi 3999 // store signgam if size of variable is 8 bytes 4000(p7) st8 [rSgnGamAddr] = rSgnGam 4001 fma.s1 fInvXL = fInvXL, fInvX, f0 // low part of 1/x 4002 nop.i 0 4003} 4004;; 4005{ .mfi 4006 nop.m 0 4007 // poly_lo = poly_lo * r + Q2 4008 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 4009 nop.i 0 4010} 4011{ .mfi 4012 nop.m 0 4013 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 4014 nop.i 0 4015} 4016;; 4017{ .mfi 4018 nop.m 0 4019 fma.s1 fRes3H = fRes4H, f8, f0 // (-|x|*(ln(|x|)-1))hi 4020 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 4021} 4022{ .mfi 4023 nop.m 0 4024 // poly_hi = Q1 * rsq + r 4025 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 4026 nop.i 0 4027} 4028;; 4029{ .mfi 4030 shladd GR_ad_tbl_3 = GR_Index3, 4, rTbl3Addr // Point to G_3 4031 fms.s1 fA2L = fB2, fInvX, fA1L // delta(B2*(1/x)) 4032 nop.i 0 4033} 4034{ .mfi 4035 nop.m 0 4036 fnma.s1 fBrnH = fRes1H, f1, fA1L // (-C - S(1/x))hi 4037 nop.i 0 4038} 4039;; 4040{ .mfi 4041 ldfps fG3Dx, fH3Dx = [GR_ad_tbl_3],8 // Load G_3, H_3 4042 fma.s1 fInvX8 = fInvX4, fInvX4, f0 // (1/x)^8 4043 nop.i 0 4044} 4045{ .mfi 4046 nop.m 0 4047 fma.s1 fB10 = fB10, fRcpX, fB8 4048 nop.i 0 4049} 4050;; 4051 4052{ .mfi 4053 ldfd fh3Dx = [GR_ad_tbl_3] // Load h_3 4054 fma.s1 fB20 = fB20, fInvX4, fB18 4055 nop.i 0 4056} 4057{ .mfi 4058 nop.m 0 4059 fma.s1 fB14 = fB14, fRcpX, fB12 4060 nop.i 0 4061} 4062;; 4063{ .mfi 4064 nop.m 0 4065 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin30 4066 nop.i 0 4067} 4068{ .mfi 4069 nop.m 0 4070 fma.s1 fLnSin12 = fLnSin12, fDxSqr, fLnSin10 4071 nop.i 0 4072} 4073;; 4074{ .mfi 4075 nop.m 0 4076 fsub.s1 fRes2L = fLnSin2, fRes2H 4077 nop.i 0 4078} 
4079{ .mfi 4080 nop.m 0 4081 fma.s1 fPol = fRes2H, fDxSqr, f0 // high part of LnSin 4082 nop.i 0 4083} 4084;; 4085{ .mfi 4086 nop.m 0 4087 fnma.s1 fResH = fResH, FR_MHalf, fResH // -0.5*ln(|x|)hi 4088 nop.i 0 4089} 4090{ .mfi 4091 nop.m 0 4092 fmpy.s1 fGDx = fGDx, FR_G2 // G = G_1 * G_2 4093 nop.i 0 4094} 4095;; 4096{ .mfi 4097 nop.m 0 4098 // poly_lo = poly_lo*r^3 + h 4099 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h 4100 nop.i 0 4101} 4102{ .mfi 4103 nop.m 0 4104 // B2lo*(1/x)hi+ delta(B2*(1/x)) 4105 fma.s1 fA2L = fB2L, fInvX, fA2L 4106 nop.i 0 4107} 4108;; 4109{ .mfi 4110 nop.m 0 4111 fma.s1 fB20 = fB20, fInvX4, fB14 4112 nop.i 0 4113} 4114{ .mfi 4115 nop.m 0 4116 fma.s1 fB10 = fB10, fInvX4, fB6 4117 nop.i 0 4118} 4119;; 4120{ .mfi 4121 nop.m 0 4122 fcvt.xf fFloatNDx = fFloatNDx 4123 nop.i 0 4124} 4125{ .mfi 4126 nop.m 0 4127 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin12 4128 nop.i 0 4129} 4130;; 4131{ .mfi 4132 nop.m 0 4133 fma.s1 fLnSin36 = fLnSin36, fDx8, fLnSin22 4134 nop.i 0 4135} 4136{ .mfi 4137 nop.m 0 4138 fms.s1 fRes3L = fRes4H, f8, fRes3H // delta(-|x|*(ln(|x|)-1)) 4139 nop.i 0 4140} 4141;; 4142{ .mfi 4143 nop.m 0 4144 fmpy.s1 fGDx = fGDx, fG3Dx // G = (G_1 * G_2) * G_3 4145 nop.i 0 4146} 4147{ .mfi 4148 nop.m 0 4149 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))hi 4150 fadd.s1 fRes4H = fRes3H, fResH 4151 nop.i 0 4152} 4153;; 4154{ .mfi 4155 nop.m 0 4156 fma.s1 fA2L = fInvXL, fB2, fA2L //(B2*(1/x))lo 4157 nop.i 0 4158} 4159{ .mfi 4160 nop.m 0 4161 // low part of log(|x|) = Y_lo = poly_hi + poly_lo 4162 fadd.s1 fResL = FR_poly_hi, FR_poly_lo 4163 nop.i 0 4164} 4165;; 4166{ .mfi 4167 nop.m 0 4168 fma.s1 fB20 = fB20, fInvX8, fB10 4169 nop.i 0 4170} 4171{ .mfi 4172 nop.m 0 4173 fma.s1 fInvX3 = fInvX, fRcpX, f0 // (1/x)^3 4174 nop.i 0 4175} 4176;; 4177{ .mfi 4178 nop.m 0 4179 fadd.s1 fHDx = fHDx, FR_H2 // H = H_1 + H_2 4180 nop.i 0 4181} 4182{ .mfi 4183 nop.m 0 4184 fadd.s1 fRes5L = fRes5L, fLnSin2L 4185 nop.i 0 4186} 4187;; 4188{ .mfi 4189 nop.m 0 4190 fadd.s1 fRes2L = 
fRes2L, fRes5H 4191 nop.i 0 4192} 4193{ .mfi 4194 nop.m 0 4195 fadd.s1 fhDx = fhDx, fh2Dx // h = h_1 + h_2 4196 nop.i 0 4197} 4198;; 4199{ .mfi 4200 nop.m 0 4201 fms.s1 fBrnL = fRes1H, fMOne, fBrnH 4202 nop.i 0 4203} 4204{ .mfi 4205 nop.m 0 4206 fms.s1 FR_r = fGDx, fNormDx, f1 // r = G * S_hi - 1 4207 nop.i 0 4208} 4209;; 4210{ .mfi 4211 nop.m 0 4212 fma.s1 fRes3L = fResL, f8 , fRes3L // (-|x|*(ln(|x|)-1))lo 4213 nop.i 0 4214} 4215{ .mfi 4216 nop.m 0 4217 fsub.s1 fRes4L = fRes3H, fRes4H 4218 nop.i 0 4219} 4220;; 4221{ .mfi 4222 nop.m 0 4223 // low part of "Bernulli" polynomial 4224 fma.s1 fB20 = fB20, fInvX3, fA2L 4225 nop.i 0 4226} 4227{ .mfi 4228 nop.m 0 4229 fnma.s1 fResL = fResL, FR_MHalf, fResL // -0.5*ln(|x|)lo 4230 nop.i 0 4231} 4232;; 4233{ .mfi 4234 nop.m 0 4235 fadd.s1 fHDx = fHDx, fH3Dx // H = (H_1 + H_2) + H_3 4236 nop.i 0 4237} 4238{ .mfi 4239 nop.m 0 4240 fms.s1 fPolL = fRes2H, fDxSqr, fPol 4241 nop.i 0 4242} 4243;; 4244{ .mfi 4245 nop.m 0 4246 fadd.s1 fhDx = fhDx, fh3Dx // h = (h_1 + h_2) + h_3 4247 nop.i 0 4248} 4249{ .mfi 4250 nop.m 0 4251 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|) - C - S(1/x))hi 4252 fadd.s1 fB14 = fRes4H, fBrnH 4253 nop.i 0 4254} 4255;; 4256{ .mfi 4257 nop.m 0 4258 // poly_lo = r * Q4 + Q3 4259 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 4260 nop.i 0 4261} 4262{ .mfi 4263 nop.m 0 4264 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r 4265 nop.i 0 4266} 4267;; 4268{ .mfi 4269 nop.m 0 4270 fadd.s1 fRes4L = fRes4L, fResH 4271 nop.i 0 4272} 4273{ .mfi 4274 nop.m 0 4275 fadd.s1 fBrnL = fBrnL, fA1L 4276 nop.i 0 4277} 4278;; 4279{ .mfi 4280 nop.m 0 4281 // (-|x|*(ln(|x|)-1))lo + (-0.5ln(|x|))lo 4282 fadd.s1 fRes3L = fRes3L, fResL 4283 nop.i 0 4284} 4285{ .mfi 4286 nop.m 0 4287 fnma.s1 fB20 = fRes1L, f1, fB20 // -Clo - S(1/x)lo 4288 nop.i 0 4289} 4290;; 4291{ .mfi 4292 nop.m 0 4293 fadd.s1 fRes2L = fRes2L, fRes5L // (lnSin4*DeltaX^2 + lnSin2)lo 4294 nop.i 0 4295} 4296{ .mfi 4297 nop.m 0 4298 fma.s1 fPolL = fDxSqrL, fRes2H, fPolL 4299 nop.i 0 4300} 4301;; 
4302{ .mfi 4303 nop.m 0 4304 fma.s1 fLnSin14 = fLnSin14, fDx4, fLnSin8 4305 nop.i 0 4306} 4307{ .mfi 4308 nop.m 0 4309 fma.s1 fLnSin36 = fLnSin36, fDx8, f0 4310 nop.i 0 4311} 4312;; 4313{ .mfi 4314 nop.m 0 4315 // poly_lo = poly_lo * r + Q2 4316 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 4317 nop.i 0 4318} 4319{ .mfi 4320 nop.m 0 4321 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 4322 nop.i 0 4323} 4324;; 4325{ .mfi 4326 nop.m 0 4327 // poly_hi = Q1 * rsq + r 4328 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 4329 nop.i 0 4330} 4331{ .mfi 4332 nop.m 0 4333 fsub.s1 fB12 = fRes4H, fB14 4334 nop.i 0 4335} 4336;; 4337{ .mfi 4338 nop.m 0 4339 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo 4340 fadd.s1 fRes4L = fRes4L, fRes3L 4341 nop.i 0 4342} 4343{ .mfi 4344 nop.m 0 4345 fadd.s1 fBrnL = fBrnL, fB20 // (-C - S(1/x))lo 4346 nop.i 0 4347} 4348;; 4349{ .mfi 4350 nop.m 0 4351 // high part of log(|DeltaX|) = Y_hi = N * log2_hi + H 4352 fma.s1 fLnDeltaH = fFloatNDx, FR_log2_hi, fHDx 4353 nop.i 0 4354} 4355{ .mfi 4356 nop.m 0 4357 // h = N * log2_lo + h 4358 fma.s1 fhDx = fFloatNDx, FR_log2_lo, fhDx 4359 nop.i 0 4360} 4361;; 4362{ .mfi 4363 nop.m 0 4364 fma.s1 fPolL = fRes2L, fDxSqr, fPolL 4365 nop.i 0 4366} 4367{ .mfi 4368 nop.m 0 4369 fma.s1 fLnSin14 = fLnSin36, fDxSqr, fLnSin14 4370 nop.i 0 4371} 4372;; 4373{ .mfi 4374 nop.m 0 4375 // (-|x|*(ln(|x|)-1) - 0.5ln(|x|))lo + (- C - S(1/x))lo 4376 fadd.s1 fBrnL = fBrnL, fRes4L 4377 nop.i 0 4378} 4379{ .mfi 4380 nop.m 0 4381 fadd.s1 fB12 = fB12, fBrnH 4382 nop.i 0 4383} 4384;; 4385{ .mfi 4386 nop.m 0 4387 // poly_lo = poly_lo*r^3 + h 4388 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, fhDx 4389 nop.i 0 4390} 4391{ .mfi 4392 nop.m 0 4393 fnma.s1 fRes1H = fLnDeltaH, f1, fPol//(-ln(|DeltaX|) + LnSin)hi 4394 nop.i 0 4395} 4396;; 4397{ .mfi 4398 nop.m 0 4399 fma.s1 fPolL = fDxSqrL, fRes2L, fPolL 4400 nop.i 0 4401} 4402{ .mfi 4403 nop.m 0 4404 fma.s1 fLnSin36 = fLnSin14, fDx6, f0 4405 nop.i 0 4406} 4407;; 4408{ .mfi 4409 nop.m 0 4410 // (-|x|*(ln(|x|)-1) 
- 0.5ln(|x|) - C - S(1/x))lo 4411 fadd.s1 fB12 = fB12, fBrnL 4412 nop.i 0 4413} 4414;; 4415{ .mfi 4416 nop.m 0 4417 // low part of log(|DeltaX|) = Y_lo = poly_hi + poly_lo 4418 fadd.s1 fLnDeltaL= FR_poly_hi, FR_poly_lo 4419 nop.i 0 4420} 4421{ .mfi 4422 nop.m 0 4423 fms.s1 fRes1L = fLnDeltaH, fMOne, fRes1H 4424 nop.i 0 4425} 4426;; 4427{ .mfi 4428 nop.m 0 4429 fadd.s1 fPolL = fPolL, fLnSin36 4430 nop.i 0 4431} 4432{ .mfi 4433 nop.m 0 4434 //(-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi + (-ln(|DeltaX|) + LnSin)hi 4435 fadd.s1 f8 = fRes1H, fB14 4436 nop.i 0 4437} 4438;; 4439{ .mfi 4440 nop.m 0 4441 //max((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi, 4442 // (-ln(|DeltaX|) + LnSin)hi) 4443 famax.s1 fMaxNegStir = fRes1H, fB14 4444 nop.i 0 4445} 4446{ .mfi 4447 nop.m 0 4448 //min((-|x|*(ln(|x|)-1)-0.5ln(|x|) - C - S(1/x))hi, 4449 // (-ln(|DeltaX|) + LnSin)hi) 4450 famin.s1 fMinNegStir = fRes1H, fB14 4451 nop.i 0 4452} 4453;; 4454{ .mfi 4455 nop.m 0 4456 fadd.s1 fRes1L = fRes1L, fPol 4457 nop.i 0 4458} 4459{ .mfi 4460 nop.m 0 4461 // (-ln(|DeltaX|))lo + (LnSin)lo 4462 fnma.s1 fPolL = fLnDeltaL, f1, fPolL 4463 nop.i 0 4464} 4465;; 4466{ .mfi 4467 nop.m 0 4468 fsub.s1 f9 = fMaxNegStir, f8 // delta1 4469 nop.i 0 4470} 4471;; 4472{ .mfi 4473 nop.m 0 4474 fadd.s1 fRes1L = fRes1L, fPolL // (-ln(|DeltaX|) + LnSin)lo 4475 nop.i 0 4476} 4477;; 4478{ .mfi 4479 nop.m 0 4480 fadd.s1 f9 = f9, fMinNegStir 4481 nop.i 0 4482} 4483;; 4484{ .mfi 4485 nop.m 0 4486 fadd.s1 fRes1L = fRes1L, fB12 4487 nop.i 0 4488} 4489;; 4490{ .mfi 4491 // low part of the result 4492 fadd.s1 f9 = f9, fRes1L 4493 nop.i 0 4494} 4495;; 4496{ .mfb 4497 nop.m 0 4498 // final result for -2^63 < x < -6.0 path 4499 fma.s0 f8 = f8, f1, f9 4500 // exit here for -2^63 < x < -6.0 path 4501 br.ret.sptk b0 4502} 4503;; 4504 4505// here if x falls in neighbourhood of any negative root 4506// "neighbourhood" typically means that |lgammal(x)| < 0.17 4507// on the [-3.0,-2.0] range |lgammal(x)| has even less 4508// magnitude 
4509// rXint contains index of the root 4510// p10 is set if root belongs to "right" ones 4511// p11 is set if root belongs to "left" ones 4512// lgammal(x) is approximated by a polynomial of 4513// 19th degree in the (x - root) argument // A0, A1, A2 are loaded as pairs of doubles (hi, lo) and combined in hi/lo form; // A3..A19 are combined into fPol, which is finally scaled by (x - root)^3 4514.align 32 4515_negRoots: 4516{ .mfi 4517 addl rPolDataPtr= @ltoff(lgammal_right_roots_polynomial_data),gp 4518 nop.f 0 4519 shl rTmpPtr2 = rXint, 7 // (i*16)*8 4520} 4521{ .mfi 4522 adds rRootsAddr = -288, rRootsBndAddr 4523 nop.f 0 4524 nop.i 0 4525} 4526;; 4527{ .mfi 4528 ldfe fRoot = [rRootsAddr] // FP representation of root 4529 nop.f 0 4530 shl rTmpPtr = rXint, 6 // (i*16)*4 4531} 4532{ .mfi 4533(p11) adds rTmpPtr2 = 3536, rTmpPtr2 // NOTE(review): presumably the offset of the "left" roots coefficients within the table - confirm 4534 nop.f 0 4535 nop.i 0 4536} 4537;; 4538{ .mfi 4539 ld8 rPolDataPtr = [rPolDataPtr] 4540 nop.f 0 4541 shladd rTmpPtr = rXint, 4, rTmpPtr // (i*16) + (i*16)*4 4542} 4543{ .mfi 4544 adds rTmpPtr3 = 32, rTmpPtr2 4545 nop.f 0 4546 nop.i 0 4547} 4548;; 4549.pred.rel "mutex",p10,p11 4550{ .mfi 4551 add rTmpPtr3 = rTmpPtr, rTmpPtr3 4552 nop.f 0 4553(p10) cmp.eq p8, p9 = rXRnd, r0 4554} 4555{ .mfi 4556 // (i*16) + (i*16)*4 + (i*16)*8 4557 add rTmpPtr = rTmpPtr, rTmpPtr2 4558 nop.f 0 4559(p11) cmp.eq p9, p8 = rXRnd, r0 4560} 4561;; 4562{ .mfi 4563 add rTmpPtr2 = rPolDataPtr, rTmpPtr3 4564 nop.f 0 4565 nop.i 0 4566} 4567{ .mfi 4568 add rPolDataPtr = rPolDataPtr, rTmpPtr // begin + offset 4569 nop.f 0 4570 nop.i 0 4571} 4572;; 4573{ .mfi 4574 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 4575 nop.f 0 4576 adds rTmpPtr = 112, rTmpPtr2 4577} 4578{ .mfi 4579 ldfpd fA2, fA2L = [rTmpPtr2], 16 // A2 4580 nop.f 0 4581 cmp.eq p12, p13 = 4, rSgnGamSize 4582} 4583;; 4584{ .mfi 4585 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 4586 nop.f 0 4587 nop.i 0 4588} 4589{ .mfi 4590 ldfe fA3 = [rTmpPtr2], 128 // A3 4591 nop.f 0 4592 nop.i 0 4593} 4594;; 4595{ .mfi 4596 ldfpd fA12, fA13 = [rTmpPtr], 16 // A12, A13 4597 nop.f 0 4598 adds rTmpPtr3 = 64, rPolDataPtr 4599} 4600{ .mfi 4601 ldfpd fA16, fA17 = [rTmpPtr2], 16 // A16, A17 
4602 nop.f 0 4603 adds rPolDataPtr = 32, rPolDataPtr 4604} 4605;; 4606.pred.rel "mutex",p8,p9 4607{ .mfi 4608 ldfpd fA14, fA15 = [rTmpPtr], 16 // A14, A15 4609 nop.f 0 4610 // sign of GAMMA(x) is negative 4611(p8) adds rSgnGam = -1, r0 4612} 4613{ .mfi 4614 ldfpd fA18, fA19 = [rTmpPtr2], 16 // A18, A19 4615 nop.f 0 4616 // sign of GAMMA(x) is positive 4617(p9) adds rSgnGam = 1, r0 4618} 4619;; 4620{ .mfi 4621 ldfe fA4 = [rPolDataPtr], 16 // A4 4622 nop.f 0 4623 nop.i 0 4624} 4625{ .mfi 4626 ldfpd fA6, fA7 = [rTmpPtr3], 16 // A6, A7 4627 nop.f 0 4628 nop.i 0 4629} 4630;; 4631{ .mfi 4632 ldfe fA5 = [rPolDataPtr], 16 // A5 4633 // p6 = 1 if x equals the (rounded) root exactly 4634 fcmp.eq.s1 p6, p0 = f8, fRoot 4635 nop.i 0 4636} 4637{ .mfi 4638 ldfpd fA8, fA9 = [rTmpPtr3], 16 // A8, A9 4639 fms.s1 FR_FracX = f8, f1, fRoot // FracX = x - root (called x in the comments below) 4640 nop.i 0 4641} 4642;; 4643{ .mfi 4644 // store signgam if size of variable is 4 bytes 4645(p12) st4 [rSgnGamAddr] = rSgnGam 4646 nop.f 0 4647 nop.i 0 4648} 4649{ .mfb 4650 // store signgam if size of variable is 8 bytes 4651(p13) st8 [rSgnGamAddr] = rSgnGam 4652 // answer (A0 + A0L) if x equals the (rounded) root exactly 4653(p6) fadd.s0 f8 = fA0, fA0L 4654 // exit if x equals the (rounded) root exactly 4655(p6) br.ret.spnt b0 4656} 4657;; 4658{ .mmf 4659 ldfpd fA10, fA11 = [rTmpPtr3], 16 // A10, A11 4660 nop.m 0 4661 nop.f 0 4662} 4663;; 4664{ .mfi 4665 nop.m 0 4666 fma.s1 fResH = fA2, FR_FracX, f0 // (A2*x)hi 4667 nop.i 0 4668} 4669{ .mfi 4670 nop.m 0 4671 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2 4672 nop.i 0 4673} 4674;; 4675{ .mfi 4676 nop.m 0 4677 fma.s1 fA17 = fA17, FR_FracX, fA16 4678 nop.i 0 4679} 4680{.mfi 4681 nop.m 0 4682 fma.s1 fA13 = fA13, FR_FracX, fA12 4683 nop.i 0 4684} 4685;; 4686{ .mfi 4687 nop.m 0 4688 fma.s1 fA19 = fA19, FR_FracX, fA18 4689 nop.i 0 4690} 4691{.mfi 4692 nop.m 0 4693 fma.s1 fA15 = fA15, FR_FracX, fA14 4694 nop.i 0 4695} 4696;; 4697{.mfi 4698 nop.m 0 4699 fma.s1 fPol = fA7, FR_FracX, fA6 4700 nop.i 0 4701} 4702;; 4703{.mfi 4704 
nop.m 0 4705 fma.s1 fA9 = fA9, FR_FracX, fA8 4706 nop.i 0 4707} 4708;; 4709{ .mfi 4710 nop.m 0 4711 fms.s1 fResL = fA2, FR_FracX, fResH // delta(A2*x) 4712 nop.i 0 4713} 4714{.mfi 4715 nop.m 0 4716 fadd.s1 fRes1H = fResH, fA1 // (A2*x + A1)hi 4717 nop.i 0 4718} 4719;; 4720{ .mfi 4721 nop.m 0 4722 fma.s1 fA11 = fA11, FR_FracX, fA10 4723 nop.i 0 4724} 4725{.mfi 4726 nop.m 0 4727 fma.s1 fA5L = fA4L, fA4L, f0 // x^4 4728 nop.i 0 4729} 4730;; 4731{ .mfi 4732 nop.m 0 4733 fma.s1 fA19 = fA19, fA4L, fA17 4734 nop.i 0 4735} 4736{.mfi 4737 nop.m 0 4738 fma.s1 fA15 = fA15, fA4L, fA13 4739 nop.i 0 4740} 4741;; 4742{ .mfi 4743 nop.m 0 4744 fma.s1 fPol = fPol, FR_FracX, fA5 4745 nop.i 0 4746} 4747{.mfi 4748 nop.m 0 4749 fma.s1 fA3L = fA4L, FR_FracX, f0 // x^3 4750 nop.i 0 4751} 4752;; 4753{ .mfi 4754 nop.m 0 4755 // delta(A2*x) + A2L*x = (A2*x)lo 4756 fma.s1 fResL = fA2L, FR_FracX, fResL 4757 nop.i 0 4758} 4759{.mfi 4760 nop.m 0 4761 fsub.s1 fRes1L = fA1, fRes1H 4762 nop.i 0 4763} 4764;; 4765{ .mfi 4766 nop.m 0 4767 fma.s1 fA11 = fA11, fA4L, fA9 4768 nop.i 0 4769} 4770{.mfi 4771 nop.m 0 4772 fma.s1 fA19 = fA19, fA5L, fA15 4773 nop.i 0 4774} 4775;; 4776{.mfi 4777 nop.m 0 4778 fma.s1 fPol = fPol, FR_FracX, fA4 4779 nop.i 0 4780} 4781;; 4782{ .mfi 4783 nop.m 0 4784 fadd.s1 fResL = fResL, fA1L // (A2*x)lo + A1L 4785 nop.i 0 4786} 4787{.mfi 4788 nop.m 0 4789 fadd.s1 fRes1L = fRes1L, fResH 4790 nop.i 0 4791} 4792;; 4793{ .mfi 4794 nop.m 0 4795 fma.s1 fRes2H = fRes1H, FR_FracX, f0 // ((A2*x + A1)*x)hi 4796 nop.i 0 4797} 4798;; 4799{.mfi 4800 nop.m 0 4801 fma.s1 fA19 = fA19, fA5L, fA11 4802 nop.i 0 4803} 4804;; 4805{.mfi 4806 nop.m 0 4807 fma.s1 fPol = fPol, FR_FracX, fA3 4808 nop.i 0 4809} 4810;; 4811{ .mfi 4812 nop.m 0 4813 fadd.s1 fRes1L = fRes1L, fResL // (A2*x + A1)lo 4814 nop.i 0 4815} 4816;; 4817{ .mfi 4818 nop.m 0 4819 // delta((A2*x + A1)*x) 4820 fms.s1 fRes2L = fRes1H, FR_FracX, fRes2H 4821 nop.i 0 4822} 4823{.mfi 4824 nop.m 0 4825 fadd.s1 fRes3H = fRes2H, fA0 // ((A2*x + A1)*x 
+ A0)hi 4826 nop.i 0 4827} 4828;; 4829{ .mfi 4830 nop.m 0 4831 fma.s1 fA19 = fA19, fA5L, f0 4832 nop.i 0 4833} 4834 4835;; 4836{ .mfi 4837 nop.m 0 4838 fma.s1 fRes2L = fRes1L, FR_FracX, fRes2L // ((A2*x + A1)*x)lo 4839 nop.i 0 4840} 4841{.mfi 4842 nop.m 0 4843 fsub.s1 fRes3L = fRes2H, fRes3H 4844 nop.i 0 4845} 4846;; 4847{.mfi 4848 nop.m 0 4849 fma.s1 fPol = fA19, FR_FracX, fPol 4850 nop.i 0 4851} 4852;; 4853{ .mfi 4854 nop.m 0 4855 fadd.s1 fRes3L = fRes3L, fA0 4856 nop.i 0 4857} 4858{.mfi 4859 nop.m 0 4860 fadd.s1 fRes2L = fRes2L, fA0L // ((A2*x + A1)*x)lo + A0L 4861 nop.i 0 4862} 4863;; 4864{ .mfi 4865 nop.m 0 4866 fadd.s1 fRes3L = fRes3L, fRes2L // (((A2*x + A1)*x) + A0)lo 4867 nop.i 0 4868} 4869;; 4870{.mfi 4871 nop.m 0 4872 fma.s1 fRes3L = fPol, fA3L, fRes3L // lo += fPol * x^3 (fA3L holds x^3) 4873 nop.i 0 4874} 4875;; 4876{ .mfb 4877 nop.m 0 4878 // final result for arguments which are close to negative roots 4879 fma.s0 f8 = fRes3H, f1, fRes3L 4880 // exit here for arguments which are close to negative roots 4881 br.ret.sptk b0 4882} 4883;; 4884 4885// here if |x| < 0.5 // the log(|x|) pieces are produced negated (N is negated before use), // so the sums below form sign(x)*Pol(|x|) - log(|x|) 4886.align 32 4887lgammal_0_half: 4888{ .mfi 4889 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 4890 fma.s1 fA4L = f8, f8, f0 // x^2 4891 addl rPolDataPtr = @ltoff(lgammal_0_Half_data), gp 4892} 4893{ .mfi 4894 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1 4895 nop.f 0 4896 addl rLnSinDataPtr = @ltoff(lgammal_lnsin_data), gp 4897} 4898;; 4899{ .mfi 4900 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 4901 nop.f 0 4902 // Point to Constants_Z_2 4903 add GR_ad_z_2 = 0x140, GR_ad_z_1 4904} 4905{ .mfi 4906 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q 4907 nop.f 0 4908 // Point to Constants_G_H_h2 4909 add GR_ad_tbl_2 = 0x180, GR_ad_z_1 4910} 4911;; 4912{ .mfi 4913 ld8 rPolDataPtr = [rPolDataPtr] 4914 nop.f 0 4915 // Point to Constants_G_H_h3 4916 add GR_ad_tbl_3 = 0x280, GR_ad_z_1 4917} 4918{ .mfi 4919 ldfd FR_h = [GR_ad_tbl_1] // Load h_1 4920 nop.f 0 4921 sub GR_N = rExpX, rExpHalf, 1 // N = rExpX - rExpHalf - 1 4922} 4923;; 4924{ .mfi 
4925 ld8 rLnSinDataPtr = [rLnSinDataPtr] 4926 nop.f 0 4927 // Get bits 30-15 of X_0 * Z_1 4928 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 4929} 4930{ .mfi 4931 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi 4932 nop.f 0 4933 sub GR_N = r0, GR_N // N = -N, so the log terms built from N come out negated 4934} 4935;; 4936// 4937// For performance, don't use result of pmpyshr2.u for 4 cycles. 4938// 4939{ .mfi 4940 ldfe FR_log2_lo = [GR_ad_q], 16 // Load log2_lo 4941 nop.f 0 4942 add rTmpPtr2 = 320, rPolDataPtr 4943} 4944{ .mfi 4945 add rTmpPtr = 32, rPolDataPtr 4946 nop.f 0 4947 // exponent of 0.25 4948 adds rExp2 = -1, rExpHalf 4949} 4950;; 4951{ .mfi 4952 ldfpd fA3, fA3L = [rPolDataPtr], 16 // A3 4953 fma.s1 fA5L = fA4L, fA4L, f0 // x^4 4954 nop.i 0 4955} 4956{ .mfi 4957 ldfpd fA1, fA1L = [rTmpPtr], 16 // A1 4958 fms.s1 fB8 = f8, f8, fA4L // x^2 - <x^2> 4959 // set p6 if -0.5 < x <= -0.25 4960(p15) cmp.eq.unc p6, p0 = rExpX, rExp2 4961} 4962;; 4963{ .mfi 4964 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 4965 nop.f 0 4966 // set p6 if -0.5 < x <= -0.40625 4967(p6) cmp.le.unc p6, p0 = 10, GR_Index1 4968} 4969{ .mfi 4970 ldfe fA21 = [rTmpPtr2], -16 // A21 4971 // (the setf.sig in the next bundle puts integer N into rightmost significand) 4972 nop.f 0 4973 adds rTmpPtr = 240, rTmpPtr 4974} 4975;; 4976{ .mfi 4977 setf.sig fFloatN = GR_N 4978 nop.f 0 4979 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 4980} 4981{ .mfi 4982 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4 4983 nop.f 0 4984 adds rPolDataPtr = 304, rPolDataPtr 4985} 4986;; 4987{ .mfi 4988 ldfe fA20 = [rTmpPtr2], -32 // A20 4989 nop.f 0 4990 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2 4991} 4992{ .mfi 4993 ldfe fA19 = [rTmpPtr], -32 // A19 4994 nop.f 0 4995 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2 4996} 4997;; 4998{ .mfi 4999 ldfe fA17 = [rTmpPtr], -32 // A17 5000 nop.f 0 5001 adds rTmpPtr3 = 8, GR_ad_tbl_2 5002} 5003{ .mfb 5004 ldfe fA18 = [rTmpPtr2], -32 // A18 5005 nop.f 0 5006 // branch to special path for -0.5 < x <= -0.40625 5007(p6) br.cond.spnt lgammal_near_neg_half 
5008} 5009;; 5010{ .mmf 5011 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 5012 ldfe fA15 = [rTmpPtr], -32 // A15 5013 fma.s1 fB20 = fA5L, fA5L, f0 // x^8 5014} 5015;; 5016{ .mmf 5017 ldfe fA16 = [rTmpPtr2], -32 // A16 5018 ldfe fA13 = [rTmpPtr], -32 // A13 5019 fms.s1 fB16 = fA4L, fA4L, fA5L // <x^2>^2 - <x^4> 5020} 5021;; 5022{ .mmf 5023 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2 5024 ldfd FR_h2 = [rTmpPtr3] // Load h_2 5025 fmerge.s fB10 = f8, fA5L // sign(x) * x^4 5026} 5027;; 5028{ .mmi 5029 ldfe fA14 = [rTmpPtr2], -32 // A14 5030 ldfe fA11 = [rTmpPtr], -32 // A11 5031 // Get bits 30-15 of X_1 * Z_2 5032 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 5033} 5034;; 5035// 5036// For performance, don't use result of pmpyshr2.u for 4 cycles. 5037// 5038{ .mfi 5039 ldfe fA12 = [rTmpPtr2], -32 // A12 5040 fma.s1 fRes4H = fA3, fAbsX, f0 // (A3*|x|)hi 5041 adds rTmpPtr3 = 16, GR_ad_q 5042} 5043{ .mfi 5044 ldfe fA9 = [rTmpPtr], -32 // A9 5045 nop.f 0 5046 nop.i 0 5047} 5048;; 5049{ .mmf 5050 ldfe fA10 = [rTmpPtr2], -32 // A10 5051 ldfe fA7 = [rTmpPtr], -32 // A7 5052 fma.s1 fB18 = fB20, fB20, f0 // x^16 5053} 5054;; 5055{ .mmf 5056 ldfe fA8 = [rTmpPtr2], -32 // A8 5057 ldfe fA22 = [rPolDataPtr], 16 // A22 5058 fcvt.xf fFloatN = fFloatN 5059} 5060;; 5061{ .mfi 5062 ldfe fA5 = [rTmpPtr], -32 // A5 5063 fma.s1 fA21 = fA21, fAbsX, fA20 // v16 5064 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 5065} 5066{ .mfi 5067 ldfe fA6 = [rTmpPtr2], -32 // A6 5068 nop.f 0 5069 nop.i 0 5070} 5071;; 5072{ .mmf 5073 // Point to G_3 5074 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 5075 ldfe fA4 = [rTmpPtr2], -32 // A4 5076 fma.s1 fA19 = fA19, fAbsX, fA18 // v13 5077} 5078;; 5079.pred.rel "mutex",p14,p15 5080{ .mfi 5081 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3 5082 fms.s1 fRes4L = fA3, fAbsX, fRes4H // delta(A3*|x|) 5083(p14) adds rSgnGam = 1, r0 // signgam = 1 under p14 (presumably x > 0 - confirm) 5084} 5085{ .mfi 5086 cmp.eq p6, p7 = 4, rSgnGamSize 5087 fadd.s1 fRes2H = fRes4H, fA2 // (A3*|x| + A2)hi 5088(p15) adds rSgnGam = -1, r0 // signgam = -1 under p15 (x is negative) 5089} 5090;; 5091 5092{ .mfi 5093 ldfd FR_h3 = 
[GR_ad_tbl_3] // Load h_3 5094 fma.s1 fA17 = fA17, fAbsX, fA16 // v12 5095 nop.i 0 5096} 5097;; 5098{ .mfi 5099 ldfe FR_Q3 = [GR_ad_q], 32 // Load Q3 5100 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 5101 nop.i 0 5102} 5103{ .mfi 5104 ldfe FR_Q2 = [rTmpPtr3], 16 // Load Q2 5105 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 5106 nop.i 0 5107} 5108;; 5109{ .mfi 5110 ldfe FR_Q1 = [GR_ad_q] // Load Q1 5111 fma.s1 fA15 = fA15, fAbsX, fA14 // v8 5112 nop.i 0 5113} 5114{ .mfi 5115 adds rTmpPtr3 = 32, rLnSinDataPtr 5116 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 5117 nop.i 0 5118} 5119;; 5120{ .mmf 5121 ldfpd fLnSin2, fLnSin2L = [rLnSinDataPtr], 16 5122 ldfe fLnSin6 = [rTmpPtr3], 32 5123 fma.s1 fA13 = fA13, fAbsX, fA12 // v7 5124 5125} 5126;; 5127{ .mfi 5128 ldfe fLnSin4 = [rLnSinDataPtr], 32 5129 fma.s1 fRes4L = fA3L, fAbsX, fRes4L // A3L*|x| + delta(A3*|x|) = (A3*|x|)lo 5130 nop.i 0 5131} 5132{ .mfi 5133 ldfe fLnSin10 = [rTmpPtr3], 32 5134 fsub.s1 fRes2L = fA2, fRes2H 5135 nop.i 0 5136} 5137;; 5138{ .mfi 5139 ldfe fLnSin8 = [rLnSinDataPtr], 32 5140 fma.s1 fResH = fRes2H, fAbsX, f0 5141 nop.i 0 5142} 5143{ .mfi 5144 ldfe fLnSin14 = [rTmpPtr3], 32 5145 fma.s1 fA22 = fA22, fA4L, fA21 // v15 5146 nop.i 0 5147} 5148;; 5149{ .mfi 5150 ldfe fLnSin12 = [rLnSinDataPtr], 32 5151 fma.s1 fA9 = fA9, fAbsX, fA8 // v4 5152 nop.i 0 5153} 5154{ .mfi 5155 ldfd fLnSin18 = [rTmpPtr3], 16 5156 fma.s1 fA11 = fA11, fAbsX, fA10 // v5 5157 nop.i 0 5158} 5159;; 5160{ .mfi 5161 ldfe fLnSin16 = [rLnSinDataPtr], 24 5162 fma.s1 fA19 = fA19, fA4L, fA17 // v11 5163 nop.i 0 5164} 5165{ .mfi 5166 ldfd fLnSin22 = [rTmpPtr3], 16 5167 fma.s1 fPolL = fA7, fAbsX, fA6 5168 nop.i 0 5169} 5170;; 5171{ .mfi 5172 ldfd fLnSin20 = [rLnSinDataPtr], 16 5173 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 5174 nop.i 0 5175} 5176{ .mfi 5177 ldfd fLnSin26 = [rTmpPtr3], 16 5178 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 5179 nop.i 0 5180} 5181;; 5182{ .mfi 5183 ldfd fLnSin24 = [rLnSinDataPtr], 16 5184 fadd.s1 fRes2L = fRes2L, fRes4H 5185 nop.i 0 
5186} 5187{ .mfi 5188 ldfd fLnSin30 = [rTmpPtr3], 16 5189 fadd.s1 fA2L = fA2L, fRes4L 5190 nop.i 0 5191} 5192;; 5193{ .mfi 5194 ldfd fLnSin28 = [rLnSinDataPtr], 16 5195 fms.s1 fResL = fRes2H, fAbsX, fResH 5196 nop.i 0 5197} 5198{ .mfi 5199 ldfd fLnSin34 = [rTmpPtr3], 8 5200 fadd.s1 fRes2H = fResH, fA1 5201 nop.i 0 5202} 5203;; 5204{ .mfi 5205 ldfd fLnSin32 = [rLnSinDataPtr] 5206 fma.s1 fA11 = fA11, fA4L, fA9 // v3 5207 nop.i 0 5208} 5209{ .mfi 5210 ldfd fLnSin36 = [rTmpPtr3] 5211 fma.s1 fA15 = fA15, fA4L, fA13 // v6 5212 nop.i 0 5213} 5214;; 5215 5216{ .mfi 5217 // store signgam if size of variable is 4 bytes 5218(p6) st4 [rSgnGamAddr] = rSgnGam 5219 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 5220 nop.i 0 5221} 5222{ .mfi 5223 // store signgam if size of variable is 8 bytes 5224(p7) st8 [rSgnGamAddr] = rSgnGam 5225 fma.s1 fA5 = fA5, fAbsX, fA4 5226 nop.i 0 5227} 5228;; 5229{ .mfi 5230 nop.m 0 5231 fms.s1 FR_r = FR_G, fSignifX, f1 // r = G * S_hi - 1 5232 nop.i 0 5233} 5234{ .mfi 5235 nop.m 0 5236 // Negated high part of log(|x|): N * log2_hi - H with N already negated, i.e. -Y_hi 5237 fms.s1 FR_log2_hi = fFloatN, FR_log2_hi, FR_H 5238 nop.i 0 5239} 5240;; 5241{ .mfi 5242 nop.m 0 5243 fadd.s1 fA3L = fRes2L, fA2L 5244 nop.i 0 5245} 5246{ .mfi 5247 nop.m 0 5248 fma.s1 fA22 = fA22, fA5L, fA19 5249 nop.i 0 5250} 5251;; 5252{ .mfi 5253 nop.m 0 5254 fsub.s1 fRes2L = fA1, fRes2H 5255 nop.i 0 5256} 5257{ .mfi 5258 nop.m 0 5259 fma.s1 fRes3H = fRes2H, f8, f0 5260 nop.i 0 5261} 5262;; 5263{ .mfi 5264 nop.m 0 5265 fma.s1 fA15 = fA15, fA5L, fA11 // v2 5266 nop.i 0 5267} 5268{ .mfi 5269 nop.m 0 5270 fma.s1 fLnSin18 = fLnSin18, fA4L, fLnSin16 5271 nop.i 0 5272} 5273;; 5274{ .mfi 5275 nop.m 0 5276 // h = N * log2_lo - h with N already negated, i.e. the negated low correction 5277 fms.s1 FR_h = fFloatN, FR_log2_lo, FR_h 5278 nop.i 0 5279} 5280{ .mfi 5281 nop.m 0 5282 fma.s1 fPolL = fPolL, fA4L, fA5 5283 nop.i 0 5284} 5285;; 5286{ .mfi 5287 nop.m 0 5288 // poly_lo = r * Q4 + Q3 5289 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 5290 nop.i 0 5291} 5292{ .mfi 5293 
nop.m 0 5294 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r 5295 nop.i 0 5296} 5297;; 5298{ .mfi 5299 nop.m 0 5300 fma.s1 fResL = fA3L, fAbsX, fResL 5301 nop.i 0 5302} 5303{ .mfi 5304 nop.m 0 5305 fma.s1 fLnSin30 = fLnSin30, fA4L, fLnSin28 5306 nop.i 0 5307} 5308;; 5309{ .mfi 5310 nop.m 0 5311 fadd.s1 fRes2L = fRes2L, fResH 5312 nop.i 0 5313} 5314{ .mfi 5315 nop.m 0 5316 fms.s1 fRes3L = fRes2H, f8, fRes3H 5317 nop.i 0 5318} 5319;; 5320{ .mfi 5321 nop.m 0 5322 fadd.s1 fRes1H = fRes3H, FR_log2_hi 5323 nop.i 0 5324} 5325{ .mfi 5326 nop.m 0 5327 fma.s1 fPol = fB20, fA22, fA15 5328 nop.i 0 5329} 5330;; 5331{ .mfi 5332 nop.m 0 5333 fma.s1 fLnSin34 = fLnSin34, fA4L, fLnSin32 5334 nop.i 0 5335} 5336{ .mfi 5337 nop.m 0 5338 fma.s1 fLnSin14 = fLnSin14, fA4L, fLnSin12 5339 nop.i 0 5340} 5341;; 5342 5343{ .mfi 5344 nop.m 0 5345 // poly_lo = poly_lo * r + Q2 5346 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 5347 nop.i 0 5348} 5349{ .mfi 5350 nop.m 0 5351 fnma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = -(r^3), negated via fnma 5352 nop.i 0 5353} 5354;; 5355{ .mfi 5356 nop.m 0 5357 // poly_hi = Q1 * rsq + r 5358 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 5359 nop.i 0 5360} 5361{ .mfi 5362 nop.m 0 5363 fadd.s1 fA1L = fA1L, fResL 5364 nop.i 0 5365} 5366;; 5367 5368{ .mfi 5369 nop.m 0 5370 fma.s1 fLnSin22 = fLnSin22, fA4L, fLnSin20 5371 nop.i 0 5372} 5373{ .mfi 5374 nop.m 0 5375 fma.s1 fLnSin26 = fLnSin26, fA4L, fLnSin24 5376 nop.i 0 5377} 5378;; 5379 5380{ .mfi 5381 nop.m 0 5382 fsub.s1 fRes1L = FR_log2_hi, fRes1H 5383 nop.i 0 5384} 5385{ .mfi 5386 nop.m 0 5387 fma.s1 fPol = fPol, fA5L, fPolL 5388 nop.i 0 5389} 5390;; 5391{ .mfi 5392 nop.m 0 5393 fma.s1 fLnSin34 = fLnSin36, fA5L, fLnSin34 5394 nop.i 0 5395} 5396{ .mfi 5397 nop.m 0 5398 fma.s1 fLnSin18 = fLnSin18, fA5L, fLnSin14 5399 nop.i 0 5400} 5401;; 5402{ .mfi 5403 nop.m 0 5404 fma.s1 fLnSin6 = fLnSin6, fA4L, fLnSin4 5405 nop.i 0 5406} 5407{ .mfi 5408 nop.m 0 5409 fma.s1 fLnSin10 = fLnSin10, fA4L, fLnSin8 5410 nop.i 0 5411} 5412;; 5413{ .mfi 5414 nop.m 0 5415 
// poly_hi = Q1 * rsq + r (same operands as the earlier poly_hi fma; recomputed) 5416 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 5417 nop.i 0 5418} 5419{ .mfi 5420 nop.m 0 5421 fadd.s1 fRes2L = fRes2L, fA1L 5422 nop.i 0 5423} 5424;; 5425{ .mfi 5426 nop.m 0 5427 // poly_lo = poly_lo*rcub + h = -(poly_lo*r^3 + h_total), since rcub and h are negated 5428 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h 5429 nop.i 0 5430} 5431{ .mfi 5432 nop.m 0 5433 fma.s1 fB2 = fLnSin2, fA4L, f0 5434 nop.i 0 5435} 5436;; 5437{ .mfi 5438 nop.m 0 5439 fadd.s1 fRes1L = fRes1L, fRes3H 5440 nop.i 0 5441} 5442{ .mfi 5443 nop.m 0 5444 fma.s1 fPol = fPol, fB10, f0 5445 nop.i 0 5446} 5447;; 5448{ .mfi 5449 nop.m 0 5450 fma.s1 fLnSin26 = fLnSin26, fA5L, fLnSin22 5451 nop.i 0 5452} 5453{ .mfi 5454 nop.m 0 5455 fma.s1 fLnSin34 = fLnSin34, fA5L, fLnSin30 5456 nop.i 0 5457} 5458;; 5459{ .mfi 5460 nop.m 0 5461 fma.s1 fLnSin10 = fLnSin10, fA5L, fLnSin6 5462 nop.i 0 5463} 5464{ .mfi 5465 nop.m 0 5466 fma.s1 fLnSin2L = fLnSin2L, fA4L, f0 5467 nop.i 0 5468} 5469;; 5470 5471{ .mfi 5472 nop.m 0 5473 fma.s1 fRes3L = fRes2L, f8, fRes3L 5474 nop.i 0 5475} 5476;; 5477{ .mfi 5478 nop.m 0 5479 // -Y_lo = poly_lo - poly_hi (poly_lo is already negated) 5480 fsub.s1 FR_log2_lo = FR_poly_lo, FR_poly_hi 5481 nop.i 0 5482} 5483{ .mfi 5484 nop.m 0 5485 fms.s1 fB4 = fLnSin2, fA4L, fB2 5486 nop.i 0 5487} 5488;; 5489{ .mfi 5490 nop.m 0 5491 fadd.s1 fRes2H = fRes1H, fPol 5492 nop.i 0 5493} 5494;; 5495{ .mfi 5496 nop.m 0 5497 fma.s1 fLnSin34 = fLnSin34, fB20, fLnSin26 5498 nop.i 0 5499} 5500;; 5501{ .mfi 5502 nop.m 0 5503 fma.s1 fLnSin18 = fLnSin18, fB20, fLnSin10 5504 nop.i 0 5505} 5506{ .mfi 5507 nop.m 0 5508 fma.s1 fLnSin2L = fB8, fLnSin2, fLnSin2L 5509 nop.i 0 5510} 5511;; 5512 5513{ .mfi 5514 nop.m 0 5515 fadd.s1 FR_log2_lo = FR_log2_lo, fRes3L 5516 nop.i 0 5517} 5518;; 5519{ .mfi 5520 nop.m 0 5521 fsub.s1 fRes2L = fRes1H, fRes2H 5522 nop.i 0 5523} 5524;; 5525{ .mfi 5526 nop.m 0 5527 fma.s1 fB6 = fLnSin34, fB18, fLnSin18 5528 nop.i 0 5529} 5530{ .mfi 5531 nop.m 0 5532 fadd.s1 fB4 = fLnSin2L, fB4 5533 nop.i 0 5534} 5535;; 5536 5537{ .mfi 5538 nop.m 0 5539 fadd.s1 
fRes1L = fRes1L, FR_log2_lo 5540 nop.i 0 5541} 5542;; 5543{ .mfi 5544 nop.m 0 5545 fadd.s1 fRes2L = fRes2L, fPol 5546 nop.i 0 5547} 5548;; 5549{ .mfi 5550 nop.m 0 5551 fma.s1 fB12 = fB6, fA5L, f0 5552 nop.i 0 5553} 5554;; 5555{ .mfi 5556 nop.m 0 5557 fadd.s1 fRes2L = fRes2L, fRes1L 5558 nop.i 0 5559} 5560;; 5561 5562{ .mfi 5563 nop.m 0 5564 fms.s1 fB14 = fB6, fA5L, fB12 // delta(fB6 * x^4) 5565 nop.i 0 5566} 5567{ .mfb 5568 nop.m 0 5569 fadd.s1 fLnSin30 = fB2, fB12 5570 // branch out if x is negative 5571(p15) br.cond.spnt _O_Half_neg 5572} 5573;; 5574{ .mfb 5575 nop.m 0 5576 // sign(x)*Pol(|x|) - log(|x|) 5577 fma.s0 f8 = fRes2H, f1, fRes2L 5578 // it's an answer already for positive x 5579 // exit if 0 < x < 0.5 5580 br.ret.sptk b0 5581} 5582;; 5583 5584// here if x is negative and |x| < 0.5 // adds the extra term held in fLnSin30 (hi) and fLnSin16/fB4/fB14 (lo pieces) // to the fRes2H/fRes2L pair, keeping a separate high and low part 5585.align 32 5586_O_Half_neg: 5587{ .mfi 5588 nop.m 0 5589 fma.s1 fB14 = fB16, fB6, fB14 5590 nop.i 0 5591} 5592{ .mfi 5593 nop.m 0 5594 fsub.s1 fLnSin16 = fB2, fLnSin30 // start of the rounding error of fB2 + fB12 5595 nop.i 0 5596} 5597;; 5598{ .mfi 5599 nop.m 0 5600 fadd.s1 fResH = fLnSin30, fRes2H // high part of the result 5601 nop.i 0 5602} 5603;; 5604{ .mfi 5605 nop.m 0 5606 fadd.s1 fLnSin16 = fLnSin16, fB12 5607 nop.i 0 5608} 5609{ .mfi 5610 nop.m 0 5611 fadd.s1 fB4 = fB14, fB4 5612 nop.i 0 5613} 5614;; 5615{ .mfi 5616 nop.m 0 5617 fadd.s1 fLnSin16 = fB4, fLnSin16 5618 nop.i 0 5619} 5620{ .mfi 5621 nop.m 0 5622 fsub.s1 fResL = fRes2H, fResH // start of the rounding error of fLnSin30 + fRes2H 5623 nop.i 0 5624} 5625;; 5626{ .mfi 5627 nop.m 0 5628 fadd.s1 fResL = fResL, fLnSin30 5629 nop.i 0 5630} 5631{ .mfi 5632 nop.m 0 5633 fadd.s1 fLnSin16 = fLnSin16, fRes2L 5634 nop.i 0 5635} 5636;; 5637{ .mfi 5638 nop.m 0 5639 fadd.s1 fResL = fResL, fLnSin16 // low part of the result 5640 nop.i 0 5641} 5642;; 5643{ .mfb 5644 nop.m 0 5645 // final result for -0.5 < x < 0 5646 fma.s0 f8 = fResH, f1, fResL 5647 // exit for -0.5 < x < 0 5648 br.ret.sptk b0 5649} 5650;; 5651 5652// here if x >= 8.0 5653// there are two computational paths: 5654// 1) For x >10.0 Stirling's formula is used 5655// 2) Polynomial approximation for 8.0 <= x <= 10.0 
5656.align 32 5657lgammal_big_positive: 5658{ .mfi 5659 addl rPolDataPtr = @ltoff(lgammal_data), gp 5660 fmerge.se fSignifX = f1, f8 5661 // Get high 15 bits of significand 5662 extr.u GR_X_0 = rSignifX, 49, 15 5663} 5664{.mfi 5665 shladd rZ1offsett = GR_Index1, 2, GR_ad_z_1 // Point to Z_1 5666 fnma.s1 fInvX = f8, fRcpX, f1 // start of 1st NR iteration 5667 adds rSignif1andQ = 0x5, r0 5668} 5669;; 5670{.mfi 5671 ld4 GR_Z_1 = [rZ1offsett] // Load Z_1 5672 nop.f 0 5673 shl rSignif1andQ = rSignif1andQ, 61 // significand of 1.25 5674} 5675{ .mfi 5676 cmp.eq p8, p0 = rExpX, rExp8 // p8 = 1 if 8.0 <= x < 16 5677 nop.f 0 5678 adds rSgnGam = 1, r0 // gamma is positive at this range 5679} 5680;; 5681{ .mfi 5682 shladd GR_ad_tbl_1 = GR_Index1, 4, rTbl1Addr// Point to G_1 5683 nop.f 0 5684 add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_Q 5685} 5686{ .mlx 5687 ld8 rPolDataPtr = [rPolDataPtr] 5688 movl rDelta = 0x3FF2000000000000 5689} 5690;; 5691{ .mfi 5692 ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 5693 nop.f 0 5694 add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2 5695} 5696{ .mfi 5697 // Point to Constants_G_H_h2 5698 add GR_ad_tbl_2 = 0x180, GR_ad_z_1 5699 nop.f 0 5700 // p8 = 1 if 8.0 <= x <= 10.0 5701(p8) cmp.leu.unc p8, p0 = rSignifX, rSignif1andQ 5702} 5703;; 5704{ .mfi 5705 ldfd FR_h = [GR_ad_tbl_1] // Load h_1 5706 nop.f 0 5707 // Get bits 30-15 of X_0 * Z_1 5708 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 5709} 5710{ .mfb 5711(p8) setf.d FR_MHalf = rDelta 5712 nop.f 0 5713(p8) br.cond.spnt lgammal_8_10 // branch out if 8.0 <= x <= 10.0 5714} 5715;; 5716// 5717// For performance, don't use result of pmpyshr2.u for 4 cycles. 
5718// 5719{ .mfi 5720 ldfe fA1 = [rPolDataPtr], 16 // Load overflow threshold 5721 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 1st NR iteration 5722 // Point to Constants_G_H_h3 5723 add GR_ad_tbl_3 = 0x280, GR_ad_z_1 5724} 5725{ .mlx 5726 nop.m 0 5727 movl rDelta = 0xBFE0000000000000 // -0.5 in DP 5728} 5729;; 5730{ .mfi 5731 ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi 5732 nop.f 0 5733 sub GR_N = rExpX, rExpHalf, 1 // unbiased exponent of x 5734} 5735;; 5736{ .mfi 5737 ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo 5738 nop.f 0 5739 nop.i 0 5740} 5741{ .mfi 5742 setf.d FR_MHalf = rDelta 5743 nop.f 0 5744 nop.i 0 5745} 5746;; 5747{ .mfi 5748 // Put integer N into rightmost significand 5749 setf.sig fFloatN = GR_N 5750 nop.f 0 5751 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 5752} 5753{ .mfi 5754 ldfe FR_Q4 = [GR_ad_q], 16 // Load Q4 5755 nop.f 0 5756 nop.i 0 5757} 5758;; 5759{ .mfi 5760 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2 5761 nop.f 0 5762 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2// Point to G_2 5763} 5764{ .mfi 5765 ldfe FR_Q3 = [GR_ad_q], 16 // Load Q3 5766 nop.f 0 5767 nop.i 0 5768} 5769;; 5770{ .mfi 5771 ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 5772 fnma.s1 fInvX = f8, fRcpX, f1 // start of 2nd NR iteration 5773 nop.i 0 5774} 5775;; 5776{ .mfi 5777 ldfps FR_G2, FR_H2 = [GR_ad_tbl_2], 8 // Load G_2, H_2 5778 nop.f 0 5779 nop.i 0 5780} 5781;; 5782{ .mfi 5783 ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2 5784 nop.f 0 5785 nop.i 0 5786} 5787;; 5788{ .mfi 5789 ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 5790 nop.f 0 5791 // Get bits 30-15 of X_1 * Z_2 5792 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 5793} 5794;; 5795// 5796// For performance, don't use result of pmpyshr2.u for 4 cycles. 
5797// 5798{ .mfi 5799 ldfe FR_Q1 = [GR_ad_q] // Load Q1 5800 fcmp.gt.s1 p7,p0 = f8, fA1 // check if x > overflow threshold 5801 nop.i 0 5802} 5803;; 5804{.mfi 5805 ldfpd fA0, fA0L = [rPolDataPtr], 16 // Load two parts of C 5806 fma.s1 fRcpX = fInvX, fRcpX, fRcpX // end of 2nd NR iteration 5807 nop.i 0 5808} 5809;; 5810{ .mfb 5811 ldfpd fB2, fA1 = [rPolDataPtr], 16 5812 nop.f 0 5813(p7) br.cond.spnt lgammal_overflow // branch if x > overflow threshold 5814} 5815;; 5816{.mfi 5817 ldfe fB4 = [rPolDataPtr], 16 5818 fcvt.xf fFloatN = fFloatN 5819 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2 5820} 5821;; 5822{ .mfi 5823 shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3// Point to G_3 5824 nop.f 0 5825 nop.i 0 5826} 5827{ .mfi 5828 ldfe fB6 = [rPolDataPtr], 16 5829 nop.f 0 5830 nop.i 0 5831} 5832;; 5833{ .mfi 5834 ldfps FR_G3, FR_H3 = [GR_ad_tbl_3], 8 // Load G_3, H_3 5835 nop.f 0 5836 nop.i 0 5837} 5838;; 5839{ .mfi 5840 ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 5841 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 5842 nop.i 0 5843} 5844{ .mfi 5845 nop.m 0 5846 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 5847 nop.i 0 5848} 5849;; 5850 5851{ .mfi 5852 ldfe fB8 = [rPolDataPtr], 16 5853 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2 5854 nop.i 0 5855} 5856{ .mfi 5857 nop.m 0 5858 fnma.s1 fInvX = f8, fRcpX, f1 // start of 3rd NR iteration 5859 nop.i 0 5860} 5861;; 5862{ .mfi 5863 ldfe fB10 = [rPolDataPtr], 16 5864 nop.f 0 5865 cmp.eq p6, p7 = 4, rSgnGamSize 5866} 5867;; 5868{ .mfi 5869 ldfe fB12 = [rPolDataPtr], 16 5870 nop.f 0 5871 nop.i 0 5872} 5873;; 5874{ .mfi 5875 ldfe fB14 = [rPolDataPtr], 16 5876 nop.f 0 5877 nop.i 0 5878} 5879;; 5880 5881{ .mfi 5882 ldfe fB16 = [rPolDataPtr], 16 5883 // get double extended coefficients from two doubles 5884 // two doubles are needed in Stitling's formula for negative x 5885 fadd.s1 fB2 = fB2, fA1 5886 nop.i 0 5887} 5888;; 5889{ .mfi 5890 ldfe fB18 = [rPolDataPtr], 16 5891 fma.s1 fInvX = fInvX, fRcpX, fRcpX // end of 3rd NR 
iteration 5892 nop.i 0 5893} 5894;; 5895{ .mfi 5896 ldfe fB20 = [rPolDataPtr], 16 5897 nop.f 0 5898 nop.i 0 5899} 5900;; 5901{ .mfi 5902 // store signgam if size of variable is 4 bytes 5903(p6) st4 [rSgnGamAddr] = rSgnGam 5904 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3 5905 nop.i 0 5906} 5907{ .mfi 5908 // store signgam if size of variable is 8 bytes 5909(p7) st8 [rSgnGamAddr] = rSgnGam 5910 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3 5911 nop.i 0 5912} 5913;; 5914{ .mfi 5915 nop.m 0 5916 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 5917 nop.i 0 5918} 5919;; 5920{ .mfi 5921 nop.m 0 5922 fma.s1 fRcpX = fInvX, fInvX, f0 // 1/x^2 5923 nop.i 0 5924} 5925{ .mfi 5926 nop.m 0 5927 fma.s1 fA0L = fB2, fInvX, fA0L 5928 nop.i 0 5929} 5930;; 5931{ .mfi 5932 nop.m 0 5933 fms.s1 FR_r = fSignifX, FR_G, f1 // r = G * S_hi - 1 5934 nop.i 0 5935} 5936{ .mfi 5937 nop.m 0 5938 // High part of the log(x): Y_hi = N * log2_hi + H 5939 fma.s1 fRes2H = fFloatN, FR_log2_hi, FR_H 5940 nop.i 0 5941} 5942;; 5943 5944{ .mfi 5945 nop.m 0 5946 // h = N * log2_lo + h 5947 fma.s1 FR_h = fFloatN, FR_log2_lo, FR_h 5948 nop.i 0 5949} 5950{ .mfi 5951 nop.m 0 5952 // High part of the log(x): Y_hi = N * log2_hi + H 5953 fma.s1 fRes1H = fFloatN, FR_log2_hi, FR_H 5954 nop.i 0 5955} 5956;; 5957{.mfi 5958 nop.m 0 5959 fma.s1 fPol = fB18, fRcpX, fB16 // v9 5960 nop.i 0 5961} 5962{ .mfi 5963 nop.m 0 5964 fma.s1 fA2L = fRcpX, fRcpX, f0 // v10 5965 nop.i 0 5966} 5967;; 5968{.mfi 5969 nop.m 0 5970 fma.s1 fA3 = fB6, fRcpX, fB4 // v3 5971 nop.i 0 5972} 5973{ .mfi 5974 nop.m 0 5975 fma.s1 fA4 = fB10, fRcpX, fB8 // v4 5976 nop.i 0 5977} 5978;; 5979{ .mfi 5980 nop.m 0 5981 fms.s1 fRes2H =fRes2H, f1, f1 // log_Hi(x) -1 5982 nop.i 0 5983} 5984{ .mfi 5985 nop.m 0 5986 // poly_lo = r * Q4 + Q3 5987 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 5988 nop.i 0 5989} 5990;; 5991{ .mfi 5992 nop.m 0 5993 fma.s1 fRes1H = fRes1H, FR_MHalf, f0 // -0.5*log_Hi(x) 5994 nop.i 0 5995} 5996{ .mfi 5997 nop.m 0 5998 
fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r 5999 nop.i 0 6000} 6001;; 6002{ .mfi 6003 nop.m 0 6004 fma.s1 fA7 = fB14, fRcpX, fB12 // v7 6005 nop.i 0 6006} 6007{ .mfi 6008 nop.m 0 6009 fma.s1 fA8 = fA2L, fB20, fPol // v8 6010 nop.i 0 6011} 6012;; 6013{ .mfi 6014 nop.m 0 6015 fma.s1 fA2 = fA4, fA2L, fA3 // v2 6016 nop.i 0 6017} 6018{ .mfi 6019 nop.m 0 6020 fma.s1 fA4L = fA2L, fA2L, f0 // v5 6021 nop.i 0 6022} 6023;; 6024{ .mfi 6025 nop.m 0 6026 fma.s1 fResH = fRes2H, f8, f0 // (x*(ln(x)-1))hi 6027 nop.i 0 6028} 6029{ .mfi 6030 nop.m 0 6031 // poly_lo = poly_lo * r + Q2 6032 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 6033 nop.i 0 6034} 6035;; 6036{ .mfi 6037 nop.m 0 6038 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 6039 nop.i 0 6040} 6041{ .mfi 6042 nop.m 0 6043 // poly_hi = Q1 * rsq + r 6044 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 6045 nop.i 0 6046} 6047;; 6048{ .mfi 6049 nop.m 0 6050 fma.s1 fA11 = fRcpX, fInvX, f0 // 1/x^3 6051 nop.i 0 6052} 6053{ .mfi 6054 nop.m 0 6055 fma.s1 fA6 = fA8, fA2L, fA7 // v6 6056 nop.i 0 6057} 6058;; 6059{ .mfi 6060 nop.m 0 6061 fms.s1 fResL = fRes2H, f8, fResH // d(x*(ln(x)-1)) 6062 nop.i 0 6063} 6064{ .mfi 6065 nop.m 0 6066 fadd.s1 fRes3H = fResH, fRes1H // (x*(ln(x)-1) -0.5ln(x))hi 6067 nop.i 0 6068} 6069;; 6070{ .mfi 6071 nop.m 0 6072 // poly_lo = poly_lo*r^3 + h 6073 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h 6074 nop.i 0 6075} 6076;; 6077{ .mfi 6078 nop.m 0 6079 fma.s1 fPol = fA4L, fA6, fA2 // v1 6080 nop.i 0 6081} 6082{ .mfi 6083 nop.m 0 6084 // raise inexact exception 6085 fma.s0 FR_log2_lo = FR_log2_lo, FR_log2_lo, f0 6086 nop.i 0 6087} 6088;; 6089{ .mfi 6090 nop.m 0 6091 fadd.s1 fRes4H = fRes3H, fA0 // (x*(ln(x)-1) -0.5ln(x))hi + Chi 6092 nop.i 0 6093} 6094{ .mfi 6095 nop.m 0 6096 fsub.s1 fRes3L = fResH, fRes3H 6097 nop.i 0 6098} 6099;; 6100{ .mfi 6101 nop.m 0 6102 // Y_lo = poly_hi + poly_lo 6103 fadd.s1 fRes2L = FR_poly_hi, FR_poly_lo 6104 nop.i 0 6105} 6106;; 6107 6108{ .mfi 6109 nop.m 0 6110 fma.s1 fA0L = fPol, fA11, 
fA0L // S(1/x) + Clo 6111 nop.i 0 6112} 6113;; 6114{ .mfi 6115 nop.m 0 6116 fadd.s1 fRes3L = fRes3L, fRes1H 6117 nop.i 0 6118} 6119{ .mfi 6120 nop.m 0 6121 fsub.s1 fRes4L = fRes3H, fRes4H 6122 nop.i 0 6123} 6124;; 6125{ .mfi 6126 nop.m 0 6127 fma.s1 fResL = fRes2L, f8 , fResL // lo part of x*(ln(x)-1) 6128 nop.i 0 6129} 6130;; 6131{ .mfi 6132 nop.m 0 6133 // Clo + S(1/x) - 0.5*logLo(x) 6134 fma.s1 fA0L = fRes2L, FR_MHalf, fA0L 6135 nop.i 0 6136} 6137;; 6138{ .mfi 6139 nop.m 0 6140 fadd.s1 fRes4L = fRes4L, fA0 6141 nop.i 0 6142} 6143;; 6144{ .mfi 6145 nop.m 0 6146 // Clo + S(1/x) - 0.5*logLo(x) + (x*(ln(x)-1))lo 6147 fadd.s1 fA0L = fA0L, fResL 6148 nop.i 0 6149} 6150;; 6151{ .mfi 6152 nop.m 0 6153 fadd.s1 fRes4L = fRes4L, fRes3L 6154 nop.i 0 6155} 6156;; 6157{ .mfi 6158 nop.m 0 6159 fadd.s1 fRes4L = fRes4L, fA0L 6160 nop.i 0 6161} 6162;; 6163{ .mfb 6164 nop.m 0 6165 fma.s0 f8 = fRes4H, f1, fRes4L 6166 // exit for x > 10.0 6167 br.ret.sptk b0 6168} 6169;; 6170// here if 8.0 <= x <= 10.0 6171// Result = P15(y), where y = x/8.0 - 1.5 6172.align 32 6173lgammal_8_10: 6174{ .mfi 6175 addl rPolDataPtr = @ltoff(lgammal_8_10_data), gp 6176 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf // y = x/8.0 - 1.5 6177 cmp.eq p6, p7 = 4, rSgnGamSize 6178} 6179;; 6180{ .mfi 6181 ld8 rLnSinDataPtr = [rPolDataPtr] 6182 nop.f 0 6183 nop.i 0 6184} 6185{ .mfi 6186 ld8 rPolDataPtr = [rPolDataPtr] 6187 nop.f 0 6188 nop.i 0 6189} 6190;; 6191{ .mfi 6192 adds rZ1offsett = 32, rLnSinDataPtr 6193 nop.f 0 6194 nop.i 0 6195} 6196{ .mfi 6197 adds rLnSinDataPtr = 48, rLnSinDataPtr 6198 nop.f 0 6199 nop.i 0 6200} 6201;; 6202{ .mfi 6203 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 6204 nop.f 0 6205 nop.i 0 6206} 6207{ .mfi 6208 ldfe fA2 = [rZ1offsett], 32 // A5 6209 nop.f 0 6210 nop.i 0 6211} 6212;; 6213{ .mfi 6214 ldfpd fA0, fA0L = [rPolDataPtr], 16 // A0 6215 fma.s1 FR_rsq = FR_FracX, FR_FracX, f0 // y^2 6216 nop.i 0 6217} 6218{ .mfi 6219 ldfe fA3 = [rLnSinDataPtr],32 // A5 6220 nop.f 0 6221 nop.i 0 6222} 
6223;; 6224{ .mmf 6225 ldfe fA4 = [rZ1offsett], 32 // A4 6226 ldfe fA5 = [rLnSinDataPtr], 32 // A5 6227 nop.f 0 6228} 6229;; 6230{ .mmf 6231 ldfe fA6 = [rZ1offsett], 32 // A6 6232 ldfe fA7 = [rLnSinDataPtr], 32 // A7 6233 nop.f 0 6234} 6235;; 6236{ .mmf 6237 ldfe fA8 = [rZ1offsett], 32 // A8 6238 ldfe fA9 = [rLnSinDataPtr], 32 // A9 6239 nop.f 0 6240} 6241;; 6242{ .mmf 6243 ldfe fA10 = [rZ1offsett], 32 // A10 6244 ldfe fA11 = [rLnSinDataPtr], 32 // A11 6245 nop.f 0 6246} 6247;; 6248{ .mmf 6249 ldfe fA12 = [rZ1offsett], 32 // A12 6250 ldfe fA13 = [rLnSinDataPtr], 32 // A13 6251 fma.s1 FR_Q4 = FR_rsq, FR_rsq, f0 // y^4 6252} 6253;; 6254{ .mmf 6255 ldfe fA14 = [rZ1offsett], 32 // A14 6256 ldfe fA15 = [rLnSinDataPtr], 32 // A15 6257 nop.f 0 6258} 6259;; 6260{ .mfi 6261 nop.m 0 6262 fma.s1 fRes1H = FR_FracX, fA1, f0 6263 nop.i 0 6264} 6265;; 6266{ .mfi 6267 nop.m 0 6268 fma.s1 fA3 = fA3, FR_FracX, fA2 // v4 6269 nop.i 0 6270} 6271;; 6272{ .mfi 6273 nop.m 0 6274 fma.s1 fA5 = fA5, FR_FracX, fA4 // v5 6275 nop.i 0 6276} 6277;; 6278{ .mfi 6279 // store sign of GAMMA(x) if size of variable is 4 bytes 6280(p6) st4 [rSgnGamAddr] = rSgnGam 6281 fma.s1 fA3L = FR_Q4, FR_Q4, f0 // v9 = y^8 6282 nop.i 0 6283} 6284{ .mfi 6285 // store sign of GAMMA(x) if size of variable is 8 bytes 6286(p7) st8 [rSgnGamAddr] = rSgnGam 6287 fma.s1 fA7 = fA7, FR_FracX, fA6 // v7 6288 nop.i 0 6289} 6290;; 6291{ .mfi 6292 nop.m 0 6293 fma.s1 fA9 = fA9, FR_FracX, fA8 // v8 6294 nop.i 0 6295} 6296;; 6297{ .mfi 6298 nop.m 0 6299 fms.s1 fRes1L = FR_FracX, fA1, fRes1H 6300 nop.i 0 6301} 6302{ .mfi 6303 nop.m 0 6304 fma.s1 fA11 = fA11, FR_FracX, fA10 // v12 6305 nop.i 0 6306} 6307;; 6308{ .mfi 6309 nop.m 0 6310 fma.s1 fA13 = fA13, FR_FracX, fA12 // v13 6311 nop.i 0 6312} 6313{ .mfi 6314 nop.m 0 6315 fma.s1 fRes2H = fRes1H, f1, fA0 6316 nop.i 0 6317} 6318;; 6319{ .mfi 6320 nop.m 0 6321 fma.s1 fA15 = fA15, FR_FracX, fA14 // v16 6322 nop.i 0 6323} 6324{ .mfi 6325 nop.m 0 6326 fma.s1 fA5 = fA5, FR_rsq, fA3 // v3 
6327 nop.i 0 6328} 6329;; 6330{ .mfi 6331 nop.m 0 6332 fma.s1 fA9 = fA9, FR_rsq, fA7 // v6 6333 nop.i 0 6334} 6335;; 6336{ .mfi 6337 nop.m 0 6338 fma.s1 fRes1L = FR_FracX, fA1L, fRes1L 6339 nop.i 0 6340} 6341;; 6342{ .mfi 6343 nop.m 0 6344 fms.s1 fRes2L = fA0, f1, fRes2H 6345 nop.i 0 6346} 6347{ .mfi 6348 nop.m 0 6349 fma.s1 fA13 = fA13, FR_rsq, fA11 // v11 6350 nop.i 0 6351} 6352;; 6353{ .mfi 6354 nop.m 0 6355 fma.s1 fA9 = fA9, FR_Q4, fA5 // v2 6356 nop.i 0 6357} 6358;; 6359{ .mfi 6360 nop.m 0 6361 fma.s1 fRes1L = fRes1L, f1, fA0L 6362 nop.i 0 6363} 6364;; 6365{ .mfi 6366 nop.m 0 6367 fma.s1 fRes2L = fRes2L, f1, fRes1H 6368 nop.i 0 6369} 6370{ .mfi 6371 nop.m 0 6372 fma.s1 fA15 = fA15, FR_Q4, fA13 // v10 6373 nop.i 0 6374} 6375;; 6376{ .mfi 6377 nop.m 0 6378 fma.s1 fRes2L = fRes1L, f1, fRes2L 6379 nop.i 0 6380} 6381{ .mfi 6382 nop.m 0 6383 fma.s1 fPol = fA3L, fA15, fA9 6384 nop.i 0 6385} 6386;; 6387{ .mfi 6388 nop.m 0 6389 fma.s1 f8 = FR_rsq , fPol, fRes2H 6390 nop.i 0 6391} 6392{ .mfi 6393 nop.m 0 6394 fma.s1 fPol = fPol, FR_rsq, f0 6395 nop.i 0 6396} 6397;; 6398{ .mfi 6399 nop.m 0 6400 fms.s1 fRes1L = fRes2H, f1, f8 6401 nop.i 0 6402} 6403;; 6404{ .mfi 6405 nop.m 0 6406 fma.s1 fRes1L = fRes1L, f1, fPol 6407 nop.i 0 6408} 6409;; 6410{.mfi 6411 nop.m 0 6412 fma.s1 fRes1L = fRes1L, f1, fRes2L 6413 nop.i 0 6414} 6415;; 6416{ .mfb 6417 nop.m 0 6418 fma.s0 f8 = f8, f1, fRes1L 6419 // exit for 8.0 <= x <= 10.0 6420 br.ret.sptk b0 6421} 6422;; 6423 6424// here if 4.0 <=x < 8.0 6425.align 32 6426lgammal_4_8: 6427{ .mfi 6428 addl rPolDataPtr= @ltoff(lgammal_4_8_data),gp 6429 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf 6430 adds rSgnGam = 1, r0 6431} 6432;; 6433{ .mfi 6434 ld8 rPolDataPtr = [rPolDataPtr] 6435 nop.f 0 6436 nop.i 0 6437} 6438;; 6439 6440{ .mfb 6441 adds rTmpPtr = 160, rPolDataPtr 6442 nop.f 0 6443 // branch to special path which computes polynomial of 25th degree 6444 br.sptk lgamma_polynom25 6445} 6446;; 6447 6448// here if 2.25 <=x < 4.0 6449.align 32 
6450lgammal_2Q_4: 6451{ .mfi 6452 addl rPolDataPtr= @ltoff(lgammal_2Q_4_data),gp 6453 fms.s1 FR_FracX = fSignifX, f1, FR_MHalf 6454 adds rSgnGam = 1, r0 6455} 6456;; 6457{ .mfi 6458 ld8 rPolDataPtr = [rPolDataPtr] 6459 nop.f 0 6460 nop.i 0 6461} 6462;; 6463 6464{ .mfb 6465 adds rTmpPtr = 160, rPolDataPtr 6466 nop.f 0 6467 // branch to special path which computes polynomial of 25th degree 6468 br.sptk lgamma_polynom25 6469} 6470;; 6471 6472// here if 0.5 <= |x| < 0.75 6473.align 32 6474lgammal_half_3Q: 6475.pred.rel "mutex", p14, p15 6476{ .mfi 6477(p14) addl rPolDataPtr= @ltoff(lgammal_half_3Q_data),gp 6478 // FR_FracX = x - 0.625 for positive x 6479(p14) fms.s1 FR_FracX = f8, f1, FR_FracX 6480(p14) adds rSgnGam = 1, r0 6481} 6482{ .mfi 6483(p15) addl rPolDataPtr= @ltoff(lgammal_half_3Q_neg_data),gp 6484 // FR_FracX = x + 0.625 for negative x 6485(p15) fma.s1 FR_FracX = f8, f1, FR_FracX 6486(p15) adds rSgnGam = -1, r0 6487} 6488;; 6489{ .mfi 6490 ld8 rPolDataPtr = [rPolDataPtr] 6491 nop.f 0 6492 nop.i 0 6493} 6494;; 6495{ .mfb 6496 adds rTmpPtr = 160, rPolDataPtr 6497 nop.f 0 6498 // branch to special path which computes polynomial of 25th degree 6499 br.sptk lgamma_polynom25 6500} 6501;; 6502// here if 1.3125 <= x < 1.5625 6503.align 32 6504lgammal_loc_min: 6505{ .mfi 6506 adds rSgnGam = 1, r0 6507 nop.f 0 6508 nop.i 0 6509} 6510{ .mfb 6511 adds rTmpPtr = 160, rPolDataPtr 6512 fms.s1 FR_FracX = f8, f1, fA5L 6513 br.sptk lgamma_polynom25 6514} 6515;; 6516// here if -2.605859375 <= x < -2.5 6517// special polynomial approximation used since neither "near root" 6518// approximation nor reflection formula give satisfactory accuracy on 6519// this range 6520.align 32 6521_neg2andHalf: 6522{ .mfi 6523 addl rPolDataPtr= @ltoff(lgammal_neg2andHalf_data),gp 6524 fma.s1 FR_FracX = fB20, f1, f8 // 2.5 + x 6525 adds rSgnGam = -1, r0 6526} 6527;; 6528{.mfi 6529 ld8 rPolDataPtr = [rPolDataPtr] 6530 nop.f 0 6531 nop.i 0 6532} 6533;; 6534{ .mfb 6535 adds rTmpPtr = 160, 
rPolDataPtr 6536 nop.f 0 6537 // branch to special path which computes polynomial of 25th degree 6538 br.sptk lgamma_polynom25 6539} 6540;; 6541 6542// here if -0.5 < x <= -0.40625 6543.align 32 6544lgammal_near_neg_half: 6545{ .mmf 6546 addl rPolDataPtr= @ltoff(lgammal_near_neg_half_data),gp 6547 setf.exp FR_FracX = rExpHalf 6548 nop.f 0 6549} 6550;; 6551{ .mfi 6552 ld8 rPolDataPtr = [rPolDataPtr] 6553 nop.f 0 6554 adds rSgnGam = -1, r0 6555} 6556;; 6557{ .mfb 6558 adds rTmpPtr = 160, rPolDataPtr 6559 fma.s1 FR_FracX = FR_FracX, f1, f8 6560 // branch to special path which computes polynomial of 25th degree 6561 br.sptk lgamma_polynom25 6562} 6563;; 6564 6565// here if there an answer is P25(x) 6566// rPolDataPtr, rTmpPtr point to coefficients 6567// x is in FR_FracX register 6568.align 32 6569lgamma_polynom25: 6570{ .mfi 6571 ldfpd fA3, fA0L = [rPolDataPtr], 16 // A3 6572 nop.f 0 6573 cmp.eq p6, p7 = 4, rSgnGamSize 6574} 6575{ .mfi 6576 ldfpd fA18, fA19 = [rTmpPtr], 16 // D7, D6 6577 nop.f 0 6578 nop.i 0 6579} 6580;; 6581{ .mfi 6582 ldfpd fA1, fA1L = [rPolDataPtr], 16 // A1 6583 nop.f 0 6584 nop.i 0 6585} 6586{ .mfi 6587 ldfpd fA16, fA17 = [rTmpPtr], 16 // D4, D5 6588 nop.f 0 6589} 6590;; 6591{ .mfi 6592 ldfpd fA12, fA13 = [rPolDataPtr], 16 // D0, D1 6593 nop.f 0 6594 nop.i 0 6595} 6596{ .mfi 6597 ldfpd fA14, fA15 = [rTmpPtr], 16 // D2, D3 6598 nop.f 0 6599 nop.i 0 6600} 6601;; 6602{ .mfi 6603 ldfpd fA24, fA25 = [rPolDataPtr], 16 // C21, C20 6604 nop.f 0 6605 nop.i 0 6606} 6607{ .mfi 6608 ldfpd fA22, fA23 = [rTmpPtr], 16 // C19, C18 6609 nop.f 0 6610 nop.i 0 6611} 6612;; 6613{ .mfi 6614 ldfpd fA2, fA2L = [rPolDataPtr], 16 // A2 6615 fma.s1 fA4L = FR_FracX, FR_FracX, f0 // x^2 6616 nop.i 0 6617} 6618{ .mfi 6619 ldfpd fA20, fA21 = [rTmpPtr], 16 // C17, C16 6620 nop.f 0 6621 nop.i 0 6622} 6623;; 6624{ .mfi 6625 ldfe fA11 = [rTmpPtr], 16 // E7 6626 nop.f 0 6627 nop.i 0 6628} 6629{ .mfi 6630 ldfpd fA0, fA3L = [rPolDataPtr], 16 // A0 6631 nop.f 0 6632 nop.i 0 6633};; 
6634{ .mfi 6635 ldfe fA10 = [rPolDataPtr], 16 // E6 6636 nop.f 0 6637 nop.i 0 6638} 6639{ .mfi 6640 ldfe fA9 = [rTmpPtr], 16 // E5 6641 nop.f 0 6642 nop.i 0 6643} 6644;; 6645{ .mmf 6646 ldfe fA8 = [rPolDataPtr], 16 // E4 6647 ldfe fA7 = [rTmpPtr], 16 // E3 6648 nop.f 0 6649} 6650;; 6651{ .mmf 6652 ldfe fA6 = [rPolDataPtr], 16 // E2 6653 ldfe fA5 = [rTmpPtr], 16 // E1 6654 nop.f 0 6655} 6656;; 6657{ .mfi 6658 ldfe fA4 = [rPolDataPtr], 16 // E0 6659 fma.s1 fA5L = fA4L, fA4L, f0 // x^4 6660 nop.i 0 6661} 6662{ .mfi 6663 nop.m 0 6664 fms.s1 fB2 = FR_FracX, FR_FracX, fA4L // x^2 - <x^2> 6665 nop.i 0 6666} 6667;; 6668{ .mfi 6669 // store signgam if size of variable is 4 bytes 6670(p6) st4 [rSgnGamAddr] = rSgnGam 6671 fma.s1 fRes4H = fA3, FR_FracX, f0 // (A3*x)hi 6672 nop.i 0 6673} 6674{ .mfi 6675 // store signgam if size of variable is 8 bytes 6676(p7) st8 [rSgnGamAddr] = rSgnGam 6677 fma.s1 fA19 = fA19, FR_FracX, fA18 // D7*x + D6 6678 nop.i 0 6679} 6680;; 6681{ .mfi 6682 nop.m 0 6683 fma.s1 fResH = fA1, FR_FracX, f0 // (A1*x)hi 6684 nop.i 0 6685} 6686{ .mfi 6687 nop.m 0 6688 fma.s1 fB6 = fA1L, FR_FracX, fA0L // A1L*x + A0L 6689 nop.i 0 6690} 6691;; 6692{ .mfi 6693 nop.m 0 6694 fma.s1 fA17 = fA17, FR_FracX, fA16 // D5*x + D4 6695 nop.i 0 6696} 6697{ .mfi 6698 nop.m 0 6699 fma.s1 fA15 = fA15, FR_FracX, fA14 // D3*x + D2 6700 nop.i 0 6701} 6702;; 6703{ .mfi 6704 nop.m 0 6705 fma.s1 fA25 = fA25, FR_FracX, fA24 // C21*x + C20 6706 nop.i 0 6707} 6708{ .mfi 6709 nop.m 0 6710 fma.s1 fA13 = fA13, FR_FracX, fA12 // D1*x + D0 6711 nop.i 0 6712} 6713;; 6714{ .mfi 6715 nop.m 0 6716 fma.s1 fA23 = fA23, FR_FracX, fA22 // C19*x + C18 6717 nop.i 0 6718} 6719{ .mfi 6720 nop.m 0 6721 fma.s1 fA21 = fA21, FR_FracX, fA20 // C17*x + C16 6722 nop.i 0 6723} 6724;; 6725{ .mfi 6726 nop.m 0 6727 fms.s1 fRes4L = fA3, FR_FracX, fRes4H // delta((A3*x)hi) 6728 nop.i 0 6729} 6730{ .mfi 6731 nop.m 0 6732 fadd.s1 fRes2H = fRes4H, fA2 // (A3*x + A2)hi 6733 nop.i 0 6734} 6735;; 6736{ .mfi 6737 nop.m 0 
6738 fms.s1 fResL = fA1, FR_FracX, fResH // d(A1*x) 6739 nop.i 0 6740} 6741{ .mfi 6742 nop.m 0 6743 fadd.s1 fRes1H = fResH, fA0 // (A1*x + A0)hi 6744 nop.i 0 6745} 6746;; 6747{ .mfi 6748 nop.m 0 6749 fma.s1 fA19 = fA19, fA4L, fA17 // Dhi 6750 nop.i 0 6751} 6752{ .mfi 6753 nop.m 0 6754 fma.s1 fA11 = fA11, FR_FracX, fA10 // E7*x + E6 6755 nop.i 0 6756} 6757;; 6758{ .mfi 6759 nop.m 0 6760 // Doing this to raise inexact flag 6761 fma.s0 fA10 = fA0, fA0, f0 6762 nop.i 0 6763} 6764;; 6765{ .mfi 6766 nop.m 0 6767 fma.s1 fA15 = fA15, fA4L, fA13 // Dlo 6768 nop.i 0 6769} 6770{ .mfi 6771 nop.m 0 6772 // (C21*x + C20)*x^2 + C19*x + C18 6773 fma.s1 fA25 = fA25, fA4L, fA23 6774 nop.i 0 6775} 6776;; 6777{ .mfi 6778 nop.m 0 6779 fma.s1 fA9 = fA9, FR_FracX, fA8 // E5*x + E4 6780 nop.i 0 6781} 6782{ .mfi 6783 nop.m 0 6784 fma.s1 fA7 = fA7, FR_FracX, fA6 // E3*x + E2 6785 nop.i 0 6786} 6787;; 6788{ .mfi 6789 nop.m 0 6790 fma.s1 fRes4L = fA3L, FR_FracX, fRes4L // (A3*x)lo 6791 nop.i 0 6792} 6793{ .mfi 6794 nop.m 0 6795 fsub.s1 fRes2L = fA2, fRes2H 6796 nop.i 0 6797} 6798;; 6799{ .mfi 6800 nop.m 0 6801 fadd.s1 fResL = fResL, fB6 // (A1L*x + A0L) + d(A1*x) 6802 nop.i 0 6803} 6804{ .mfi 6805 nop.m 0 6806 fsub.s1 fRes1L = fA0, fRes1H 6807 nop.i 0 6808} 6809;; 6810{ .mfi 6811 nop.m 0 6812 fma.s1 fA5 = fA5, FR_FracX, fA4 // E1*x + E0 6813 nop.i 0 6814} 6815{ .mfi 6816 nop.m 0 6817 fma.s1 fB8 = fA5L, fA5L, f0 // x^8 6818 nop.i 0 6819} 6820;; 6821{ .mfi 6822 nop.m 0 6823 // ((C21*x + C20)*x^2 + C19*x + C18)*x^2 + C17*x + C16 6824 fma.s1 fA25 = fA25, fA4L, fA21 6825 nop.i 0 6826} 6827{ .mfi 6828 nop.m 0 6829 fma.s1 fA19 = fA19, fA5L, fA15 // D 6830 nop.i 0 6831} 6832;; 6833{ .mfi 6834 nop.m 0 6835 fma.s1 fA11 = fA11, fA4L, fA9 // Ehi 6836 nop.i 0 6837} 6838;; 6839{ .mfi 6840 nop.m 0 6841 fadd.s1 fRes2L = fRes2L, fRes4H 6842 nop.i 0 6843} 6844{ .mfi 6845 nop.m 0 6846 fadd.s1 fRes4L = fRes4L, fA2L // (A3*x)lo + A2L 6847 nop.i 0 6848} 6849;; 6850{ .mfi 6851 nop.m 0 6852 fma.s1 fRes3H = fRes2H, 
fA4L, f0 // ((A3*x + A2)*x^2)hi 6853 nop.i 0 6854} 6855{ .mfi 6856 nop.m 0 6857 fadd.s1 fRes1L = fRes1L, fResH 6858 nop.i 0 6859} 6860;; 6861{ .mfi 6862 nop.m 0 6863 fma.s1 fRes3L = fRes2H, fB2, f0 // (A3*x + A2)hi*d(x^2) 6864 nop.i 0 6865} 6866{ .mfi 6867 nop.m 0 6868 fma.s1 fA7 = fA7, fA4L, fA5 // Elo 6869 nop.i 0 6870} 6871;; 6872{ .mfi 6873 nop.m 0 6874 fma.s1 fA25 = fA25, fB8, fA19 // C*x^8 + D 6875 nop.i 0 6876} 6877;; 6878{ .mfi 6879 nop.m 0 6880 fadd.s1 fRes2L = fRes2L, fRes4L // (A3*x + A2)lo 6881 nop.i 0 6882} 6883;; 6884{ .mfi 6885 nop.m 0 6886 fms.s1 fB4 = fRes2H, fA4L, fRes3H // d((A3*x + A2)*x^2)) 6887 nop.i 0 6888} 6889{ .mfi 6890 nop.m 0 6891 fadd.s1 fRes1L = fRes1L, fResL // (A1*x + A0)lo 6892 nop.i 0 6893} 6894;; 6895{ .mfi 6896 nop.m 0 6897 fadd.s1 fB20 = fRes3H, fRes1H // Phi 6898 nop.i 0 6899} 6900{ .mfi 6901 nop.m 0 6902 fma.s1 fA11 = fA11, fA5L, fA7 // E 6903 nop.i 0 6904} 6905;; 6906{ .mfi 6907 nop.m 0 6908 // ( (A3*x + A2)lo*<x^2> + (A3*x + A2)hi*d(x^2)) 6909 fma.s1 fRes3L = fRes2L, fA4L, fRes3L 6910 nop.i 0 6911} 6912;; 6913{ .mfi 6914 nop.m 0 6915 // d((A3*x + A2)*x^2)) + (A1*x + A0)lo 6916 fadd.s1 fRes1L = fRes1L, fB4 6917 nop.i 0 6918} 6919;; 6920{ .mfi 6921 nop.m 0 6922 fsub.s1 fB18 = fRes1H, fB20 6923 nop.i 0 6924} 6925{ .mfi 6926 nop.m 0 6927 fma.s1 fPol = fA25, fB8, fA11 6928 nop.i 0 6929} 6930;; 6931{ .mfi 6932 nop.m 0 6933 fadd.s1 fRes1L = fRes1L, fRes3L 6934 nop.i 0 6935} 6936;; 6937{ .mfi 6938 nop.m 0 6939 fadd.s1 fB18 = fB18, fRes3H 6940 nop.i 0 6941} 6942{ .mfi 6943 nop.m 0 6944 fma.s1 fRes4H = fPol, fA5L, fB20 6945 nop.i 0 6946} 6947;; 6948{ .mfi 6949 nop.m 0 6950 fma.s1 fPolL = fPol, fA5L, f0 6951 nop.i 0 6952} 6953;; 6954{ .mfi 6955 nop.m 0 6956 fadd.s1 fB18 = fB18, fRes1L // Plo 6957 nop.i 0 6958} 6959{ .mfi 6960 nop.m 0 6961 fsub.s1 fRes4L = fB20, fRes4H 6962 nop.i 0 6963} 6964;; 6965{ .mfi 6966 nop.m 0 6967 fadd.s1 fB18 = fB18, fPolL 6968 nop.i 0 6969} 6970;; 6971{ .mfi 6972 nop.m 0 6973 fadd.s1 fRes4L = fRes4L, fB18 
6974 nop.i 0 6975} 6976;; 6977{ .mfb 6978 nop.m 0 6979 fma.s0 f8 = fRes4H, f1, fRes4L 6980 // P25(x) computed, exit here 6981 br.ret.sptk b0 6982} 6983;; 6984 6985 6986// here if 0.75 <= x < 1.3125 6987.align 32 6988lgammal_03Q_1Q: 6989{ .mfi 6990 addl rPolDataPtr= @ltoff(lgammal_03Q_1Q_data),gp 6991 fma.s1 FR_FracX = fA5L, f1, f0 // x 6992 adds rSgnGam = 1, r0 6993} 6994{ .mfi 6995 nop.m 0 6996 fma.s1 fB4 = fA5L, fA5L, f0 // x^2 6997 nop.i 0 6998} 6999;; 7000{ .mfi 7001 ld8 rPolDataPtr = [rPolDataPtr] 7002 nop.f 0 7003 nop.i 0 7004} 7005;; 7006{ .mfb 7007 adds rTmpPtr = 144, rPolDataPtr 7008 nop.f 0 7009 br.sptk lgamma_polynom24x 7010} 7011;; 7012 7013// here if 1.5625 <= x < 2.25 7014.align 32 7015lgammal_13Q_2Q: 7016{ .mfi 7017 addl rPolDataPtr= @ltoff(lgammal_13Q_2Q_data),gp 7018 fma.s1 FR_FracX = fB4, f1, f0 // x 7019 adds rSgnGam = 1, r0 7020} 7021{ .mfi 7022 nop.m 0 7023 fma.s1 fB4 = fB4, fB4, f0 // x^2 7024 nop.i 0 7025} 7026;; 7027{ .mfi 7028 ld8 rPolDataPtr = [rPolDataPtr] 7029 nop.f 0 7030 nop.i 0 7031} 7032;; 7033{ .mfb 7034 adds rTmpPtr = 144, rPolDataPtr 7035 nop.f 0 7036 br.sptk lgamma_polynom24x 7037} 7038;; 7039 7040// here if result is Pol24(x) 7041// x is in FR_FracX, 7042// rPolDataPtr, rTmpPtr point to coefficients 7043.align 32 7044lgamma_polynom24x: 7045{ .mfi 7046 ldfpd fA4, fA2L = [rPolDataPtr], 16 7047 nop.f 0 7048 cmp.eq p6, p7 = 4, rSgnGamSize 7049} 7050{ .mfi 7051 ldfpd fA23, fA24 = [rTmpPtr], 16 // C18, C19 7052 nop.f 0 7053 nop.i 0 7054} 7055;; 7056{ .mfi 7057 ldfpd fA3, fA1L = [rPolDataPtr], 16 7058 fma.s1 fA5L = fB4, fB4, f0 // x^4 7059 nop.i 0 7060} 7061{ .mfi 7062 ldfpd fA19, fA20 = [rTmpPtr], 16 // D6, D7 7063 fms.s1 fB2 = FR_FracX, FR_FracX, fB4 // x^2 - <x^2> 7064 nop.i 0 7065} 7066;; 7067{ .mmf 7068 ldfpd fA15, fA16 = [rPolDataPtr], 16 // D2, D3 7069 ldfpd fA17, fA18 = [rTmpPtr], 16 // D4, D5 7070 nop.f 0 7071} 7072;; 7073{ .mmf 7074 ldfpd fA13, fA14 = [rPolDataPtr], 16 // D0, D1 7075 ldfpd fA12, fA21 = [rTmpPtr], 16 // E7, 
C16 7076 nop.f 0 7077} 7078;; 7079{ .mfi 7080 ldfe fA11 = [rPolDataPtr], 16 // E6 7081 nop.f 0 7082 nop.i 0 7083} 7084{ .mfi 7085 ldfe fA10 = [rTmpPtr], 16 // E5 7086 nop.f 0 7087 nop.i 0 7088} 7089;; 7090{ .mfi 7091 ldfpd fA2, fA4L = [rPolDataPtr], 16 7092 nop.f 0 7093 nop.i 0 7094} 7095{ .mfi 7096 ldfpd fA1, fA3L = [rTmpPtr], 16 7097 nop.f 0 7098 nop.i 0 7099} 7100;; 7101{ .mfi 7102 ldfpd fA22, fA25 = [rPolDataPtr], 16 // C17, C20 7103 fma.s1 fA0 = fA5L, fA5L, f0 // x^8 7104 nop.i 0 7105} 7106{ .mfi 7107 nop.m 0 7108 fma.s1 fA0L = fA5L, FR_FracX, f0 // x^5 7109 nop.i 0 7110} 7111;; 7112{ .mmf 7113 ldfe fA9 = [rPolDataPtr], 16 // E4 7114 ldfe fA8 = [rTmpPtr], 16 // E3 7115 nop.f 0 7116} 7117;; 7118{ .mmf 7119 ldfe fA7 = [rPolDataPtr], 16 // E2 7120 ldfe fA6 = [rTmpPtr], 16 // E1 7121 nop.f 0 7122} 7123;; 7124{ .mfi 7125 ldfe fA5 = [rTmpPtr], 16 // E0 7126 fma.s1 fRes4H = fA4, fB4, f0 // A4*<x^2> 7127 nop.i 0 7128} 7129{ .mfi 7130 nop.m 0 7131 fma.s1 fPol = fA24, FR_FracX, fA23 // C19*x + C18 7132 nop.i 0 7133} 7134;; 7135{ .mfi 7136 // store signgam if size of variable is 4 bytes 7137(p6) st4 [rSgnGamAddr] = rSgnGam 7138 fma.s1 fRes1H = fA3, fB4, f0 // A3*<x^2> 7139 nop.i 0 7140} 7141{ .mfi 7142 // store signgam if size of variable is 8 bytes 7143(p7) st8 [rSgnGamAddr] = rSgnGam 7144 fma.s1 fA1L = fA3, fB2,fA1L // A3*d(x^2) + A1L 7145 nop.i 0 7146} 7147;; 7148{ .mfi 7149 nop.m 0 7150 fma.s1 fA20 = fA20, FR_FracX, fA19 // D7*x + D6 7151 nop.i 0 7152} 7153{ .mfi 7154 nop.m 0 7155 fma.s1 fA18 = fA18, FR_FracX, fA17 // D5*x + D4 7156 nop.i 0 7157} 7158;; 7159{ .mfi 7160 nop.m 0 7161 fma.s1 fA16 = fA16, FR_FracX, fA15 // D3*x + D2 7162 nop.i 0 7163} 7164{ .mfi 7165 nop.m 0 7166 fma.s1 fA14 = fA14, FR_FracX, fA13 // D1*x + D0 7167 nop.i 0 7168} 7169;; 7170{ .mfi 7171 nop.m 0 7172 fma.s1 fA2L = fA4, fB2,fA2L // A4*d(x^2) + A2L 7173 nop.i 0 7174} 7175{ .mfi 7176 nop.m 0 7177 fma.s1 fA12 = fA12, FR_FracX, fA11 // E7*x + E6 7178 nop.i 0 7179} 7180;; 7181{ .mfi 7182 nop.m 0 
7183 fms.s1 fRes2L = fA4, fB4, fRes4H // delta(A4*<x^2>) 7184 nop.i 0 7185} 7186{ .mfi 7187 nop.m 0 7188 fadd.s1 fRes2H = fRes4H, fA2 // A4*<x^2> + A2 7189 nop.i 0 7190} 7191;; 7192{ .mfi 7193 nop.m 0 7194 fms.s1 fRes3L = fA3, fB4, fRes1H // delta(A3*<x^2>) 7195 nop.i 0 7196} 7197{ .mfi 7198 nop.m 0 7199 fadd.s1 fRes3H = fRes1H, fA1 // A3*<x^2> + A1 7200 nop.i 0 7201} 7202;; 7203{ .mfi 7204 nop.m 0 7205 fma.s1 fA20 = fA20, fB4, fA18 // (D7*x + D6)*x^2 + D5*x + D4 7206 nop.i 0 7207} 7208{ .mfi 7209 nop.m 0 7210 fma.s1 fA22 = fA22, FR_FracX, fA21 // C17*x + C16 7211 nop.i 0 7212} 7213;; 7214{ .mfi 7215 nop.m 0 7216 fma.s1 fA16 = fA16, fB4, fA14 // (D3*x + D2)*x^2 + D1*x + D0 7217 nop.i 0 7218} 7219{ .mfi 7220 nop.m 0 7221 fma.s1 fPol = fA25, fB4, fPol // C20*x^2 + C19*x + C18 7222 nop.i 0 7223} 7224;; 7225{ .mfi 7226 nop.m 0 7227 fma.s1 fA2L = fA4L, fB4, fA2L // A4L*<x^2> + A4*d(x^2) + A2L 7228 nop.i 0 7229} 7230{ .mfi 7231 nop.m 0 7232 fma.s1 fA1L = fA3L, fB4, fA1L // A3L*<x^2> + A3*d(x^2) + A1L 7233 nop.i 0 7234} 7235;; 7236{ .mfi 7237 nop.m 0 7238 fsub.s1 fRes4L = fA2, fRes2H // d1 7239 nop.i 0 7240} 7241{ .mfi 7242 nop.m 0 7243 fma.s1 fResH = fRes2H, fB4, f0 // (A4*<x^2> + A2)*x^2 7244 nop.i 0 7245} 7246;; 7247{ .mfi 7248 nop.m 0 7249 fsub.s1 fRes1L = fA1, fRes3H // d1 7250 nop.i 0 7251} 7252{ .mfi 7253 nop.m 0 7254 fma.s1 fB6 = fRes3H, FR_FracX, f0 // (A3*<x^2> + A1)*x 7255 nop.i 0 7256} 7257;; 7258{ .mfi 7259 nop.m 0 7260 fma.s1 fA10 = fA10, FR_FracX, fA9 // E5*x + E4 7261 nop.i 0 7262} 7263{ .mfi 7264 nop.m 0 7265 fma.s1 fA8 = fA8, FR_FracX, fA7 // E3*x + E2 7266 nop.i 0 7267} 7268;; 7269{ .mfi 7270 nop.m 0 7271 // (C20*x^2 + C19*x + C18)*x^2 + C17*x + C16 7272 fma.s1 fPol = fPol, fB4, fA22 7273 nop.i 0 7274} 7275{ .mfi 7276 nop.m 0 7277 fma.s1 fA6 = fA6, FR_FracX, fA5 // E1*x + E0 7278 nop.i 0 7279} 7280;; 7281{ .mfi 7282 nop.m 0 7283 // A4L*<x^2> + A4*d(x^2) + A2L + delta(A4*<x^2>) 7284 fadd.s1 fRes2L = fA2L, fRes2L 7285 nop.i 0 7286} 7287{ .mfi 7288 nop.m 0 
7289 // A3L*<x^2> + A3*d(x^2) + A1L + delta(A3*<x^2>) 7290 fadd.s1 fRes3L = fA1L, fRes3L 7291 nop.i 0 7292} 7293;; 7294{ .mfi 7295 nop.m 0 7296 fadd.s1 fRes4L = fRes4L, fRes4H // d2 7297 nop.i 0 7298} 7299{ .mfi 7300 nop.m 0 7301 fms.s1 fResL = fRes2H, fB4, fResH // d(A4*<x^2> + A2)*x^2) 7302 nop.i 0 7303} 7304;; 7305{ .mfi 7306 nop.m 0 7307 fadd.s1 fRes1L = fRes1L, fRes1H // d2 7308 nop.i 0 7309} 7310{ .mfi 7311 nop.m 0 7312 fms.s1 fB8 = fRes3H, FR_FracX, fB6 // d((A3*<x^2> + A1)*x) 7313 nop.i 0 7314} 7315;; 7316{ .mfi 7317 nop.m 0 7318 fadd.s1 fB10 = fResH, fB6 // (A4*x^4 + .. + A1*x)hi 7319 nop.i 0 7320} 7321{ .mfi 7322 nop.m 0 7323 fma.s1 fA12 = fA12, fB4, fA10 // Ehi 7324 nop.i 0 7325} 7326;; 7327{ .mfi 7328 nop.m 0 7329 // ((D7*x + D6)*x^2 + D5*x + D4)*x^4 + (D3*x + D2)*x^2 + D1*x + D0 7330 fma.s1 fA20 = fA20, fA5L, fA16 7331 nop.i 0 7332} 7333{ .mfi 7334 nop.m 0 7335 fma.s1 fA8 = fA8, fB4, fA6 // Elo 7336 nop.i 0 7337} 7338;; 7339{ .mfi 7340 nop.m 0 7341 fadd.s1 fRes2L = fRes2L, fRes4L // (A4*<x^2> + A2)lo 7342 nop.i 0 7343} 7344{ .mfi 7345 nop.m 0 7346 // d(A4*<x^2> + A2)*x^2) + A4*<x^2> + A2)*d(x^2) 7347 fma.s1 fResL = fRes2H, fB2, fResL 7348 nop.i 0 7349} 7350;; 7351{ .mfi 7352 nop.m 0 7353 fadd.s1 fRes3L = fRes3L, fRes1L // (A4*<x^2> + A2)lo 7354 nop.i 0 7355} 7356;; 7357{ .mfi 7358 nop.m 0 7359 fsub.s1 fB12 = fB6, fB10 7360 nop.i 0 7361} 7362;; 7363{ .mfi 7364 nop.m 0 7365 fma.s1 fPol = fPol, fA0, fA20 // PolC*x^8 + PolD 7366 nop.i 0 7367} 7368{ .mfi 7369 nop.m 0 7370 fma.s1 fPolL = fA12, fA5L, fA8 // E 7371 nop.i 0 7372} 7373;; 7374{ .mfi 7375 nop.m 0 7376 fma.s1 fResL = fB4, fRes2L, fResL // ((A4*<x^2> + A2)*x^2)lo 7377 nop.i 0 7378} 7379;; 7380{ .mfi 7381 nop.m 0 7382 fma.s1 fRes3L = fRes3L, FR_FracX, fB8 // ((A3*<x^2> + A1)*x)lo 7383 nop.i 0 7384} 7385;; 7386{ .mfi 7387 nop.m 0 7388 fadd.s1 fB12 = fB12, fResH 7389 nop.i 0 7390} 7391;; 7392{ .mfi 7393 nop.m 0 7394 fma.s1 fPol = fPol, fA0, fPolL 7395 nop.i 0 7396} 7397;; 7398{ .mfi 7399 nop.m 0 7400 
fadd.s1 fRes3L = fRes3L, fResL 7401 nop.i 0 7402} 7403;; 7404{ .mfi 7405 nop.m 0 7406 fma.s1 fRes2H = fPol, fA0L, fB10 7407 nop.i 0 7408} 7409;; 7410{ .mfi 7411 nop.m 0 7412 fadd.s1 fRes3L = fB12, fRes3L 7413 nop.i 0 7414} 7415;; 7416{ .mfi 7417 nop.m 0 7418 fsub.s1 fRes4L = fB10, fRes2H 7419 nop.i 0 7420} 7421;; 7422{ .mfi 7423 nop.m 0 7424 fma.s1 fRes4L = fPol, fA0L, fRes4L 7425 nop.i 0 7426} 7427;; 7428{ .mfi 7429 nop.m 0 7430 fadd.s1 fRes4L = fRes4L, fRes3L 7431 nop.i 0 7432} 7433;; 7434{ .mfb 7435 nop.m 0 7436 // final result for all paths for which the result is Pol24(x) 7437 fma.s0 f8 = fRes2H, f1, fRes4L 7438 // here is the exit for all paths for which the result is Pol24(x) 7439 br.ret.sptk b0 7440} 7441;; 7442 7443 7444// here if x is natval, nan, +/-inf, +/-0, or denormal 7445.align 32 7446lgammal_spec: 7447{ .mfi 7448 nop.m 0 7449 fclass.m p9, p0 = f8, 0xB // +/-denormals 7450 nop.i 0 7451};; 7452{ .mfi 7453 nop.m 0 7454 fclass.m p6, p0 = f8, 0x1E1 // Test x for natval, nan, +inf 7455 nop.i 0 7456};; 7457{ .mfb 7458 nop.m 0 7459 fclass.m p7, p0 = f8, 0x7 // +/-0 7460(p9) br.cond.sptk lgammal_denormal_input 7461};; 7462{ .mfb 7463 nop.m 0 7464 nop.f 0 7465 // branch out if x is natval, nan, +inf 7466(p6) br.cond.spnt lgammal_nan_pinf 7467};; 7468{ .mfb 7469 nop.m 0 7470 nop.f 0 7471(p7) br.cond.spnt lgammal_singularity 7472};; 7473// if we are still here then x = -inf 7474{ .mfi 7475 cmp.eq p6, p7 = 4, rSgnGamSize 7476 nop.f 0 7477 adds rSgnGam = 1, r0 7478};; 7479{ .mfi 7480 // store signgam if size of variable is 4 bytes 7481(p6) st4 [rSgnGamAddr] = rSgnGam 7482 nop.f 0 7483 nop.i 0 7484} 7485{ .mfb 7486 // store signgam if size of variable is 8 bytes 7487(p7) st8 [rSgnGamAddr] = rSgnGam 7488 fma.s0 f8 = f8,f8,f0 // return +inf, no call to error support 7489 br.ret.spnt b0 7490};; 7491 7492// here if x is NaN, NatVal or +INF 7493.align 32 7494lgammal_nan_pinf: 7495{ .mfi 7496 cmp.eq p6, p7 = 4, rSgnGamSize 7497 nop.f 0 7498 adds rSgnGam = 1, r0 7499} 
7500;; 7501{ .mfi 7502 // store signgam if size of variable is 4 bytes 7503(p6) st4 [rSgnGamAddr] = rSgnGam 7504 fma.s0 f8 = f8,f1,f8 // return x+x if x is natval, nan, +inf 7505 nop.i 0 7506} 7507{ .mfb 7508 // store signgam if size of variable is 8 bytes 7509(p7) st8 [rSgnGamAddr] = rSgnGam 7510 nop.f 0 7511 br.ret.sptk b0 7512} 7513;; 7514 7515// here if x denormal or unnormal 7516.align 32 7517lgammal_denormal_input: 7518{ .mfi 7519 nop.m 0 7520 fma.s0 fResH = f1, f1, f8 // raise denormal exception 7521 nop.i 0 7522} 7523{ .mfi 7524 nop.m 0 7525 fnorm.s1 f8 = f8 // normalize input value 7526 nop.i 0 7527} 7528;; 7529{ .mfi 7530 getf.sig rSignifX = f8 7531 fmerge.se fSignifX = f1, f8 7532 nop.i 0 7533} 7534{ .mfi 7535 getf.exp rSignExpX = f8 7536 fcvt.fx.s1 fXint = f8 // Convert arg to int (int repres. in FR) 7537 nop.i 0 7538} 7539;; 7540{ .mfi 7541 getf.exp rSignExpX = f8 7542 fcmp.lt.s1 p15, p14 = f8, f0 7543 nop.i 0 7544} 7545;; 7546{ .mfb 7547 and rExpX = rSignExpX, r17Ones 7548 fmerge.s fAbsX = f1, f8 // |x| 7549 br.cond.sptk _deno_back_to_main_path 7550} 7551;; 7552 7553 7554// here if overflow (x > overflow_bound) 7555.align 32 7556lgammal_overflow: 7557{ .mfi 7558 addl r8 = 0x1FFFE, r0 7559 nop.f 0 7560 cmp.eq p6, p7 = 4, rSgnGamSize 7561} 7562{ .mfi 7563 adds rSgnGam = 1, r0 7564 nop.f 0 7565 nop.i 0 7566} 7567;; 7568{ .mfi 7569 setf.exp f9 = r8 7570 fmerge.s FR_X = f8,f8 7571 mov GR_Parameter_TAG = 102 // overflow 7572};; 7573{ .mfi 7574 // store signgam if size of variable is 4 bytes 7575(p6) st4 [rSgnGamAddr] = rSgnGam 7576 nop.f 0 7577 nop.i 0 7578} 7579{ .mfb 7580 // store signgam if size of variable is 8 bytes 7581(p7) st8 [rSgnGamAddr] = rSgnGam 7582 fma.s0 FR_RESULT = f9,f9,f0 // Set I,O and +INF result 7583 br.cond.sptk __libm_error_region 7584};; 7585 7586// here if x is negative integer or +/-0 (SINGULARITY) 7587.align 32 7588lgammal_singularity: 7589{ .mfi 7590 adds rSgnGam = 1, r0 7591 fclass.m p8,p0 = f8,0x6 // is x -0? 
// Remainder of the lgammal_singularity handler.  Its label and the first
// two slots of this bundle are on the preceding line.
        mov             GR_Parameter_TAG = 103  // error tag 103 (original note: "negative")
}
{ .mfi
        cmp.eq          p6, p7 = 4, rSgnGamSize // p6: size == 4, p7: size != 4
        fma.s1          FR_X = f0, f0, f8       // 0*0 + x: FR_X = x, argument for the error region
        nop.i           0
}
;;
{ .mfi
        // p8 comes from the fclass.m test in the handler's first bundle
        // (x is -0); in that case the value to store becomes 0 - rSgnGam.
(p8)    sub             rSgnGam = r0, rSgnGam
        nop.f           0
        nop.i           0
}
{ .mfi
        // all-nop bundle, kept as in the original schedule
        nop.m           0
        nop.f           0
        nop.i           0
}
;;
{ .mfi
        // 4-byte signgam object
(p6)    st4             [rSgnGamAddr] = rSgnGam
        nop.f           0
        nop.i           0
}
{ .mfb
        // otherwise signgam is stored as 8 bytes
(p7)    st8             [rSgnGamAddr] = rSgnGam
        // reciprocal of 0 in status field 0; the file header lists +inf as
        // the result for +/-0.  Presumably this is also what raises the
        // divide-by-zero flag - confirm against the ISA manual.
        frcpa.s0        FR_RESULT, p0 = f1, f0
        br.cond.sptk    __libm_error_region
}
;;

GLOBAL_LIBM_END(__libm_lgammal)



//*********************************************************************
// __libm_error_region
//
// Local tail reached by branch (not by call) from handlers such as the
// singularity path directly above.  Expected on entry:
//   GR_Parameter_TAG   error tag chosen by the handler
//   FR_X, FR_Y         values stored as parameters 1 and 2
//   FR_RESULT          provisional result, stored as parameter 3
// It opens a 64-byte frame, spills the three values, passes their
// addresses in GR_Parameter_X / _Y / _RESULT and calls
// __libm_error_support.  The value found in the result slot afterwards is
// loaded into f8 and returned to the original caller.
//
// Frame layout relative to the lowered sp:
//   sp+16  parameter 1 (FR_X)
//   sp+32  parameter 2 (FR_Y)
//   sp+48  parameter 3 (FR_RESULT)
//
// The GR_Parameter_* / GR_SAVE_* aliases are defined outside this span;
// the file header reserves r66-r69 for the error routine's arguments.
//*********************************************************************
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add             GR_Parameter_Y = -32, sp        // old sp - 32, i.e. new sp + 32: parameter 2 slot
        nop.f           0
.save   ar.pfs, GR_SAVE_PFS
        mov             GR_SAVE_PFS = ar.pfs            // keep ar.pfs across the call
}
{ .mfi
.fframe 64
        add             sp = -64, sp                    // open the 64-byte frame
        nop.f           0
        mov             GR_SAVE_GP = gp                 // keep gp across the call
}
;;
{ .mmi
        stfe            [GR_Parameter_Y] = FR_Y, 16     // spill parameter 2; pointer moves on to sp+48
        add             GR_Parameter_X = 16, sp         // parameter 1 slot: sp+16
.save   b0, GR_SAVE_B0
        mov             GR_SAVE_B0 = b0                 // keep the return address across the call
}
;;
.body
{ .mib
        stfe            [GR_Parameter_X] = FR_X         // spill parameter 1
        add             GR_Parameter_RESULT = 0, GR_Parameter_Y // parameter 3 address: sp+48
        nop.b           0
}
{ .mib
        stfe            [GR_Parameter_Y] = FR_RESULT    // spill parameter 3 at sp+48
        add             GR_Parameter_Y = -16, GR_Parameter_Y    // step back to parameter 2: sp+32
        br.call.sptk    b0 = __libm_error_support#      // hand over to the error handling routine
}
;;
{ .mmi
        add             GR_Parameter_RESULT = 48, sp    // recompute the result slot address from sp
        nop.m           999
        nop.i           999
}
;;
{ .mmi
        ldfe            f8 = [GR_Parameter_RESULT]      // return value = contents of the result slot
.restore sp
        add             sp = 64, sp                     // drop the frame
        mov             b0 = GR_SAVE_B0                 // restore the return address
}
;;
{ .mib
        mov             gp = GR_SAVE_GP                 // restore gp
        mov             ar.pfs = GR_SAVE_PFS            // restore ar.pfs
        br.ret.sptk     b0                              // back to the caller of __libm_lgammal
}
;;

LOCAL_LIBM_END(__libm_error_region#)

// external error handling routine called from __libm_error_region
.type   __libm_error_support#,@function
.global __libm_error_support#