1.file "libm_lgamma.s" 2 3 4// Copyright (c) 2002 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 25// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, 28// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code,and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 
38// 39//********************************************************************* 40// 41// History: 42// 01/10/02 Initial version 43// 01/25/02 Corrected error tag numbers 44// 02/04/02 Added support of SIGN(GAMMA(x)) calculation 45// 05/20/02 Cleaned up namespace and sf0 syntax 46// 09/15/02 Fixed bug on the branch lgamma_negrecursion 47// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero 48// 02/10/03 Reordered header: .section, .global, .proc, .align 49// 07/22/03 Reformatted some data tables 50// 03/31/05 Reformatted delimiters between data tables 51// 52//********************************************************************* 53// 54//********************************************************************* 55// 56// Function: __libm_lgamma(double x, int* signgam, int szsigngam) 57// computes the principal value of the logarithm of the GAMMA function 58// of x. Signum of GAMMA(x) is stored to memory starting at the address 59// specified by the signgam. 60// 61//********************************************************************* 62// 63// Resources Used: 64// 65// Floating-Point Registers: f6-f15 66// f32-f122 67// 68// General Purpose Registers: 69// r8-r11 70// r14-r31 71// r32-r36 72// r37-r40 (Used to pass arguments to error handling routine) 73// 74// Predicate Registers: p6-p15 75// 76//********************************************************************* 77// 78// IEEE Special Conditions: 79// 80// __libm_lgamma(+inf) = +inf 81// __libm_lgamma(-inf) = QNaN 82// __libm_lgamma(+/-0) = +inf 83// __libm_lgamma(x<0, x is an integer) = +inf 84// __libm_lgamma(SNaN) = QNaN 85// __libm_lgamma(QNaN) = QNaN 86// 87//********************************************************************* 88// 89// Overview 90// 91// The method consists of the following cases. 
92// 93// If 512 <= x < OVERFLOW_BOUNDARY use case lgamma_pstirling; 94// else if 1 < x < 512 use case lgamma_regular; 95// else if -17 < x < 1 use case lgamma_negrecursion; 96// else if -512 < x < -17 use case lgamma_negpoly; 97// else if x < -512 use case lgamma_negstirling; 98// else if x is close to negative 99// roots of ln(GAMMA(x)) use case lgamma_negroots; 100// 101// 102// Case 512 <= x < OVERFLOW_BOUNDARY 103// --------------------------------- 104// Here we use an algorithm based on the Stirling formula: 105// ln(GAMMA(x)) = ln(sqrt(2*Pi)) + (x-0.5)ln(x) - x + (W2 + W4/x^2)/x 106// 107// Case 1 < x < 512 108// ---------------- 109// To calculate ln(GAMMA(x)) on this interval we use polynomial approximation 110// on the following intervals [0.875; 1.25), [1.25; 1.75), [1.75, 2.25), 111// [2.25; 4), [2^i; 2^(i+1)), i=2..8 112// 113// The following variants of approximation and argument reduction are used: 114// 1. [0.875; 1.25) 115// ln(GAMMA(x)) ~ (x-1.0)*P17(x-1.0) 116// 117// 2. [1.25; 1.75) 118// ln(GAMMA(x)) ~ (x-LocalMinimum)*P17(x-LocalMinimum) 119// 120// 3. [1.75, 2.25) 121// ln(GAMMA(x)) ~ (x-2.0)*P17(x-2.0) 122// 123// 4. [2.25; 4) 124// ln(GAMMA(x)) ~ P22(x) 125// 126// 5. 
[2^i; 2^(i+1)), i=2..8 127// ln(GAMMA(x)) ~ P22((x-2^i)/2^i) 128// 129// Case -17 < x < 1 130// ---------------- 131// Here we use the recursive formula: 132// ln(GAMMA(x)) = ln(GAMMA(x+1)) - ln(x) 133// 134// Using this formula we reduce the argument to the base interval [1.0; 2.0] 135// 136// Case -512 < x < -17 137// -------------------- 138// Here we use the formula: 139// ln(GAMMA(-x)) = ln(Pi/(x*GAMMA(x)*sin(Pi*x))) = 140// = -ln(x) - ln(GAMMA(x)) - ln(sin(Pi*r)/(Pi*r)) - ln(|r|) 141// where r = x - rounded_to_nearest(x), i.e. |r| <= 0.5 and 142// ln(sin(Pi*r)/(Pi*r)) is approximated by a 14-degree polynomial of r^2 143// 144// 145// Case x < -512 146// ------------- 147// Here we use an algorithm based on the Stirling formula: 148// ln(GAMMA(-x)) = -ln(sqrt(2*Pi)) + (-x-0.5)ln(x) + x - (W2 + W4/x^2)/x - 149// - ln(sin(Pi*r)/(Pi*r)) - ln(|r|) 150// where r = x - rounded_to_nearest(x). 151// 152// Neighbourhoods of negative roots 153// -------------------------------- 154// Here we use the polynomial approximation 155// ln(GAMMA(x)) = ln(GAMMA(x0)) + (x-x0)*P14(x-x0), 156// where x0 is a root of ln(GAMMA(x)) rounded to the nearest double 157// precision number. 
//

//*********************************************************************

// ---------------------------------------------------------------------
// Symbolic names for floating-point registers.
//
// Each line binds one mnemonic name to one physical register.  Many
// names are bound to the same register; aliases of one register are
// listed on consecutive lines.
// NOTE(review): aliases sharing a register are presumably never live
// at the same time -- confirm against the code paths that use them.
// ---------------------------------------------------------------------

// Argument / result names
FR_X             = f10
FR_Y             = f1        // __libm_lgamma is a single-argument function
FR_RESULT        = f8

// f6 - f15
FR_B11           = f6
FR_B10           = f7

FR_int_N         = f9
FR_N             = f10
FR_P5            = f11
FR_P4            = f12
FR_P3            = f13
FR_P2            = f14
FR_NormX         = f15

// f32 - f122
FR_Ln2           = f32
FR_C01           = f33
FR_A17           = f33
FR_C00           = f34
FR_Xp2           = f34
FR_A00           = f34
FR_A16           = f34
FR_C11           = f35
FR_A15           = f35
FR_C10           = f36
FR_Xp3           = f36
FR_A14           = f36
FR_B1            = f36
FR_C21           = f37
FR_A13           = f37
FR_PR01          = f37
FR_C20           = f38
FR_Xp6           = f38
FR_A12           = f38
FR_C31           = f39
FR_Xp7           = f39
FR_B0            = f39
FR_A11           = f39
FR_C30           = f40
FR_Xp8           = f40
FR_A10           = f40
FR_PR00          = f40
FR_C41           = f41
FR_Xp9           = f41
FR_A9            = f41
FR_PR11          = f41
FR_C40           = f42
FR_A8            = f42
FR_C51           = f43
FR_Xp11          = f43
FR_A7            = f43
FR_C50           = f44
FR_C             = f44
FR_Xp12          = f44
FR_A6            = f44
FR_Xm2           = f45
FR_Xp13          = f45
FR_A5            = f45
FR_PR10          = f45
FR_C61           = f46
FR_Xp14          = f46
FR_A4            = f46
FR_PR21          = f46
FR_C60           = f47
FR_Xp15          = f47
FR_A3            = f47
FR_PR20          = f47
FR_C71           = f48
FR_Xp16          = f48
FR_A2            = f48
FR_PR31          = f48
FR_C70           = f49
FR_Xp17          = f49
FR_A1            = f49
FR_PR30          = f49
FR_C81           = f50
FR_B17           = f50
FR_A0            = f50
FR_C80           = f51
FR_B16           = f51
FR_C91           = f52
FR_B15           = f52
FR_C90           = f53
FR_B14           = f53
FR_CA1           = f54
FR_B13           = f54
FR_CA0           = f55
FR_B12           = f55
FR_CN            = f56
FR_Qlo           = f56
FR_PRN           = f56
FR_B7            = f57
FR_B6            = f58
FR_Qhi           = f59
FR_x             = f60
FR_x2            = f61
FR_TpNxLn2       = f62
FR_W2            = f63
FR_x4            = f64
FR_r4            = f64
FR_x8            = f65
FR_r8            = f65
FR_r05           = f66
FR_Xm05          = f66
FR_B5            = f66
FR_LnSqrt2Pi     = f67
FR_B4            = f67
FR_InvX          = f68
FR_B3            = f68
FR_InvX2         = f69
FR_B2            = f69
FR_W4            = f70
FR_OvfBound      = f71
FR_05            = f72
FR_LocalMin      = f73
FR_tmp           = f73
FR_LnX           = f74
FR_Xf            = f75
FR_InvXf         = f76
FR_rf            = f77
FR_rf2           = f78
FR_P54f          = f79
FR_P32f          = f80
FR_rf3           = f81
FR_P10f          = f82
FR_TpNxLn2f      = f83
FR_Nf            = f84
FR_LnXf          = f85
FR_int_Nf        = f86
FR_Tf            = f87
FR_Xf2           = f88
FR_Xp10          = f89
FR_w3            = f90
FR_S28           = f90
FR_w2            = f91
FR_S26           = f91
FR_w6            = f92
FR_S24           = f92
FR_w4            = f93
FR_S22           = f93
FR_w             = f94
FR_S20           = f94
FR_Q8            = f95
FR_S18           = f95
FR_Q7            = f96
FR_S16           = f96
FR_Q4            = f97
FR_S14           = f97
FR_Q3            = f98
FR_S12           = f98
FR_Q6            = f99
FR_S10           = f99
FR_Q5            = f100
FR_S8            = f100
FR_Q2            = f101
FR_S6            = f101
FR_Root          = f101
FR_S4            = f102
FR_Q1            = f102
FR_S2            = f103
FR_Xp1           = f104
FR_Xf4           = f105
FR_Xf8           = f106
FR_Xfr           = f107
FR_Xf6           = f108
FR_Ntrunc        = f109
FR_B9            = f110
FR_2             = f110
FR_B8            = f111
FR_3             = f111
FR_5             = f112
FR_Xp4           = f113
FR_Xp5           = f114
FR_P54           = f115
FR_P32           = f116
FR_P10           = f117
FR_r             = f118
FR_r2            = f119
FR_r3            = f120
FR_T             = f121
FR_int_Ntrunc    = f122

//===================================

// ---------------------------------------------------------------------
// Symbolic names for general-purpose registers.
// As with the FR_* names, several names may be bound to one register.
// ---------------------------------------------------------------------

// r8 - r11
GR_TAG           = r8
GR_ExpMask       = r8
GR_ExpBias       = r9
GR_ad_Roots      = r9
GR_Expf          = r10
GR_Arg           = r10
GR_SignExp       = r11
GR_ArgXfr        = r11

// r14 - r31
GR_Exp           = r14
GR_Arg125        = r14
GR_RootInd       = r14
GR_ArgAsIs       = r15
GR_Arg175        = r15
GR_Sig           = r16
GR_Ind           = r17       // first of two GR_Ind assignments (see r24 below)
GR_ad_Dx         = r17
GR_ad_1          = r18
GR_SignExp_w     = r19
GR_2_25          = r19
GR_Arg025        = r19
GR_Arg15         = r19
GR_Arg17         = r19
GR_Exp_w         = r19       // original annotation: "21" (meaning not evident here)
GR_ad_2          = r20
GR_2xDx          = r21
GR_SignOfGamma   = r21
GR_fff9          = r22
GR_Offs          = r22
GR_ad_Co7        = r23
GR_Arg075        = r23
GR_Arg0875       = r23
GR_ad_T          = r24
GR_ad_Root       = r24
// NOTE(review): GR_Ind was already assigned r17 above and is assigned
// again here.  With plain `=` assignment the later value is presumably
// the one in effect for the code that follows -- confirm which register
// the code expects before removing either line.
GR_Ind           = r24
GR_ad_Co         = r25
GR_ad_Ce         = r26
GR_ad_Ce7        = r27
GR_Arg05         = r27
GR_Offs7         = r28
GR_ArgXfrAsIs    = r28
GR_ExpOf2        = r29
GR_ad_LnT        = r29
GR_Dx            = r29
GR_ExpOf256      = r30
GR_0x30033       = r30
GR_Root          = r30
GR_PseudoRoot    = r30
GR_ad_Data       = r31
GR_ad_SignGam    = r31


398GR_SAVE_B0 = r33 399GR_SAVE_PFS = r34 400GR_SAVE_GP = r35 401GR_SAVE_SP = r36 402 403GR_Parameter_X = r37 404GR_Parameter_Y = r38 405GR_Parameter_RESULT = r39 406GR_Parameter_TAG = r40 407 408 409 410// Data tables 411//============================================================== 412 413RODATA 414.align 16 415LOCAL_OBJECT_START(lgamma_data) 416// polynomial approximation of ln(GAMMA(x)), 2.25 <= x < 512 417// [2.25; 4) 418data8 0xF888E8D7892718A2,0xC001 // C01 419data8 0xF62F273BA12A4639,0x3FFD // C11 420data8 0xA93AC50A37EC8D38,0xBFFC // C21 421data8 0xB4CC43D2C161E057,0xBFFF // C31 422data8 0xC6AC672F0C1392C7,0xC000 // C41 423data8 0xA292B9AE3276942E,0xC001 // C51 424data8 0xE554E4CCCA6C7B7B,0xC001 // C61 425data8 0x92F0F55FBC87F860,0xC002 // C71 426data8 0xAF60D0112843F6C1,0xC002 // C81 427data8 0xC5956500FA3D92E7,0xC002 // C91 428data8 0xD3B22CCBD8587750,0xC002 // CA1 429data8 0xD888B6CF34159B54,0x4001 // C00 430data8 0xBCB79C8329FD9F44,0x3FFE // C10 431data8 0xCB8896FAD69C455D,0x4000 // C20 432data8 0xE510A424639EBF5E,0x4001 // C30 433data8 0xC65ED41B097486B3,0x4002 // C40 434// [4; 8) 435data8 0x9F1F3C822D03080E,0xC001 // C01 436data8 0x941CACFA9C0FA8A6,0xC001 // C11 437data8 0xFE34336391D99CB7,0xC000 // C21 438data8 0xC40BAEAA165F81A1,0xC000 // C31 439data8 0xFE3AE166E9B4DE8F,0xBFFF // C41 440data8 0xD744F91AF7DAF873,0xBFFE // C51 441data8 0x87871851E9C32D02,0x3FFD // C61 442data8 0x9C93C03C502E808F,0x3FFF // C71 443data8 0xF78BED07501D6A8E,0x3FFF // C81 444data8 0x92FE41BA8BEADF70,0x4000 // C91 445data8 0xA021878E1903A2C6,0x3FFF // CA1 446data8 0xC85EFAC379FAFEE2,0x4001 // C00 447data8 0xC10D7AAB7CEC7FF2,0x4001 // C10 448data8 0xB3537BDF603E454C,0x4001 // C20 449data8 0xA0D44E3D5BBE44C4,0x4001 // C30 450data8 0x8B9C229B6241E7B3,0x4001 // C40 451// [8; 16) 452data8 0xD16AB33AEC220DF6,0x3FFF // C01 453data8 0x987483646E150BCD,0x4000 // C11 454data8 0x80C10A24C863999B,0x4000 // C21 455data8 0xA39A8EB6F8AACE75,0x3FFF // C31 456data8 
0x93E04A1379BEC764,0x3FFD // C41 457data8 0xD9F59C4BD3A69BD1,0xBFFE // C51 458data8 0x82094EC891179B1A,0xC000 // C61 459data8 0xC90CFE3A24F70659,0xC000 // C71 460data8 0x827984EA7C155184,0xC001 // C81 461data8 0x981BFDF79D1E0D80,0xC001 // C91 462data8 0xA37209A8B97D230D,0xC001 // CA1 463data8 0xAA1989737D6BA66D,0x3FFE // C00 464data8 0xDBC013A351630AF8,0x3FFF // C10 465data8 0x8B8D47698299389D,0x4000 // C20 466data8 0xACCDD1315DE06EB0,0x4000 // C30 467data8 0xD3414A5AC81BBB2D,0x4000 // C40 468// [16; 32) 469data8 0xECB2B0BE75C5F995,0x3FFF // C01 470data8 0x9DD28BD6DBC96500,0x4000 // C11 471data8 0x8521431B99C6244F,0x4000 // C21 472data8 0xA95F92612B8413C3,0x3FFF // C31 473data8 0x9C76E643B22D9544,0x3FFD // C41 474data8 0xDD90EA99417C8038,0xBFFE // C51 475data8 0x84EA6B6D32E5F906,0xC000 // C61 476data8 0xCDBFE499E05AA622,0xC000 // C71 477data8 0x8594A7DE35427100,0xC001 // C81 478data8 0x9BC1CB2C10DC702F,0xC001 // C91 479data8 0xA7602268762666B0,0xC001 // CA1 480data8 0xDA082BCC6BDB8F7B,0x3FFE // C00 481data8 0xEEBFE1C99322B85E,0x3FFF // C10 482data8 0x96FED4C785361946,0x4000 // C20 483data8 0xB9E3A7207C16B2FE,0x4000 // C30 484data8 0xE1E8170CED48E2C7,0x4000 // C40 485// [32; 64) 486data8 0xFD481EB9AEDD53E7,0x3FFF // C01 487data8 0xA216FB66AC8C53E1,0x4000 // C11 488data8 0x885FF935787553BA,0x4000 // C21 489data8 0xAD471CD89A313327,0x3FFF // C31 490data8 0x9FF13FBA139D21E0,0x3FFD // C41 491data8 0xE25E1663A6EE0266,0xBFFE // C51 492data8 0x87BE51DD5D262FA2,0xC000 // C61 493data8 0xD211A9D4CCE55696,0xC000 // C71 494data8 0x885BEFC29FDED3C9,0xC001 // C81 495data8 0x9EFA48E6367A67F6,0xC001 // C91 496data8 0xAAD3978FC0791297,0xC001 // CA1 497data8 0xF96D210DF37A0AEA,0x3FFE // C00 498data8 0xFE11DC6783917C82,0x3FFF // C10 499data8 0x9FFCD928291B7DDE,0x4000 // C20 500data8 0xC4518F4A80E09AE1,0x4000 // C30 501data8 0xEDDFE9E0FD297C63,0x4000 // C40 502// [64; 128) 503data8 0x840E2E62609B0AD3,0x4000 // C01 504data8 0xA5275A0DD0D3DDF8,0x4000 // C11 505data8 
0x8AADC6ABFC441731,0x4000 // C21 506data8 0xB041C6696BE90E50,0x3FFF // C31 507data8 0xA4A8C9153F4B037E,0x3FFD // C41 508data8 0xE3C6A461A7B86736,0xBFFE // C51 509data8 0x89047681C6DE7673,0xC000 // C61 510data8 0xD42DF77A480092DF,0xC000 // C71 511data8 0x89C25D17F086FB20,0xC001 // C81 512data8 0xA09F907D02E34EC7,0xC001 // C91 513data8 0xAC998A9CB79805B7,0xC001 // CA1 514data8 0x875CC9B69AE964CC,0x3FFF // C00 515data8 0x847836BA85DD4C12,0x4000 // C10 516data8 0xA5F3CB2B32E74936,0x4000 // C20 517data8 0xCAE2197C96CB5A0F,0x4000 // C30 518data8 0xF50F7EB60DE5CD09,0x4000 // C40 519// [128; 256) 520data8 0x87D9065DD1876926,0x4000 // C01 521data8 0xA781C28FDAD7CC25,0x4000 // C11 522data8 0x8C6A4FCE35A7EC8D,0x4000 // C21 523data8 0xB27BA081728354F9,0x3FFF // C31 524data8 0xA82FEA7124B0EB2B,0x3FFD // C41 525data8 0xE4C996E42ECBF77A,0xBFFE // C51 526data8 0x89F1A92C84FA538F,0xC000 // C61 527data8 0xD5B6CFF7DB7F6070,0xC000 // C71 528data8 0x8AC6B561FAE38B66,0xC001 // C81 529data8 0xA1D1505C438D8F46,0xC001 // C91 530data8 0xADE2DC1C924FEC81,0xC001 // CA1 531data8 0x8EF6CC62A7E0EB5A,0x3FFF // C00 532data8 0x88A2FFC0ABCB00C0,0x4000 // C10 533data8 0xAA6EA8FCB75B065B,0x4000 // C20 534data8 0xCFC4B82B3D5C9363,0x4000 // C30 535data8 0xFA60FD85DE861771,0x4000 // C40 536// [256; 512) 537data8 0x8AAA7CE4ED5C1EFD,0x4000 // C01 538data8 0xA9679234FB56F1E1,0x4000 // C11 539data8 0x8DCE02287789D841,0x4000 // C21 540data8 0xB44328EF30A8DE7E,0x3FFF // C31 541data8 0xAB0DC564BFA1AB12,0x3FFD // C41 542data8 0xE5882B16FCF2D3CB,0xBFFE // C51 543data8 0x8AA7F48993006A86,0xC000 // C61 544data8 0xD6E63752D192750D,0xC000 // C71 545data8 0x8B90080B17853295,0xC001 // C81 546data8 0xA2BDD4253128D1AB,0xC001 // C91 547data8 0xAEE1A042F96B8121,0xC001 // CA1 548data8 0x94A9C37A42E43BA7,0x3FFF // C00 549data8 0x8BFA54E703878F5A,0x4000 // C10 550data8 0xADFA426DDF14647B,0x4000 // C20 551data8 0xD39C7F7B3958EAF0,0x4000 // C30 552data8 0xFE8C3987853C01E3,0x4000 // C40 553// 554// [2.25; 4) 555data8 
0x943AF77763601441,0x4003 // C50 556data8 0xC8A93F9ECB06E891,0x4003 // C60 557data8 0xFC2E5A4AD33DE19D,0x4003 // C70 558data8 0x9526B75B38670119,0x4004 // C80 559data8 0xA7675879D68B587E,0x4004 // C90 560data8 0xB31DFA672D7FB8C0,0x4004 // CA0 561data8 0x83A27775D86F9A81,0xBFD7 // CN 562// [4; 8) 563data8 0xEB8049BA5E79ADA3,0x4000 // C50 564data8 0xC20C95EA99037228,0x4000 // C60 565data8 0x9D4A8C864053CEB8,0x4000 // C70 566data8 0xFC7716544AB0C5C9,0x3FFF // C80 567data8 0xC7EB985259EABA5F,0x3FFF // C90 568data8 0xC042FB3B4C95096D,0x3FFD // CA0 569data8 0xCC2A7F930856177B,0x3FEE // CN 570// [8; 16) 571data8 0xFE1903679D078C7A,0x4000 // C50 572data8 0x957C221AB90171F1,0x4001 // C60 573data8 0xAB2C53B2A78F4031,0x4001 // C70 574data8 0xBE080AE6063AE387,0x4001 // C80 575data8 0xCC019A0311605CB9,0x4001 // C90 576data8 0xD3739D85A12C8ADF,0x4001 // CA0 577data8 0x81FA4D2B7BD7A82D,0x3FEF // CN 578// [16; 32) 579data8 0x871F69E2DD221F02,0x4001 // C50 580data8 0x9E3EF2D477442A9C,0x4001 // C60 581data8 0xB48733582B3C82C5,0x4001 // C70 582data8 0xC7DB9B3C25854A2A,0x4001 // C80 583data8 0xD628B87975BE898F,0x4001 // C90 584data8 0xDDC569C321FF119C,0x4001 // CA0 585data8 0xB27B65560DF7ADA7,0x3FEF // CN 586// [32; 64) 587data8 0x8DE4127349719B22,0x4001 // C50 588data8 0xA5C30A7760F5FBB2,0x4001 // C60 589data8 0xBCB4096055AA2A4E,0x4001 // C70 590data8 0xD08F5F2FB4E7B899,0x4001 // C80 591data8 0xDF39ED39DC91F9CF,0x4001 // C90 592data8 0xE7063E45322F072E,0x4001 // CA0 593data8 0x85A9E11DDDDE67C8,0x3FF0 // CN 594// [64; 128) 595data8 0x91CA191EB80E8893,0x4001 // C50 596data8 0xA9F1D5A55397334A,0x4001 // C60 597data8 0xC1222710295094E3,0x4001 // C70 598data8 0xD52FFABBA6CBE5C6,0x4001 // C80 599data8 0xE3FD9D5282052E1D,0x4001 // C90 600data8 0xEBDBE47BB662F3EF,0x4001 // CA0 601data8 0xEF889F489D88FD31,0x3FF0 // CN 602// [128; 256) 603data8 0x94AA029C2286F8D2,0x4001 // C50 604data8 0xAD0549E55A72389F,0x4001 // C60 605data8 0xC4628899DAF94BA4,0x4001 // C70 606data8 0xD89432A4161C72CB,0x4001 
// C80 607data8 0xE77ABA75E9C38F3A,0x4001 // C90 608data8 0xEF65BFFFF71347FF,0x4001 // CA0 609data8 0xE2627460064D918D,0x3FF1 // CN 610// [256; 512) 611data8 0x96E9890D722C2FC1,0x4001 // C50 612data8 0xAF6C2236F6A1CEC4,0x4001 // C60 613data8 0xC6EBB8C9F987D20D,0x4001 // C70 614data8 0xDB38CEFD5EF328CC,0x4001 // C80 615data8 0xEA3265DC66C9A0B4,0x4001 // C90 616data8 0xF2272D6B368C70B1,0x4001 // CA0 617data8 0xDBFF93ECEBCEF1F3,0x3FF2 // CN 618// 619data8 0x3FDD8B618D5AF8FE // point of local minimum on [1;2] 620data8 0x3FE0000000000000 // 0.5 621data8 0xBFC5555DA7212371 // P5 622data8 0x3FC999A19EEF5826 // P4 623data8 0xb17217f7d1cf79ac,0x3ffe // ln(2) 624data8 0xEB3F8E4325F5A535,0x3FFE // ln(sqrt(4*arcsin(1))) 625// 626data8 0xBFCFFFFFFFFEF009 // P3 627data8 0x3FD555555554ECB2 // P2 628data8 0xBF66C16C16C16C17 // W4=B4/12=-1/360 629data8 0x7F5754D9278B51A8 // overflow boundary (first inf result) 630data8 0xAAAAAAAAAAAAAAAB,0x3FFB // W2=B2/2=1/12 631// 632data8 0x3FBC756AC654273B // Q8 633data8 0xBFC001A42489AB4D // Q7 634data8 0x3FC99999999A169B // Q4 635data8 0xBFD00000000019AC // Q3 636data8 0x3FC2492479AA0DF8 // Q6 637data8 0xBFC5555544986F52 // Q5 638data8 0x3FD5555555555555 // Q2 639data8 0xBFE0000000000000 // Q1, P1 = -0.5 640// 641data8 0x80200aaeac44ef38,0x3ff6 // ln(1/frcpa(1+ 0/2^-8)) 642data8 0xc09090a2c35aa070,0x3ff7 // ln(1/frcpa(1+ 1/2^-8)) 643data8 0xa0c94fcb41977c75,0x3ff8 // ln(1/frcpa(1+ 2/2^-8)) 644data8 0xe18b9c263af83301,0x3ff8 // ln(1/frcpa(1+ 3/2^-8)) 645data8 0x8d35c8d6399c30ea,0x3ff9 // ln(1/frcpa(1+ 4/2^-8)) 646data8 0xadd4d2ecd601cbb8,0x3ff9 // ln(1/frcpa(1+ 5/2^-8)) 647data8 0xce95403a192f9f01,0x3ff9 // ln(1/frcpa(1+ 6/2^-8)) 648data8 0xeb59392cbcc01096,0x3ff9 // ln(1/frcpa(1+ 7/2^-8)) 649data8 0x862c7d0cefd54c5d,0x3ffa // ln(1/frcpa(1+ 8/2^-8)) 650data8 0x94aa63c65e70d499,0x3ffa // ln(1/frcpa(1+ 9/2^-8)) 651data8 0xa54a696d4b62b382,0x3ffa // ln(1/frcpa(1+ 10/2^-8)) 652data8 0xb3e4a796a5dac208,0x3ffa // ln(1/frcpa(1+ 11/2^-8)) 653data8 
0xc28c45b1878340a9,0x3ffa // ln(1/frcpa(1+ 12/2^-8)) 654data8 0xd35c55f39d7a6235,0x3ffa // ln(1/frcpa(1+ 13/2^-8)) 655data8 0xe220f037b954f1f5,0x3ffa // ln(1/frcpa(1+ 14/2^-8)) 656data8 0xf0f3389b036834f3,0x3ffa // ln(1/frcpa(1+ 15/2^-8)) 657data8 0xffd3488d5c980465,0x3ffa // ln(1/frcpa(1+ 16/2^-8)) 658data8 0x87609ce2ed300490,0x3ffb // ln(1/frcpa(1+ 17/2^-8)) 659data8 0x8ede9321e8c85927,0x3ffb // ln(1/frcpa(1+ 18/2^-8)) 660data8 0x96639427f2f8e2f4,0x3ffb // ln(1/frcpa(1+ 19/2^-8)) 661data8 0x9defad3e8f73217b,0x3ffb // ln(1/frcpa(1+ 20/2^-8)) 662data8 0xa582ebd50097029c,0x3ffb // ln(1/frcpa(1+ 21/2^-8)) 663data8 0xac06dbe75ab80fee,0x3ffb // ln(1/frcpa(1+ 22/2^-8)) 664data8 0xb3a78449b2d3ccca,0x3ffb // ln(1/frcpa(1+ 23/2^-8)) 665data8 0xbb4f79635ab46bb2,0x3ffb // ln(1/frcpa(1+ 24/2^-8)) 666data8 0xc2fec93a83523f3f,0x3ffb // ln(1/frcpa(1+ 25/2^-8)) 667data8 0xc99af2eaca4c4571,0x3ffb // ln(1/frcpa(1+ 26/2^-8)) 668data8 0xd1581106472fa653,0x3ffb // ln(1/frcpa(1+ 27/2^-8)) 669data8 0xd8002560d4355f2e,0x3ffb // ln(1/frcpa(1+ 28/2^-8)) 670data8 0xdfcb43b4fe508632,0x3ffb // ln(1/frcpa(1+ 29/2^-8)) 671data8 0xe67f6dff709d4119,0x3ffb // ln(1/frcpa(1+ 30/2^-8)) 672data8 0xed393b1c22351280,0x3ffb // ln(1/frcpa(1+ 31/2^-8)) 673data8 0xf5192bff087bcc35,0x3ffb // ln(1/frcpa(1+ 32/2^-8)) 674data8 0xfbdf4ff6dfef2fa3,0x3ffb // ln(1/frcpa(1+ 33/2^-8)) 675data8 0x81559a97f92f9cc7,0x3ffc // ln(1/frcpa(1+ 34/2^-8)) 676data8 0x84be72bce90266e8,0x3ffc // ln(1/frcpa(1+ 35/2^-8)) 677data8 0x88bc74113f23def2,0x3ffc // ln(1/frcpa(1+ 36/2^-8)) 678data8 0x8c2ba3edf6799d11,0x3ffc // ln(1/frcpa(1+ 37/2^-8)) 679data8 0x8f9dc92f92ea08b1,0x3ffc // ln(1/frcpa(1+ 38/2^-8)) 680data8 0x9312e8f36efab5a7,0x3ffc // ln(1/frcpa(1+ 39/2^-8)) 681data8 0x968b08643409ceb6,0x3ffc // ln(1/frcpa(1+ 40/2^-8)) 682data8 0x9a062cba08a1708c,0x3ffc // ln(1/frcpa(1+ 41/2^-8)) 683data8 0x9d845b3abf95485c,0x3ffc // ln(1/frcpa(1+ 42/2^-8)) 684data8 0xa06fd841bc001bb4,0x3ffc // ln(1/frcpa(1+ 43/2^-8)) 685data8 
0xa3f3a74652fbe0db,0x3ffc // ln(1/frcpa(1+ 44/2^-8)) 686data8 0xa77a8fb2336f20f5,0x3ffc // ln(1/frcpa(1+ 45/2^-8)) 687data8 0xab0497015d28b0a0,0x3ffc // ln(1/frcpa(1+ 46/2^-8)) 688data8 0xae91c2be6ba6a615,0x3ffc // ln(1/frcpa(1+ 47/2^-8)) 689data8 0xb189d1b99aebb20b,0x3ffc // ln(1/frcpa(1+ 48/2^-8)) 690data8 0xb51cced5de9c1b2c,0x3ffc // ln(1/frcpa(1+ 49/2^-8)) 691data8 0xb819bee9e720d42f,0x3ffc // ln(1/frcpa(1+ 50/2^-8)) 692data8 0xbbb2a0947b093a5d,0x3ffc // ln(1/frcpa(1+ 51/2^-8)) 693data8 0xbf4ec1505811684a,0x3ffc // ln(1/frcpa(1+ 52/2^-8)) 694data8 0xc2535bacfa8975ff,0x3ffc // ln(1/frcpa(1+ 53/2^-8)) 695data8 0xc55a3eafad187eb8,0x3ffc // ln(1/frcpa(1+ 54/2^-8)) 696data8 0xc8ff2484b2c0da74,0x3ffc // ln(1/frcpa(1+ 55/2^-8)) 697data8 0xcc0b1a008d53ab76,0x3ffc // ln(1/frcpa(1+ 56/2^-8)) 698data8 0xcfb6203844b3209b,0x3ffc // ln(1/frcpa(1+ 57/2^-8)) 699data8 0xd2c73949a47a19f5,0x3ffc // ln(1/frcpa(1+ 58/2^-8)) 700data8 0xd5daae18b49d6695,0x3ffc // ln(1/frcpa(1+ 59/2^-8)) 701data8 0xd8f08248cf7e8019,0x3ffc // ln(1/frcpa(1+ 60/2^-8)) 702data8 0xdca7749f1b3e540e,0x3ffc // ln(1/frcpa(1+ 61/2^-8)) 703data8 0xdfc28e033aaaf7c7,0x3ffc // ln(1/frcpa(1+ 62/2^-8)) 704data8 0xe2e012a5f91d2f55,0x3ffc // ln(1/frcpa(1+ 63/2^-8)) 705data8 0xe600064ed9e292a8,0x3ffc // ln(1/frcpa(1+ 64/2^-8)) 706data8 0xe9226cce42b39f60,0x3ffc // ln(1/frcpa(1+ 65/2^-8)) 707data8 0xec4749fd97a28360,0x3ffc // ln(1/frcpa(1+ 66/2^-8)) 708data8 0xef6ea1bf57780495,0x3ffc // ln(1/frcpa(1+ 67/2^-8)) 709data8 0xf29877ff38809091,0x3ffc // ln(1/frcpa(1+ 68/2^-8)) 710data8 0xf5c4d0b245cb89be,0x3ffc // ln(1/frcpa(1+ 69/2^-8)) 711data8 0xf8f3afd6fcdef3aa,0x3ffc // ln(1/frcpa(1+ 70/2^-8)) 712data8 0xfc2519756be1abc7,0x3ffc // ln(1/frcpa(1+ 71/2^-8)) 713data8 0xff59119f503e6832,0x3ffc // ln(1/frcpa(1+ 72/2^-8)) 714data8 0x8147ce381ae0e146,0x3ffd // ln(1/frcpa(1+ 73/2^-8)) 715data8 0x82e45f06cb1ad0f2,0x3ffd // ln(1/frcpa(1+ 74/2^-8)) 716data8 0x842f5c7c573cbaa2,0x3ffd // ln(1/frcpa(1+ 75/2^-8)) 717data8 
0x85ce471968c8893a,0x3ffd // ln(1/frcpa(1+ 76/2^-8)) 718data8 0x876e8305bc04066d,0x3ffd // ln(1/frcpa(1+ 77/2^-8)) 719data8 0x891012678031fbb3,0x3ffd // ln(1/frcpa(1+ 78/2^-8)) 720data8 0x8a5f1493d766a05f,0x3ffd // ln(1/frcpa(1+ 79/2^-8)) 721data8 0x8c030c778c56fa00,0x3ffd // ln(1/frcpa(1+ 80/2^-8)) 722data8 0x8da85df17e31d9ae,0x3ffd // ln(1/frcpa(1+ 81/2^-8)) 723data8 0x8efa663e7921687e,0x3ffd // ln(1/frcpa(1+ 82/2^-8)) 724data8 0x90a22b6875c6a1f8,0x3ffd // ln(1/frcpa(1+ 83/2^-8)) 725data8 0x91f62cc8f5d24837,0x3ffd // ln(1/frcpa(1+ 84/2^-8)) 726data8 0x93a06cfc3857d980,0x3ffd // ln(1/frcpa(1+ 85/2^-8)) 727data8 0x94f66d5e6fd01ced,0x3ffd // ln(1/frcpa(1+ 86/2^-8)) 728data8 0x96a330156e6772f2,0x3ffd // ln(1/frcpa(1+ 87/2^-8)) 729data8 0x97fb3582754ea25b,0x3ffd // ln(1/frcpa(1+ 88/2^-8)) 730data8 0x99aa8259aad1bbf2,0x3ffd // ln(1/frcpa(1+ 89/2^-8)) 731data8 0x9b0492f6227ae4a8,0x3ffd // ln(1/frcpa(1+ 90/2^-8)) 732data8 0x9c5f8e199bf3a7a5,0x3ffd // ln(1/frcpa(1+ 91/2^-8)) 733data8 0x9e1293b9998c1daa,0x3ffd // ln(1/frcpa(1+ 92/2^-8)) 734data8 0x9f6fa31e0b41f308,0x3ffd // ln(1/frcpa(1+ 93/2^-8)) 735data8 0xa0cda11eaf46390e,0x3ffd // ln(1/frcpa(1+ 94/2^-8)) 736data8 0xa22c8f029cfa45aa,0x3ffd // ln(1/frcpa(1+ 95/2^-8)) 737data8 0xa3e48badb7856b34,0x3ffd // ln(1/frcpa(1+ 96/2^-8)) 738data8 0xa5459a0aa95849f9,0x3ffd // ln(1/frcpa(1+ 97/2^-8)) 739data8 0xa6a79c84480cfebd,0x3ffd // ln(1/frcpa(1+ 98/2^-8)) 740data8 0xa80a946d0fcb3eb2,0x3ffd // ln(1/frcpa(1+ 99/2^-8)) 741data8 0xa96e831a3ea7b314,0x3ffd // ln(1/frcpa(1+100/2^-8)) 742data8 0xaad369e3dc544e3b,0x3ffd // ln(1/frcpa(1+101/2^-8)) 743data8 0xac92e9588952c815,0x3ffd // ln(1/frcpa(1+102/2^-8)) 744data8 0xadfa035aa1ed8fdc,0x3ffd // ln(1/frcpa(1+103/2^-8)) 745data8 0xaf6219eae1ad6e34,0x3ffd // ln(1/frcpa(1+104/2^-8)) 746data8 0xb0cb2e6d8160f753,0x3ffd // ln(1/frcpa(1+105/2^-8)) 747data8 0xb2354249ad950f72,0x3ffd // ln(1/frcpa(1+106/2^-8)) 748data8 0xb3a056e98ef4a3b4,0x3ffd // ln(1/frcpa(1+107/2^-8)) 749data8 
0xb50c6dba52c6292a,0x3ffd // ln(1/frcpa(1+108/2^-8)) 750data8 0xb679882c33876165,0x3ffd // ln(1/frcpa(1+109/2^-8)) 751data8 0xb78c07429785cedc,0x3ffd // ln(1/frcpa(1+110/2^-8)) 752data8 0xb8faeb8dc4a77d24,0x3ffd // ln(1/frcpa(1+111/2^-8)) 753data8 0xba6ad77eb36ae0d6,0x3ffd // ln(1/frcpa(1+112/2^-8)) 754data8 0xbbdbcc915e9bee50,0x3ffd // ln(1/frcpa(1+113/2^-8)) 755data8 0xbd4dcc44f8cf12ef,0x3ffd // ln(1/frcpa(1+114/2^-8)) 756data8 0xbec0d81bf5b531fa,0x3ffd // ln(1/frcpa(1+115/2^-8)) 757data8 0xc034f19c139186f4,0x3ffd // ln(1/frcpa(1+116/2^-8)) 758data8 0xc14cb69f7c5e55ab,0x3ffd // ln(1/frcpa(1+117/2^-8)) 759data8 0xc2c2abbb6e5fd56f,0x3ffd // ln(1/frcpa(1+118/2^-8)) 760data8 0xc439b2c193e6771e,0x3ffd // ln(1/frcpa(1+119/2^-8)) 761data8 0xc553acb9d5c67733,0x3ffd // ln(1/frcpa(1+120/2^-8)) 762data8 0xc6cc96e441272441,0x3ffd // ln(1/frcpa(1+121/2^-8)) 763data8 0xc8469753eca88c30,0x3ffd // ln(1/frcpa(1+122/2^-8)) 764data8 0xc962cf3ce072b05c,0x3ffd // ln(1/frcpa(1+123/2^-8)) 765data8 0xcadeba8771f694aa,0x3ffd // ln(1/frcpa(1+124/2^-8)) 766data8 0xcc5bc08d1f72da94,0x3ffd // ln(1/frcpa(1+125/2^-8)) 767data8 0xcd7a3f99ea035c29,0x3ffd // ln(1/frcpa(1+126/2^-8)) 768data8 0xcef93860c8a53c35,0x3ffd // ln(1/frcpa(1+127/2^-8)) 769data8 0xd0192f68a7ed23df,0x3ffd // ln(1/frcpa(1+128/2^-8)) 770data8 0xd19a201127d3c645,0x3ffd // ln(1/frcpa(1+129/2^-8)) 771data8 0xd2bb92f4061c172c,0x3ffd // ln(1/frcpa(1+130/2^-8)) 772data8 0xd43e80b2ee8cc8fc,0x3ffd // ln(1/frcpa(1+131/2^-8)) 773data8 0xd56173601fc4ade4,0x3ffd // ln(1/frcpa(1+132/2^-8)) 774data8 0xd6e6637efb54086f,0x3ffd // ln(1/frcpa(1+133/2^-8)) 775data8 0xd80ad9f58f3c8193,0x3ffd // ln(1/frcpa(1+134/2^-8)) 776data8 0xd991d1d31aca41f8,0x3ffd // ln(1/frcpa(1+135/2^-8)) 777data8 0xdab7d02231484a93,0x3ffd // ln(1/frcpa(1+136/2^-8)) 778data8 0xdc40d532cde49a54,0x3ffd // ln(1/frcpa(1+137/2^-8)) 779data8 0xdd685f79ed8b265e,0x3ffd // ln(1/frcpa(1+138/2^-8)) 780data8 0xde9094bbc0e17b1d,0x3ffd // ln(1/frcpa(1+139/2^-8)) 781data8 
0xe01c91b78440c425,0x3ffd // ln(1/frcpa(1+140/2^-8)) 782data8 0xe14658f26997e729,0x3ffd // ln(1/frcpa(1+141/2^-8)) 783data8 0xe270cdc2391e0d23,0x3ffd // ln(1/frcpa(1+142/2^-8)) 784data8 0xe3ffce3a2aa64922,0x3ffd // ln(1/frcpa(1+143/2^-8)) 785data8 0xe52bdb274ed82887,0x3ffd // ln(1/frcpa(1+144/2^-8)) 786data8 0xe6589852e75d7df6,0x3ffd // ln(1/frcpa(1+145/2^-8)) 787data8 0xe786068c79937a7d,0x3ffd // ln(1/frcpa(1+146/2^-8)) 788data8 0xe91903adad100911,0x3ffd // ln(1/frcpa(1+147/2^-8)) 789data8 0xea481236f7d35bb0,0x3ffd // ln(1/frcpa(1+148/2^-8)) 790data8 0xeb77d48c692e6b14,0x3ffd // ln(1/frcpa(1+149/2^-8)) 791data8 0xeca84b83d7297b87,0x3ffd // ln(1/frcpa(1+150/2^-8)) 792data8 0xedd977f4962aa158,0x3ffd // ln(1/frcpa(1+151/2^-8)) 793data8 0xef7179a22f257754,0x3ffd // ln(1/frcpa(1+152/2^-8)) 794data8 0xf0a450d139366ca7,0x3ffd // ln(1/frcpa(1+153/2^-8)) 795data8 0xf1d7e0524ff9ffdb,0x3ffd // ln(1/frcpa(1+154/2^-8)) 796data8 0xf30c29036a8b6cae,0x3ffd // ln(1/frcpa(1+155/2^-8)) 797data8 0xf4412bc411ea8d92,0x3ffd // ln(1/frcpa(1+156/2^-8)) 798data8 0xf576e97564c8619d,0x3ffd // ln(1/frcpa(1+157/2^-8)) 799data8 0xf6ad62fa1b5f172f,0x3ffd // ln(1/frcpa(1+158/2^-8)) 800data8 0xf7e499368b55c542,0x3ffd // ln(1/frcpa(1+159/2^-8)) 801data8 0xf91c8d10abaffe22,0x3ffd // ln(1/frcpa(1+160/2^-8)) 802data8 0xfa553f7018c966f3,0x3ffd // ln(1/frcpa(1+161/2^-8)) 803data8 0xfb8eb13e185d802c,0x3ffd // ln(1/frcpa(1+162/2^-8)) 804data8 0xfcc8e3659d9bcbed,0x3ffd // ln(1/frcpa(1+163/2^-8)) 805data8 0xfe03d6d34d487fd2,0x3ffd // ln(1/frcpa(1+164/2^-8)) 806data8 0xff3f8c7581e9f0ae,0x3ffd // ln(1/frcpa(1+165/2^-8)) 807data8 0x803e029e280173ae,0x3ffe // ln(1/frcpa(1+166/2^-8)) 808data8 0x80dca10cc52d0757,0x3ffe // ln(1/frcpa(1+167/2^-8)) 809data8 0x817ba200632755a1,0x3ffe // ln(1/frcpa(1+168/2^-8)) 810data8 0x821b05f3b01d6774,0x3ffe // ln(1/frcpa(1+169/2^-8)) 811data8 0x82bacd623ff19d06,0x3ffe // ln(1/frcpa(1+170/2^-8)) 812data8 0x835af8c88e7a8f47,0x3ffe // ln(1/frcpa(1+171/2^-8)) 813data8 
0x83c5f8299e2b4091,0x3ffe // ln(1/frcpa(1+172/2^-8)) 814data8 0x8466cb43f3d87300,0x3ffe // ln(1/frcpa(1+173/2^-8)) 815data8 0x850803a67c80ca4b,0x3ffe // ln(1/frcpa(1+174/2^-8)) 816data8 0x85a9a1d11a23b461,0x3ffe // ln(1/frcpa(1+175/2^-8)) 817data8 0x864ba644a18e6e05,0x3ffe // ln(1/frcpa(1+176/2^-8)) 818data8 0x86ee1182dcc432f7,0x3ffe // ln(1/frcpa(1+177/2^-8)) 819data8 0x875a925d7e48c316,0x3ffe // ln(1/frcpa(1+178/2^-8)) 820data8 0x87fdaa109d23aef7,0x3ffe // ln(1/frcpa(1+179/2^-8)) 821data8 0x88a129ed4becfaf2,0x3ffe // ln(1/frcpa(1+180/2^-8)) 822data8 0x89451278ecd7f9cf,0x3ffe // ln(1/frcpa(1+181/2^-8)) 823data8 0x89b29295f8432617,0x3ffe // ln(1/frcpa(1+182/2^-8)) 824data8 0x8a572ac5a5496882,0x3ffe // ln(1/frcpa(1+183/2^-8)) 825data8 0x8afc2d0ce3b2dadf,0x3ffe // ln(1/frcpa(1+184/2^-8)) 826data8 0x8b6a69c608cfd3af,0x3ffe // ln(1/frcpa(1+185/2^-8)) 827data8 0x8c101e106e899a83,0x3ffe // ln(1/frcpa(1+186/2^-8)) 828data8 0x8cb63de258f9d626,0x3ffe // ln(1/frcpa(1+187/2^-8)) 829data8 0x8d2539c5bd19e2b1,0x3ffe // ln(1/frcpa(1+188/2^-8)) 830data8 0x8dcc0e064b29e6f1,0x3ffe // ln(1/frcpa(1+189/2^-8)) 831data8 0x8e734f45d88357ae,0x3ffe // ln(1/frcpa(1+190/2^-8)) 832data8 0x8ee30cef034a20db,0x3ffe // ln(1/frcpa(1+191/2^-8)) 833data8 0x8f8b0515686d1d06,0x3ffe // ln(1/frcpa(1+192/2^-8)) 834data8 0x90336bba039bf32f,0x3ffe // ln(1/frcpa(1+193/2^-8)) 835data8 0x90a3edd23d1c9d58,0x3ffe // ln(1/frcpa(1+194/2^-8)) 836data8 0x914d0de2f5d61b32,0x3ffe // ln(1/frcpa(1+195/2^-8)) 837data8 0x91be0c20d28173b5,0x3ffe // ln(1/frcpa(1+196/2^-8)) 838data8 0x9267e737c06cd34a,0x3ffe // ln(1/frcpa(1+197/2^-8)) 839data8 0x92d962ae6abb1237,0x3ffe // ln(1/frcpa(1+198/2^-8)) 840data8 0x9383fa6afbe2074c,0x3ffe // ln(1/frcpa(1+199/2^-8)) 841data8 0x942f0421651c1c4e,0x3ffe // ln(1/frcpa(1+200/2^-8)) 842data8 0x94a14a3845bb985e,0x3ffe // ln(1/frcpa(1+201/2^-8)) 843data8 0x954d133857f861e7,0x3ffe // ln(1/frcpa(1+202/2^-8)) 844data8 0x95bfd96468e604c4,0x3ffe // ln(1/frcpa(1+203/2^-8)) 845data8 
0x9632d31cafafa858,0x3ffe // ln(1/frcpa(1+204/2^-8)) 846data8 0x96dfaabd86fa1647,0x3ffe // ln(1/frcpa(1+205/2^-8)) 847data8 0x9753261fcbb2a594,0x3ffe // ln(1/frcpa(1+206/2^-8)) 848data8 0x9800c11b426b996d,0x3ffe // ln(1/frcpa(1+207/2^-8)) 849data8 0x9874bf4d45ae663c,0x3ffe // ln(1/frcpa(1+208/2^-8)) 850data8 0x99231f5ee9a74f79,0x3ffe // ln(1/frcpa(1+209/2^-8)) 851data8 0x9997a18a56bcad28,0x3ffe // ln(1/frcpa(1+210/2^-8)) 852data8 0x9a46c873a3267e79,0x3ffe // ln(1/frcpa(1+211/2^-8)) 853data8 0x9abbcfc621eb6cb6,0x3ffe // ln(1/frcpa(1+212/2^-8)) 854data8 0x9b310cb0d354c990,0x3ffe // ln(1/frcpa(1+213/2^-8)) 855data8 0x9be14cf9e1b3515c,0x3ffe // ln(1/frcpa(1+214/2^-8)) 856data8 0x9c5710b8cbb73a43,0x3ffe // ln(1/frcpa(1+215/2^-8)) 857data8 0x9ccd0abd301f399c,0x3ffe // ln(1/frcpa(1+216/2^-8)) 858data8 0x9d7e67f3bdce8888,0x3ffe // ln(1/frcpa(1+217/2^-8)) 859data8 0x9df4ea81a99daa01,0x3ffe // ln(1/frcpa(1+218/2^-8)) 860data8 0x9e6ba405a54514ba,0x3ffe // ln(1/frcpa(1+219/2^-8)) 861data8 0x9f1e21c8c7bb62b3,0x3ffe // ln(1/frcpa(1+220/2^-8)) 862data8 0x9f956593f6b6355c,0x3ffe // ln(1/frcpa(1+221/2^-8)) 863data8 0xa00ce1092e5498c3,0x3ffe // ln(1/frcpa(1+222/2^-8)) 864data8 0xa0c08309c4b912c1,0x3ffe // ln(1/frcpa(1+223/2^-8)) 865data8 0xa1388a8c6faa2afa,0x3ffe // ln(1/frcpa(1+224/2^-8)) 866data8 0xa1b0ca7095b5f985,0x3ffe // ln(1/frcpa(1+225/2^-8)) 867data8 0xa22942eb47534a00,0x3ffe // ln(1/frcpa(1+226/2^-8)) 868data8 0xa2de62326449d0a3,0x3ffe // ln(1/frcpa(1+227/2^-8)) 869data8 0xa357690f88bfe345,0x3ffe // ln(1/frcpa(1+228/2^-8)) 870data8 0xa3d0a93f45169a4b,0x3ffe // ln(1/frcpa(1+229/2^-8)) 871data8 0xa44a22f7ffe65f30,0x3ffe // ln(1/frcpa(1+230/2^-8)) 872data8 0xa500c5e5b4c1aa36,0x3ffe // ln(1/frcpa(1+231/2^-8)) 873data8 0xa57ad064eb2ebbc2,0x3ffe // ln(1/frcpa(1+232/2^-8)) 874data8 0xa5f5152dedf4384e,0x3ffe // ln(1/frcpa(1+233/2^-8)) 875data8 0xa66f9478856233ec,0x3ffe // ln(1/frcpa(1+234/2^-8)) 876data8 0xa6ea4e7cca02c32e,0x3ffe // ln(1/frcpa(1+235/2^-8)) 877data8 
0xa765437325341ccf,0x3ffe // ln(1/frcpa(1+236/2^-8)) 878data8 0xa81e21e6c75b4020,0x3ffe // ln(1/frcpa(1+237/2^-8)) 879data8 0xa899ab333fe2b9ca,0x3ffe // ln(1/frcpa(1+238/2^-8)) 880data8 0xa9157039c51ebe71,0x3ffe // ln(1/frcpa(1+239/2^-8)) 881data8 0xa991713433c2b999,0x3ffe // ln(1/frcpa(1+240/2^-8)) 882data8 0xaa0dae5cbcc048b3,0x3ffe // ln(1/frcpa(1+241/2^-8)) 883data8 0xaa8a27ede5eb13ad,0x3ffe // ln(1/frcpa(1+242/2^-8)) 884data8 0xab06de228a9e3499,0x3ffe // ln(1/frcpa(1+243/2^-8)) 885data8 0xab83d135dc633301,0x3ffe // ln(1/frcpa(1+244/2^-8)) 886data8 0xac3fb076adc7fe7a,0x3ffe // ln(1/frcpa(1+245/2^-8)) 887data8 0xacbd3cbbe47988f1,0x3ffe // ln(1/frcpa(1+246/2^-8)) 888data8 0xad3b06b1a5dc57c3,0x3ffe // ln(1/frcpa(1+247/2^-8)) 889data8 0xadb90e94af887717,0x3ffe // ln(1/frcpa(1+248/2^-8)) 890data8 0xae3754a218f7c816,0x3ffe // ln(1/frcpa(1+249/2^-8)) 891data8 0xaeb5d9175437afa2,0x3ffe // ln(1/frcpa(1+250/2^-8)) 892data8 0xaf349c322e9c7cee,0x3ffe // ln(1/frcpa(1+251/2^-8)) 893data8 0xafb39e30d1768d1c,0x3ffe // ln(1/frcpa(1+252/2^-8)) 894data8 0xb032df51c2c93116,0x3ffe // ln(1/frcpa(1+253/2^-8)) 895data8 0xb0b25fd3e6035ad9,0x3ffe // ln(1/frcpa(1+254/2^-8)) 896data8 0xb1321ff67cba178c,0x3ffe // ln(1/frcpa(1+255/2^-8)) 897// 898data8 0xC7DC2985D3B44557,0x3FCA // A00 899// 900// polynomial approximation of ln(GAMMA(x)), 1 <= x < 2.25 901// [0.875,1.25) 902data8 0xBF9A04F7E40C8498,0x3FAB79D8D9380F03 // C17,C16 903data8 0xBFB3B63609CA0CBD,0x3FB5564EA1675539 // C13,C12 904data8 0xBFBC806766F48C41,0x3FC010B36CDA773A // C9,C8 905data8 0xD45CE0BD54BE3D67,0xBFFC // C5 906data8 0xCD26AADF559676D0,0xBFFD // C3 907data8 0x93C467E37DB0C7A7,0xBFFE // C1 908data8 0xBFB10C251723B123,0x3FB2669DAD69A12D // C15,C14 909data8 0xBFB748A3CFCE4717,0x3FB9A01DEE29966A // C11,C10 910data8 0xBFC2703A1D85497E,0x3FC5B40CB0FD353C // C7,C6 911data8 0x8A8991563ECBBA5D,0x3FFD // C4 912data8 0xD28D3312983E9844,0x3FFE // C2 913data8 0,0 // C0 914// [1.25,1.75) 915data8 0xBF12680486396DE6,0x3F23C51FC332CD9D 
// C17,C16 916data8 0xBF422633DA3A1496,0x3F4CC70680768857 // C13,C12 917data8 0xBF6E2F1A1F804B5D,0x3F78FCE02A032428 // C9,C8 918data8 0x864D46FA895985C1,0xBFFA // C5 919data8 0x97213C6E35E12043,0xBFFC // C3 920data8 0x8A8A42A401D979B7,0x3FC7 // C1 921data8 0xBF2E098A8A2332A8,0x3F370E61B73B205C // C15,C14 922data8 0xBF56F9849D3BC6CC,0x3F6283126F58D7F4 // C11,C10 923data8 0xBF851F9F9516A98F,0x3F9266E797A1433F // C7,C6 924data8 0x845A14A6A81B0638,0x3FFB // C4 925data8 0xF7B95E4771C55C99,0x3FFD // C2 926data8 0xF8CDCDE61C520E0F,0xBFFB // C0 927// [1.75,2.25) 928data8 0xBEA01D7AFA5D8F52,0x3EB1010986E60253 // C17,C16 929data8 0xBEE3CBEDB4C918AA,0x3EF580F6D9D0F72D // C13,C12 930data8 0xBF2D3FD4C7F68563,0x3F40B36AF884AE9A // C9,C8 931data8 0xF2027E10C7B051EC,0xBFF7 // C5 932data8 0x89F000D2ABB03401,0xBFFB // C3 933data8 0xD8773039049E70B6,0x3FFD // C1 934data8 0xBEC112CD07CFC31A,0x3ED2528A428D30E1 // C15,C14 935data8 0xBF078DE5618D8C9F,0x3F1A127AD811A53D // C11,C10 936data8 0xBF538AC5C2BF540D,0x3F67ADD6EADB5718 // C7,C6 937data8 0xA8991563EC243383,0x3FF9 // C4 938data8 0xA51A6625307D3230,0x3FFD // C2 939data8 0,0 // C0 940// 941// polynomial approximation of ln(sin(Pi*x)/(Pi*x)), 0 <= x <= 0.5 942data8 0xBFDC1BF0931AE591,0x3FD36D6D6CE263D7 //S28,S26 943data8 0xBFBD516F4FD9FB18,0xBFBBE1703F315086 //S20,S18 944data8 0xAAB5A3CCEFCD3628,0xBFFC //S12 945data8 0x80859B5C318E19A5,0xBFFD //S8 946data8 0x8A8991563EC7EB33,0xBFFE //S4 947data8 0xBFD23AB9E6CC88AC,0xBF9957F5146FC7AF //S24,S22 948data8 0xBFC007B324E23040,0xBFC248DEC29CAC4A //S16,S14 949data8 0xCD00EFF2F8F86899,0xBFFC //S10 950data8 0xADA06587FACD668B,0xBFFD //S6 951data8 0xD28D3312983E98A0,0xBFFF //S2 952// 953data8 0x8090F777D7942F73,0x4001 // PR01 954data8 0xE5B521193CF61E63,0x4000 // PR11 955data8 0xC02C000000001939 // (-15;-14) 956data8 0x0000000000000233 // (-15;-14) 957data8 0xC02A000000016124 // (-14;-13) 958data8 0x0000000000002BFB // (-14;-13) 959data8 0xC02800000011EED9 // (-13;-12) 960data8 0x0000000000025CBB
// (-13;-12) 961data8 0xC026000000D7322A // (-12;-11) 962data8 0x00000000001E1095 // (-12;-11) 963data8 0xC0240000093F2777 // (-11;-10) 964data8 0x00000000013DD3DC // (-11;-10) 965data8 0xC02200005C7768FB // (-10;-9) 966data8 0x000000000C9539B9 // (-10;-9) 967data8 0xC02000034028B3F9 // (-9;-8) 968data8 0x000000007570C565 // (-9;-8) 969data8 0xC01C0033FDEDFE1F // (-8;-7) 970data8 0x00000007357E670E // (-8;-7) 971data8 0xC018016B25897C8D // (-7;-6) 972data8 0x000000346DC5D639 // (-7;-6) 973data8 0xC014086A57F0B6D9 // (-6;-5) 974data8 0x0000010624DD2F1B // (-6;-5) 975data8 0xC010284E78599581 // (-5;-4) 976data8 0x0000051EB851EB85 // (-5;-4) 977data8 0xC009260DBC9E59AF // (-4;-3) 978data8 0x000028F5C28F5C29 // (-4;-3) 979data8 0xC003A7FC9600F86C // (-3;-2) 980data8 0x0000666666666666 // (-3;-2) 981data8 0xCC15879606130890,0x4000 // PR21 982data8 0xB42FE3281465E1CC,0x4000 // PR31 983// 984data8 0x828185F0B95C9916,0x4001 // PR00 985// 986data8 0xD4D3C819E4E5654B,0x4000 // PR10 987data8 0xA82FBBA4FCC75298,0x4000 // PR20 988data8 0xC02DFFFFFFFFFE52 // (-15;-14) 989data8 0x000000000000001C // (-15;-14) 990data8 0xC02BFFFFFFFFE6C7 // (-14;-13) 991data8 0x00000000000001A6 // (-14;-13) 992data8 0xC029FFFFFFFE9EDC // (-13;-12) 993data8 0x0000000000002BFB // (-13;-12) 994data8 0xC027FFFFFFEE1127 // (-12;-11) 995data8 0x000000000001EEC8 // (-12;-11) 996data8 0xC025FFFFFF28CDD4 // (-11;-10) 997data8 0x00000000001E1095 // (-11;-10) 998data8 0xC023FFFFF6C0D7C0 // (-10;-9) 999data8 0x000000000101B2B3 // (-10;-9) 1000data8 0xC021FFFFA3884BD0 // (-9;-8) 1001data8 0x000000000D6BF94D // (-9;-8) 1002data8 0xC01FFFF97F8159CF // (-8;-7) 1003data8 0x00000000C9539B89 // (-8;-7) 1004data8 0xC01BFFCBF76B86F0 // (-7;-6) 1005data8 0x00000007357E670E // (-7;-6) 1006data8 0xC017FE92F591F40D // (-6;-5) 1007data8 0x000000346DC5D639 // (-6;-5) 1008data8 0xC013F7577A6EEAFD // (-5;-4) 1009data8 0x00000147AE147AE1 // (-5;-4) 1010data8 0xC00FA471547C2FE5 // (-4;-3) 1011data8 0x00000C49BA5E353F // (-4;-3) 
1012data8 0xC005FB410A1BD901 // (-3;-2) 1013data8 0x000053F7CED91687 // (-3;-2) 1014data8 0x80151BB918A293AA,0x4000 // PR30 1015data8 0xB3C9F8F47422A314,0x400B // PRN 1016// 1017// right negative roots 1018//(-3;-2) 1019data8 0x40BFCF8B90BE7F6B,0x40B237623345EFC3 // A15,A14 1020data8 0x407A92EFB03B281E,0x40728700C7819759 // A11,A10 1021data8 0x403809F04EF4D0F2,0x4038D32F682D9593 // A7,A6 1022data8 0xB4A5302C53C2F2D8,0x3FFF // A3 1023data8 0xC1FF4B357A9B0383,0x3FFF // A1 1024data8 0x409C46632EB4B2D3,0x4091A72AFA2148F5 // A13,A12 1025data8 0x4059297AC79A88DB,0x40548EAA7BE7FA6B // A9,A8 1026data8 0x4017339FE04B227F,0x4021718D7CA09E02 // A5,A4 1027data8 0x9B775D8017AAE668,0x4001 // A2 1028data8 0x8191DB68FF4366A1,0x3FC9 // A0 1029//(-4;-3) 1030data8 0x425260910D35307B,0x422668F5BE7983BB // A15,A14 1031data8 0x41A4454DBE4BEE43,0x41799CA93F6EA817 // A11,A10 1032data8 0x40FBB97AA1400F31,0x40D293C3F7ADAB15 // A7,A6 1033data8 0xE089B8926AE4517B,0x4005 // A3 1034data8 0xF90532F97D630C69,0x4001 // A1 1035data8 0x41F9F0CF98C5F2EA,0x41D026336C6BF394 // A13,A12 1036data8 0x415057F61156D5B8,0x41251EA3055CB754 // A9,A8 1037data8 0x40A99A6337D9FC2B,0x408267203D776151 // A5,A4 1038data8 0xCEA694BB8A8827A9,0x4003 // A2 1039data8 0xF4B02F1D73D30EED,0x3FCD // A0 1040//(-5;-4) 1041data8 0x4412365489340979,0x43C86441BAFDEE39 // A15,A14 1042data8 0x42ED68FCB19352DD,0x42A45FCE3905CD6F // A11,A10 1043data8 0x41CD14FE49FD4FCA,0x41855E3DBFA89744 // A7,A6 1044data8 0xAACD88D954E0EC16,0x400B // A3 1045data8 0xD652E7A490B0DCDF,0x4003 // A1 1046data8 0x437F52608E0E752A,0x433560E0633E33D5 // A13,A12 1047data8 0x425C83998976DE3D,0x421433DCCD3B473B // A9,A8 1048data8 0x4140261EB5732106,0x40F96D18E21AE6CC // A5,A4 1049data8 0xA220AE6C09FA8A0E,0x4007 // A2 1050data8 0xCC1682D17A2B5A58,0xBFCF // A0 1051//(-6;-5) 1052data8 0x4630E41D6386CF5A,0x45C2E7992C628C8C // A15,A14 1053data8 0x447AABEC714F913A,0x440EDCAB45339F3A // A11,A10 1054data8 0x42C9A8D00C97E3CE,0x425F7D8D5BEAB44D // A7,A6 1055data8 
0x929EC2B1FB95BB5B,0x4012 // A3 1056data8 0xF6B970414D717D38,0x4005 // A1 1057data8 0x45545E578976F6A2,0x44E738288DD52686 // A13,A12 1058data8 0x43A20921FEC49492,0x433557FD7C6A41B3 // A9,A8 1059data8 0x41F3E01773761DB4,0x418A225DF2DA6C47 // A5,A4 1060data8 0xE7661976117F9312,0x400B // A2 1061data8 0xC33C13FEE07494DE,0x3FCF // A0 1062//(-7;-6) 1063data8 0x4898F1E6133305AD,0x4802C5306FE4A850 // A15,A14 1064data8 0x463FD37946B44094,0x45A8D489B784C2DD // A11,A10 1065data8 0x43E9500995815F06,0x4354F21E2FEE6DF5 // A7,A6 1066data8 0xEF281D1E1BBE10BD,0x4019 // A3 1067data8 0xB4EF24F1D78C2029,0x4008 // A1 1068data8 0x476AB1D5930011E5,0x46D4867E77BFB622 // A13,A12 1069data8 0x45139151ECDEF7C5,0x447F3A2BC6BF466F // A9,A8 1070data8 0x42C1D3D50713FA40,0x422F9C7B52556A1B // A5,A4 1071data8 0xFE711A4267CEA83A,0x4010 // A2 1072data8 0xD11E91B3FF8F4B94,0xBFD2 // A0 1073//(-8;-7) 1074data8 0x4B39E57569811B6E,0x4A7656073EB1FA21 // A15,A14 1075data8 0x482C9B24A516B0BB,0x47698FF55139C62B // A11,A10 1076data8 0x452393E2BC8E8D04,0x44628E1C710DA478 // A7,A6 1077data8 0x9F2A95AF1B7A773F,0x4022 // A3 1078data8 0x9DA03D51C303C918,0x400B // A1 1079data8 0x49B24C241A3D5BCB,0x48F01CB936ECDA67 // A13,A12 1080data8 0x46A712B3425C6797,0x45E5164114BD6DA1 // A9,A8 1081data8 0x43A216A356069D01,0x42E25E42A45E2108 // A5,A4 1082data8 0xC1F42ED57BBC2529,0x4016 // A2 1083data8 0xB1C7B615A7DCA8A9,0xBFD7 // A0 1084//(-9;-8) 1085data8 0x4E09D478E5EE857D,0x4D1647782106E9AB // A15,A14 1086data8 0x4A3C7F4D51927548,0x49497954796D743A // A11,A10 1087data8 0x467387BD6AF0CBDF,0x4582843E134111D2 // A7,A6 1088data8 0x9F003C6DE9666513,0x402B // A3 1089data8 0x9D8447F6BF99950A,0x400E // A1 1090data8 0x4C22364D238C61A9,0x4B300B18050AB940 // A13,A12 1091data8 0x4857004D64215772,0x4765074E448C3C9A // A9,A8 1092data8 0x44920E9EA07BF624,0x43A257BEC94BBF48 // A5,A4 1093data8 0xC1D1C49AC5B2A4B4,0x401C // A2 1094data8 0x9A749AF9F2D2E688,0x3FDB // A0 1095//(-10;-9) 1096data8 0x5102C7C43EA26C83,0x4FDCD174DEB0426B // A15,A14 
1097data8 0x4C6A036195CD5BAD,0x4B44ABB52B65628A // A11,A10 1098data8 0x47D6439374B98FED,0x46B2C3903EF44D7D // A7,A6 1099data8 0xE25BAF73AB8A7DB3,0x4034 // A3 1100data8 0xB130901CA6D81B61,0x4011 // A1 1101data8 0x4EB50BB0726AE206,0x4D907A96E6D2B6E2 // A13,A12 1102data8 0x4A20975D78EAF01A,0x48FAF79C9C3E7908 // A9,A8 1103data8 0x459044144129A247,0x446D6043FA3150A3 // A5,A4 1104data8 0xF547997E083D9BA7,0x4022 // A2 1105data8 0x977AF525A6ECA1BC,0x3FDC // A0 1106//(-11;-10) 1107data8 0x5420A5D5E90C6D73,0x52C4710A503DC67A // A15,A14 1108data8 0x4EB2ED07BA88D2A8,0x4D581001ED9A5ECE // A11,A10 1109data8 0x494A8A28E9E3DFEF,0x47F1E4E1E476793E // A7,A6 1110data8 0xDD0C97E12D4A3378,0x403E // A3 1111data8 0xDD7C12D5182FD543,0x4014 // A1 1112data8 0x5167ED536877A072,0x500DF9AF21DDC0B6 // A13,A12 1113data8 0x4BFEE6F04BC34FF8,0x4AA4175CEF736A5E // A9,A8 1114data8 0x4698D1B4388FEC78,0x4541EDE7607A600D // A5,A4 1115data8 0xBF9F645F282AC552,0x4029 // A2 1116data8 0xAE1BBE4D3CDACCF4,0x3FE1 // A0 1117//(-12;-11) 1118data8 0x575F0EEF5FB7D4C0,0x55CBB7302B211A7C // A15,A14 1119data8 0x5113A4F1825C7CB2,0x4F822A0D46E0605A // A11,A10 1120data8 0x4ACED38FC8BE069A,0x493E3B56D2649F18 // A7,A6 1121data8 0x8FA8FF5DF8B72D5E,0x4049 // A3 1122data8 0x9845417E8598D642,0x4018 // A1 1123data8 0x5437780541C3F2D3,0x52A56279B563C1B2 // A13,A12 1124data8 0x4DF0F71A48C50188,0x4C600B358988DEBF // A9,A8 1125data8 0x47AE7EE95BDA3DE9,0x46200599DC16B18F // A5,A4 1126data8 0xB5249F914932E55D,0x4030 // A2 1127data8 0xEAE760CD2C086094,0x3FE5 // A0 1128//(-13;-12) 1129data8 0x5ABA5848651F6D18,0x58EF60D8A817650B // A15,A14 1130data8 0x538A8CA86E13EFB1,0x51C05DBD4D01076D // A11,A10 1131data8 0x4C607594C339D259,0x4A9585BD5BF932BB // A7,A6 1132data8 0xF26D282C36EC3611,0x4053 // A3 1133data8 0xE467DF4810EE7EEE,0x401B // A1 1134data8 0x5721D9BA485E8CC3,0x5555AF2CCFB2104D // A13,A12 1135data8 0x4FF4619A17B14EA6,0x4E29B2F29EB9F8C4 // A9,A8 1136data8 0x48CCF27629D46E79,0x47044715F991A63D // A5,A4 1137data8 
0xCBC92FB9BDAA95A9,0x4037 // A2 1138data8 0xFB743A426163665B,0xBFE6 // A0 1139//(-14;-13) 1140data8 0x5E3295B24B353EAA,0x5C2B447E29796F20 // A15,A14 1141data8 0x5615A35CB5EAFAE5,0x54106AB089C95CAF // A11,A10 1142data8 0x4DFEC7D93501900A,0x4BF8C4C685F01B83 // A7,A6 1143data8 0x820899603D9A74D5,0x405F // A3 1144data8 0xB9949919933821CB,0x401F // A1 1145data8 0x5A23373DB9A995AC,0x581CBA0AF7F53009 // A13,A12 1146data8 0x520929836BB304CD,0x500386409A7076DA // A9,A8 1147data8 0x49F480173FEAF90B,0x47F1ACB14B810793 // A5,A4 1148data8 0x86881B8674DBF205,0x403F // A2 1149data8 0x8CF3CC35AA2C5F90,0x3FED // A0 1150//(-15;-14) 1151data8 0x61C37D53BE0029D6,0x5F80667CD9D68354 // A15,A14 1152data8 0x58B3F01898E6605B,0x567149652116DB6A // A11,A10 1153data8 0x4FA82FA4F5D35B00,0x4D663DB00832DF8F // A7,A6 1154data8 0xAE426731C9B94996,0x406A // A3 1155data8 0xA264C84BE3708F3F,0x4023 // A1 1156data8 0x5D3B254BC1C806A8,0x5AF72E736048B553 // A13,A12 1157data8 0x542E476505104BB0,0x51EAD96CDC4FB48F // A9,A8 1158data8 0x4B25095F498DB134,0x48E4B9FDEBFE24AB // A5,A4 1159data8 0xCE076A5A116C1D34,0x4046 // A2 1160data8 0x940013871A15050B,0x3FF1 // A0 1161// 1162// left negative roots 1163//(-3;-2) 1164data8 0x41AEB7998DBE2B2C,0xC19053D8FAC05DF7 // A16,A15 1165data8 0x4133197BF1ADEAF9,0xC1150728B9B82072 // A12,A11 1166data8 0x40BDBA65E74F4526,0xC0A12239BEEF8F72 // A8,A7 1167data8 0xFA8256664F99E2AA,0x4004 // A4 1168data8 0x9933F9E132D2A5DB,0x4002 // A2 1169data8 0x416FFB167B85F77C,0xC15166AE0ACCF87C // A14,A13 1170data8 0x40F75815106322C0,0xC0DA2D23C59C348D // A10,A9 1171data8 0x4084373F7CC42043,0xC0685884581F8C61 // A6,A5 1172data8 0xA0C2D6186460FF9D,0xC003 // A3 1173data8 0xF5096D48258CA0AD,0xBFFF // A1 1174//(-4;-3) 1175data8 0xC3E5BD233016D4B9,0x43A084DAD2D94AB1 // A15,A14 1176data8 0xC2CCFFF5E5AED722,0x4286D143AC7D29A6 // A11,A10 1177data8 0xC1B7DBBE0680D07B,0x4173E8F3ABB79CED // A7,A6 1178data8 0xE929ACEA59799BAF,0xC00A // A3 1179data8 0xA5CCECB362B21E1C,0xC003 // A1 1180data8 
0xC357EED873871B81,0x43128E0B873204FC // A13,A12 1181data8 0xC242225FA76E8450,0x41FD2F76AE7386CE // A9,A8 1182data8 0xC13116F7806D0C7A,0x40EE8F829F141025 // A5,A4 1183data8 0xFBB6F57021B5B397,0x4006 // A2 1184data8 0xEEE019B4C05AC269,0xBFCB // A0 1185//(-5;-4) 1186data8 0xC626A52FE8AAA100,0x45B9FD1F4DDFE31E // A15,A14 1187data8 0xC473812A5675F08B,0x440738530AECC254 // A11,A10 1188data8 0xC2C5068B3F94AC27,0x425A8C5C539A500B // A7,A6 1189data8 0x869FBFF732F20C3A,0xC012 // A3 1190data8 0xE91251F7CF25A655,0xC005 // A1 1191data8 0xC54C18CB48E5DA0F,0x44E07BD36FF561DF // A13,A12 1192data8 0xC39BEC120D2FEBEA,0x4330FFA5388435BE // A9,A8 1193data8 0xC1F13D5D163B7FB5,0x418752A6F5AC0F39 // A5,A4 1194data8 0xDA99E33C51D360F0,0x400B // A2 1195data8 0x9F47A66A2F53D9B9,0x3FD1 // A0 1196//(-6;-5) 1197data8 0xC8970DAC16B6D59E,0x480170728306FD76 // A15,A14 1198data8 0xC63E0E5030604CF3,0x45A7924D74D57C65 // A11,A10 1199data8 0xC3E8684E41730FC6,0x43544D54EA2E5B9A // A7,A6 1200data8 0xEB7404450C47C5F4,0xC019 // A3 1201data8 0xB30FB521D2C19F8B,0xC008 // A1 1202data8 0xC768F34D35DF6320,0x46D348B3BB2E68B8 // A13,A12 1203data8 0xC512AC2FE5EA638E,0x447DF44BC7FC5E17 // A9,A8 1204data8 0xC2C15EA6B0AAFEF9,0x422EF5D308DBC420 // A5,A4 1205data8 0xFBCEE5BCA70FD3A3,0x4010 // A2 1206data8 0x8589A7CFFE0A3E86,0xBFD5 // A0 1207//(-7;-6) 1208data8 0xCB3995A0CC961E5A,0x4A7615C6C7116ADD // A15,A14 1209data8 0xC82C5AFE0BF9C427,0x47695BD2F367668B // A11,A10 1210data8 0xC52377E70BA14CF5,0x4462775E859E4392 // A7,A6 1211data8 0x9EC8ED6E4C3D4DBE,0xC022 // A3 1212data8 0x9D5FBD2E75520E65,0xC00B // A1 1213data8 0xC9B21BB881A4DDF8,0x48EFEAB06FBA0207 // A13,A12 1214data8 0xC6A6E8550CBC188F,0x45E4F3D26238B099 // A9,A8 1215data8 0xC3A20427DF1B110A,0x42E24F3D636F2E4E // A5,A4 1216data8 0xC1A4D12A82280CFB,0x4016 // A2 1217data8 0xEF46D8DCCA9E8197,0x3FD2 // A0 1218//(-8;-7) 1219data8 0xCE0946982B27DE5B,0x4D15DBC6664E2DD2 // A15,A14 1220data8 0xCA3C769F6B3B2B93,0x49497251CD0C4363 // A11,A10 1221data8 
0xC67384066C47F489,0x458281393433AB28 // A7,A6 1222data8 0x9EF3459926D0F14F,0xC02B // A3 1223data8 0x9D7BB7F2600DFF0B,0xC00E // A1 1224data8 0xCC22351326C939A7,0x4B3009431C4F1D3F // A13,A12 1225data8 0xC856FAADDD48815D,0x476502BC3ECA040C // A9,A8 1226data8 0xC4920C2A84173810,0x43A255C052525F99 // A5,A4 1227data8 0xC1C73B6554011EFA,0x401C // A2 1228data8 0x954612700ADF8317,0xBFD8 // A0 1229//(-9;-8) 1230data8 0xD102F5CC7B590D3A,0x4FDD0F1C30E4EB22 // A15,A14 1231data8 0xCC6A02912B0DF650,0x4B44AB18E4FCC159 // A11,A10 1232data8 0xC7D64314B4A2FAAB,0x46B2C334AE5E2D34 // A7,A6 1233data8 0xE2598724F7E28E99,0xC034 // A3 1234data8 0xB12F6FE2E195452C,0xC011 // A1 1235data8 0xCEB507747AF9356A,0x4D907802C08BA48F // A13,A12 1236data8 0xCA2096E3DC29516F,0x48FAF6ED046A1DB7 // A9,A8 1237data8 0xC59043D21BA5EE56,0x446D5FE468B30450 // A5,A4 1238data8 0xF5460A8196B59C83,0x4022 // A2 1239data8 0xB108F35A8EDA92D5,0xBFDD // A0 1240//(-10;-9) 1241data8 0xD420430D91F8265B,0x52C406CAAAC9E0EE // A15,A14 1242data8 0xCEB2ECDDDAA3DAD1,0x4D580FDA97F92E3A // A11,A10 1243data8 0xC94A8A192341B5D4,0x47F1E4D8C690D07B // A7,A6 1244data8 0xDD0C5F920C2F0D2B,0xC03E // A3 1245data8 0xDD7BED3631657B48,0xC014 // A1 1246data8 0xD167F410E64E90A4,0x500DFFED20F714A7 // A13,A12 1247data8 0xCBFEE6D9043169E9,0x4AA4174F64B40AA7 // A9,A8 1248data8 0xC698D1A9AF0AB9C2,0x4541EDE14987A887 // A5,A4 1249data8 0xBF9F43D461B3DE6E,0x4029 // A2 1250data8 0xF3891A50642FAF26,0x3FE1 // A0 1251//(-11;-10) 1252data8 0xD75F0EEAF769D42A,0x55CBB72C8869183A // A15,A14 1253data8 0xD113A4EF80394F77,0x4F822A0B96B3ECA9 // A11,A10 1254data8 0xCACED38DC75763CB,0x493E3B5522D2D028 // A7,A6 1255data8 0x8FA8FB5C92533701,0xC049 // A3 1256data8 0x98453EDB9339C24E,0xC018 // A1 1257data8 0xD43778026CCD4B20,0x52A5627753273B9B // A13,A12 1258data8 0xCDF0F718DD7E1214,0x4C600B34582911EB // A9,A8 1259data8 0xC7AE7EE7F112362C,0x46200599439C264F // A5,A4 1260data8 0xB5249C335342B5BC,0x4030 // A2 1261data8 0x881550711D143475,0x3FE4 // A0 1262//(-12;-11) 
1263data8 0xDAB9C724EEEE2BBB,0x58EEC971340EDDBA // A15,A14 1264data8 0xD38A8C8AE63BD8BF,0x51C05DB21CEE00D3 // A11,A10 1265data8 0xCC607594C311C12D,0x4A9585BD5BE6AB57 // A7,A6 1266data8 0xF26D282C36EC0E66,0xC053 // A3 1267data8 0xE467DF1FA674BFAE,0xC01B // A1 1268data8 0xD721DE506999AA9C,0x5555B34F71B45132 // A13,A12 1269data8 0xCFF4619A476BF76F,0x4E29B2F2BBE7A67E // A9,A8 1270data8 0xC8CCF27629D48EDC,0x47044715F991AB46 // A5,A4 1271data8 0xCBC92FB9BDAA928D,0x4037 // A2 1272data8 0xCE27C4F01CF53284,0xBFE6 // A0 1273//(-13;-12) 1274data8 0xDE3295B24355C5A1,0x5C2B447E298B562D // A15,A14 1275data8 0xD615A35CB5E92103,0x54106AB089C95E8C // A11,A10 1276data8 0xCDFEC7D935019005,0x4BF8C4C685F01B83 // A7,A6 1277data8 0x820899603D9A74D5,0xC05F // A3 1278data8 0xB9949916F8DF4AC4,0xC01F // A1 1279data8 0xDA23373DBA0B7548,0x581CBA0AF7F45C01 // A13,A12 1280data8 0xD20929836BB30934,0x500386409A7076D6 // A9,A8 1281data8 0xC9F480173FEAF90B,0x47F1ACB14B810793 // A5,A4 1282data8 0x86881B8674DBF205,0x403F // A2 1283data8 0x8CFAFA9A142C1FF0,0x3FED // A0 1284//(-14;-13) 1285data8 0xE1C33F356FA2C630,0x5F8038B8AA919DD7 // A15,A14 1286data8 0xD8B3F0167E14982D,0x5671496400BAE0DB // A11,A10 1287data8 0xCFA82FA4F5D25C3E,0x4D663DB008328C58 // A7,A6 1288data8 0xAE426731C9B94980,0xC06A // A3 1289data8 0xA264C84BB8A66F86,0xC023 // A1 1290data8 0xDD3B26E34762ED1E,0x5AF72F76E3C1B793 // A13,A12 1291data8 0xD42E476507E3D06E,0x51EAD96CDD881DFA // A9,A8 1292data8 0xCB25095F498DB15F,0x48E4B9FDEBFE24B5 // A5,A4 1293data8 0xCE076A5A116C1D32,0x4046 // A2 1294data8 0x94001BF5A24966F5,0x3FF1 // A0 1295//(-15;-14) 1296data8 0xE56DB8B72D7156FF,0x62EAB0CDB22539BE // A15,A14 1297data8 0xDB63D76B0D3457E7,0x58E254823D0AE4FF // A11,A10 1298data8 0xD15F060BF548404A,0x4EDE65C20CD4E961 // A7,A6 1299data8 0x900DA565ED76C19D,0xC076 // A3 1300data8 0x9868C809852DA712,0xC027 // A1 1301data8 0xE067CCDA0408AAF0,0x5DE5A79C5C5C54AF // A13,A12 1302data8 0xD6611ADBF5958ED0,0x53E0294092BE9677 // A9,A8 1303data8 
0xCC5EA28D90EE8C5D,0x49E014930EF336EE // A5,A4 1304data8 0xB57930DCE7A61AE8,0x404E // A2 1305data8 0x976BEC1F30DF151C,0x3FF5 // A0 1306LOCAL_OBJECT_END(lgamma_data) 1307 1308 1309.section .text 1310GLOBAL_LIBM_ENTRY(__libm_lgamma) 1311 1312{ .mfi 1313 getf.exp GR_SignExp = f8 1314 frcpa.s1 FR_C,p9 = f1,f8 1315 mov GR_ExpMask = 0x1ffff 1316} 1317{ .mfi 1318 addl GR_ad_Data = @ltoff(lgamma_data),gp 1319 fcvt.fx.s1 FR_int_N = f8 1320 mov GR_2_25 = 0x4002 // 2.25 1321};; 1322{ .mfi 1323 getf.d GR_ArgAsIs = f8 1324 fclass.m p13,p0 = f8,0x1EF // is x NaTVal, NaN, 1325 // +/-0, +/-INF or +/-deno? 1326 mov GR_ExpBias = 0xFFFF 1327} 1328{ .mfi 1329 ld8 GR_ad_Data = [GR_ad_Data] 1330 fcvt.fx.trunc.s1 FR_int_Ntrunc = f8 1331 mov GR_ExpOf256 = 0x10007 1332};; 1333{ .mfi 1334 mov GR_ExpOf2 = 0x10000 1335 fcmp.lt.s1 p14,p15 = f8,f0 // p14 if x<0 1336 dep.z GR_Ind = GR_SignExp,8,4 1337} 1338{ .mfi 1339 and GR_Exp = GR_SignExp,GR_ExpMask 1340 fma.s1 FR_2 = f1,f1,f1 1341 cmp.lt p10,p0 = GR_SignExp,GR_ExpBias 1342};; 1343{ .mfi 1344 add GR_ad_1 = 0xB80,GR_ad_Data 1345 fnorm.s1 FR_NormX = f8 1346 shr.u GR_Arg = GR_ArgAsIs,48 1347} 1348{ .mib 1349 add GR_ad_Co = GR_Ind,GR_ad_Data 1350 add GR_ad_Ce = 0x10,GR_ad_Data 1351 // jump if the input argument is NaTVal, NaN, +/-0, +/-INF or +/-deno 1352(p13) br.cond.spnt lgamma_spec 1353};; 1354lgamma_common: 1355{ .mfi 1356 ldfpd FR_LocalMin,FR_05 = [GR_ad_1],16 1357 fmerge.se FR_x = f1,f8 1358 add GR_ad_2 = 0xBC0,GR_ad_Data 1359} 1360{ .mfb 1361 add GR_ad_Ce = GR_Ind,GR_ad_Ce 1362 fms.s1 FR_w = f8,f1,f1 // x-1 1363 // jump if the input argument is positive and less than 1.0 1364(p10) br.cond.spnt lgamma_0_1 1365};; 1366{ .mfi 1367 ldfe FR_C01 = [GR_ad_Co],32 1368 fnma.s1 FR_InvX = FR_C,f8,f1 // NR iteration #1 1369(p15) cmp.lt.unc p8,p0 = GR_ExpOf256,GR_SignExp 1370} 1371{ .mib 1372 ldfe FR_C11 = [GR_ad_Ce],32 1373(p15) cmp.lt.unc p11,p0 = GR_Arg,GR_2_25 1374 // jump if the input argument isn't less than 512.0 1375(p8) br.cond.spnt 
lgamma_pstirling 1376};; 1377{ .mfi 1378 ldfe FR_C21 = [GR_ad_Co],32 1379(p14) fms.s1 FR_r = FR_C,f8,f1 // reduced arg for log(x) 1380(p14) cmp.lt.unc p0,p9 = GR_Exp,GR_ExpOf256 1381} 1382{ .mib 1383 ldfe FR_C31 = [GR_ad_Ce],32 1384 add GR_ad_Co7 = 0x12C0,GR_ad_2 1385 // jump if the input argument is from range [1.0; 2.25) 1386(p11) br.cond.spnt lgamma_1_2 1387};; 1388{ .mfi 1389 ldfe FR_C41 = [GR_ad_Co],32 1390 fcvt.xf FR_N = FR_int_N 1391 add GR_ad_Ce7 = 0x1310,GR_ad_2 1392} 1393{ .mfb 1394 ldfe FR_C51 = [GR_ad_Ce],32 1395(p14) fma.s1 FR_5 = FR_2,FR_2,f1 1396 // jump if the input argument is less or equal to -512.0 1397(p9) br.cond.spnt lgamma_negstirling 1398};; 1399{ .mfi 1400 ldfe FR_C61 = [GR_ad_Co],32 1401(p14) fcvt.xf FR_Ntrunc = FR_int_Ntrunc 1402 shr GR_Ind = GR_Ind,4 1403} 1404{ .mfi 1405 ldfe FR_C71 = [GR_ad_Ce],32 1406(p14) fma.s1 FR_Xp1 = f1,f1,FR_NormX // x+1 1407 cmp.eq p6,p7 = GR_ExpOf2,GR_SignExp 1408};; 1409.pred.rel "mutex",p6,p7 1410{ .mfi 1411 ldfe FR_C81 = [GR_ad_Co],32 1412(p6) fma.s1 FR_x = f0,f0,FR_NormX 1413 shladd GR_Offs7 = GR_Ind,2,GR_Ind // (ind*16)*5 1414} 1415{ .mfi 1416 ldfe FR_C91 = [GR_ad_Ce],32 1417(p7) fms.s1 FR_x = FR_x,f1,f1 1418 add GR_ad_Co7 = 0x800,GR_ad_Data 1419};; 1420{ .mfi 1421 ldfe FR_CA1 = [GR_ad_Co],32 1422(p14) fma.s1 FR_3 = f1,f1,FR_2 1423 shladd GR_Offs7 = GR_Ind,1,GR_Offs7 // (ind*16)*7 1424} 1425{ .mfi 1426 ldfe FR_C00 = [GR_ad_Ce],32 1427(p14) fma.s1 FR_Xp4 = FR_2,FR_2,FR_NormX 1428 add GR_ad_Ce7 = 0x810,GR_ad_Data 1429};; 1430{ .mfi 1431 ldfe FR_C10 = [GR_ad_Co],32 1432(p6) fms.s1 FR_Xm2 = FR_w,f1,f1 1433 add GR_ad_Co7 = GR_ad_Co7,GR_Offs7 1434} 1435{ .mfi 1436 ldfe FR_C20 = [GR_ad_Ce],32 1437(p14) fma.s1 FR_r2 = FR_r,FR_r,f0 // log(x) 1438 add GR_ad_Ce7 = GR_ad_Ce7,GR_Offs7 1439};; 1440{ .mfi 1441 ldfe FR_C30 = [GR_ad_Co],32 1442(p14) fms.s1 FR_Xf = FR_NormX,f1,FR_N // xf = x - [x] 1443(p14) mov GR_Arg17 = 0xC031 // -17 1444} 1445{ .mfi 1446 ldfe FR_C40 = [GR_ad_Ce],32 1447(p14) fma.s1 FR_Xp5 = 
FR_5,f1,FR_NormX 1448(p14) sub GR_Exp = GR_Exp,GR_ExpBias 1449};; 1450{ .mfi 1451 ldfe FR_C50 = [GR_ad_Co7],32 1452(p14) fms.s1 FR_Xfr = FR_Xp1,f1,FR_Ntrunc // xfr = (x+1) - [x] 1453(p14) cmp.lt.unc p13,p0 = GR_Arg,GR_Arg17 1454} 1455{ .mfb 1456 ldfe FR_C60 = [GR_ad_Ce7],32 1457(p14) fma.s1 FR_Xp10 = FR_5,FR_2,FR_NormX 1458 // jump if the input argument is negative and great than -17.0 1459(p13) br.cond.spnt lgamma_negrecursion 1460};; 1461{ .mfi 1462 ldfe FR_C70 = [GR_ad_Co7],32 1463 fma.s1 FR_C01 = FR_x,f1,FR_C01 1464(p14) add GR_ad_Ce = 0x1310,GR_ad_2 1465} 1466{ .mfi 1467 ldfe FR_C80 = [GR_ad_Ce7],32 1468 fma.s1 FR_C11 = FR_x,f1,FR_C11 1469(p14) add GR_ad_Co = 0x12C0,GR_ad_2 1470};; 1471{ .mfi 1472 ldfe FR_C90 = [GR_ad_Co7],32 1473 fma.s1 FR_C21 = FR_x,f1,FR_C21 1474 nop.i 0 1475} 1476{ .mfi 1477 ldfe FR_CA0 = [GR_ad_Ce7],32 1478 fma.s1 FR_C31 = FR_x,f1,FR_C31 1479 nop.i 0 1480};; 1481{ .mfi 1482 ldfe FR_CN = [GR_ad_Co7],32 1483 fma.s1 FR_C41 = FR_x,f1,FR_C41 1484 nop.i 0 1485} 1486{ .mfi 1487(p14) ldfpd FR_P5,FR_P4 = [GR_ad_1],16 1488 fma.s1 FR_C51 = FR_x,f1,FR_C51 1489 nop.i 0 1490};; 1491{ .mfi 1492(p14) ldfpd FR_P3,FR_P2 = [GR_ad_2],16 1493 fma.s1 FR_C61 = FR_x,f1,FR_C61 1494 nop.i 0 1495} 1496{ .mfi 1497(p14) ldfe FR_Ln2 = [GR_ad_1] 1498 fma.s1 FR_C71 = FR_x,f1,FR_C71 1499 nop.i 0 1500};; 1501{ .mfi 1502(p14) ldfpd FR_S28,FR_S26 = [GR_ad_Co],16 1503 fma.s1 FR_C81 = FR_x,f1,FR_C81 1504 add GR_ad_2 = 0x60,GR_ad_2 1505} 1506{ .mfi 1507(p14) ldfpd FR_S24,FR_S22 = [GR_ad_Ce],16 1508 fma.s1 FR_C91 = FR_x,f1,FR_C91 1509 nop.i 0 1510};; 1511{ .mfi 1512(p14) ldfpd FR_S20,FR_S18 = [GR_ad_Co],16 1513 fma.s1 FR_CA1 = FR_x,f1,FR_CA1 1514 nop.i 0 1515} 1516{ .mfi 1517(p14) ldfpd FR_S16,FR_S14 = [GR_ad_Ce],16 1518 fma.s1 FR_C01 = FR_C01,FR_x,FR_C00 1519 nop.i 0 1520};; 1521{ .mfi 1522(p14) getf.exp GR_SignExp = FR_Xf 1523 fma.s1 FR_C11 = FR_C11,FR_x,FR_C10 1524 nop.i 0 1525} 1526{ .mfi 1527(p14) ldfe FR_S12 = [GR_ad_Co],16 1528 fma.s1 FR_C21 = FR_C21,FR_x,FR_C20 1529 
nop.i 0 1530};; 1531{ .mfi 1532(p14) getf.sig GR_Sig = FR_Xf 1533(p14) frcpa.s1 FR_InvXf,p0 = f1,FR_Xf 1534 nop.i 0 1535} 1536{ .mfi 1537(p14) ldfe FR_S10 = [GR_ad_Ce],16 1538 fma.s1 FR_C41 = FR_C41,FR_x,FR_C40 1539 nop.i 0 1540};; 1541{ .mfi 1542(p14) ldfe FR_S8 = [GR_ad_Co],16 1543 fma.s1 FR_C51 = FR_C51,FR_x,FR_C50 1544 nop.i 0 1545} 1546{ .mfi 1547(p14) ldfe FR_S6 = [GR_ad_Ce],16 1548 fma.s1 FR_C61 = FR_C61,FR_x,FR_C60 1549(p14) and GR_Expf = GR_SignExp,GR_ExpMask 1550};; 1551{ .mfi 1552(p14) sub GR_Expf = GR_Expf,GR_ExpBias 1553 fma.s1 FR_C71 = FR_C71,FR_x,FR_C70 1554(p14) shl GR_Ind = GR_Sig,1 1555} 1556{ .mfi 1557(p14) ldfe FR_S4 = [GR_ad_Co],16 1558 fma.s1 FR_C81 = FR_C81,FR_x,FR_C80 1559(p14) cmp.eq.unc p8,p0 = 0,GR_Sig 1560};; 1561{ .mfi 1562(p14) setf.sig FR_int_Nf = GR_Expf 1563 fma.s1 FR_C91 = FR_C91,FR_x,FR_C90 1564(p14) shr.u GR_Ind = GR_Ind,56 1565} 1566{ .mfb 1567(p14) ldfe FR_S2 = [GR_ad_Ce],16 1568 fma.s1 FR_CA1 = FR_CA1,FR_x,FR_CA0 1569 // jump if the input argument is integer number from range (-512.0;-17.0] 1570(p8) br.cond.spnt lgamma_singularity 1571};; 1572{ .mfi 1573(p14) getf.sig GR_Sig = FR_int_Ntrunc 1574 fma.s1 FR_C01 = FR_C01,FR_C11,f0 1575 nop.i 0 1576} 1577{ .mfi 1578(p14) shladd GR_ad_T = GR_Ind,4,GR_ad_2 1579 fma.s1 FR_C31 = FR_C31,FR_x,FR_C30 1580 nop.i 0 1581};; 1582{ .mfi 1583(p14) ldfe FR_Tf = [GR_ad_T] 1584(p14) fms.s1 FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg for log({x}) 1585(p14) extr.u GR_Ind = GR_ArgAsIs,44,8 1586} 1587{ .mfi 1588 // set p9 if signgum is 32-bit int 1589 // set p10 if signgum is 64-bit int 1590 cmp.eq p10,p9 = 8,r34 1591 fma.s1 FR_C21 = FR_C21,FR_C41,f0 1592 mov GR_SignOfGamma = 1 1593};; 1594{ .mfi 1595 nop.m 0 1596 fma.s1 FR_C51 = FR_C51,FR_C61,f0 1597(p14) tbit.z.unc p8,p0 = GR_Sig,0 1598} 1599{ .mfi 1600(p14) shladd GR_ad_T = GR_Ind,4,GR_ad_2 1601(p6) fma.s1 FR_CN = FR_CN,FR_Xm2,f0 1602 nop.i 0 1603};; 1604{ .mfi 1605(p14) setf.sig FR_int_N = GR_Exp 1606 fma.s1 FR_C71 = FR_C71,FR_C81,f0 1607(p8) sub 
GR_SignOfGamma = r0,GR_SignOfGamma 1608} 1609{ .mfi 1610 nop.m 0 1611(p14) fma.s1 FR_Xf2 = FR_Xf,FR_Xf,f0 1612 nop.i 0 1613};; 1614{ .mfi 1615(p14) ldfe FR_T = [GR_ad_T] 1616 fma.s1 FR_C91 = FR_C91,FR_CA1,f0 1617 nop.i 0 1618} 1619{ .mfi 1620 nop.m 0 1621(p14) fma.s1 FR_r2 = FR_r,FR_r,f0 1622 nop.i 0 1623};; 1624.pred.rel "mutex",p9,p10 1625{ .mfi 1626 // store sign of gamma(x) as 32-bit int 1627(p9) st4 [r33] = GR_SignOfGamma 1628 fma.s1 FR_C01 = FR_C01,FR_C31,f0 1629 nop.i 0 1630} 1631{ .mfi 1632 // store sign of gamma(x) as 64-bit int 1633(p10) st8 [r33] = GR_SignOfGamma 1634(p14) fma.s1 FR_P54 = FR_P5,FR_r,FR_P4 1635 nop.i 0 1636};; 1637{ .mfi 1638 nop.m 0 1639(p14) fma.s1 FR_P32 = FR_P3,FR_r,FR_P2 1640 nop.i 0 1641} 1642{ .mfb 1643 nop.m 0 1644(p14) fma.s1 FR_P54f = FR_P5,FR_rf,FR_P4 1645 // jump if the input argument is non-integer from range (-512.0;-17.0] 1646(p14) br.cond.spnt lgamma_negpoly 1647};; 1648{ .mfi 1649 nop.m 0 1650 fma.s1 FR_C21 = FR_C21,FR_C51,f0 1651 nop.i 0 1652};; 1653{ .mfi 1654 nop.m 0 1655 fma.s1 FR_C71 = FR_C71,FR_C91,f0 1656 nop.i 0 1657};; 1658{ .mfi 1659 nop.m 0 1660 fma.s1 FR_CN = FR_C01,FR_CN,f0 1661 nop.i 0 1662};; 1663{ .mfi 1664 nop.m 0 1665 fma.s1 FR_C21 = FR_C21,FR_C71,f0 1666 nop.i 0 1667};; 1668{ .mfb 1669 nop.m 0 1670 fma.d.s0 f8 = FR_C21,FR_CN,f0 1671 br.ret.sptk b0 // exit for arguments from range [2.25; 512.0) 1672};; 1673// branch for calculating of ln(GAMMA(x)) for -512 < x < -17 1674//--------------------------------------------------------------------- 1675.align 32 1676lgamma_negpoly: 1677{ .mfi 1678 nop.m 0 1679 fma.s1 FR_Xf4 = FR_Xf2,FR_Xf2,f0 1680 nop.i 0 1681} 1682{ .mfi 1683 nop.m 0 1684 fma.s1 FR_S28 = FR_S28,FR_Xf2,FR_S26 1685 nop.i 0 1686};; 1687{ .mfi 1688 nop.m 0 1689 fma.s1 FR_S24 = FR_S24,FR_Xf2,FR_S22 1690 nop.i 0 1691} 1692{ .mfi 1693 nop.m 0 1694 fma.s1 FR_S20 = FR_S20,FR_Xf2,FR_S18 1695 nop.i 0 1696};; 1697{ .mfi 1698 nop.m 0 1699 fma.s1 FR_S16 = FR_S16,FR_Xf2,FR_S14 1700 nop.i 0 1701} 1702{ .mfi 
1703 nop.m 0 1704 fma.s1 FR_S12 = FR_S12,FR_Xf2,FR_S10 1705 nop.i 0 1706};; 1707{ .mfi 1708 nop.m 0 1709 fma.s1 FR_S8 = FR_S8,FR_Xf2,FR_S6 1710 nop.i 0 1711} 1712{ .mfi 1713 nop.m 0 1714 fma.s1 FR_S4 = FR_S4,FR_Xf2,FR_S2 1715 nop.i 0 1716};; 1717{ .mfi 1718 nop.m 0 1719 fma.s1 FR_rf2 = FR_rf,FR_rf,f0 1720 nop.i 0 1721} 1722{ .mfi 1723 nop.m 0 1724 fma.s1 FR_P32f = FR_P3,FR_rf,FR_P2 // log({x}) 1725 nop.i 0 1726};; 1727{ .mfi 1728 nop.m 0 1729 fma.s1 FR_r3 = FR_r2,FR_r,f0 // log(x) 1730 nop.i 0 1731} 1732{ .mfi 1733 nop.m 0 1734 fcvt.xf FR_Nf = FR_int_Nf // log({x}) 1735 nop.i 0 1736};; 1737{ .mfi 1738 nop.m 0 1739 fma.s1 FR_S28 = FR_S28,FR_Xf4,FR_S24 1740 nop.i 0 1741} 1742{ .mfi 1743 nop.m 0 1744 fma.s1 FR_Xf8 = FR_Xf4,FR_Xf4,f0 1745 nop.i 0 1746};; 1747{ .mfi 1748 nop.m 0 1749 fma.s1 FR_S20 = FR_S20,FR_Xf4,FR_S16 1750 nop.i 0 1751} 1752{ .mfi 1753 nop.m 0 1754 fma.s1 FR_C21 = FR_C21,FR_C51,f0 1755 nop.i 0 1756};; 1757{ .mfi 1758 nop.m 0 1759 fma.s1 FR_S12 = FR_S12,FR_Xf4,FR_S8 1760 nop.i 0 1761} 1762{ .mfi 1763 nop.m 0 1764 fma.s1 FR_C71 = FR_C71,FR_C91,f0 1765 nop.i 0 1766};; 1767{ .mfi 1768 nop.m 0 1769 fnma.s1 FR_P10 = FR_r2,FR_05,FR_r // log(x) 1770 nop.i 0 1771} 1772{ .mfi 1773 nop.m 0 1774 fma.s1 FR_P54 = FR_P54,FR_r2,FR_P32 // log(x) 1775 nop.i 0 1776};; 1777{ .mfi 1778 nop.m 0 1779 fnma.s1 FR_P10f = FR_rf2,FR_05,FR_rf // log({x}) 1780 nop.i 0 1781} 1782{ .mfi 1783 nop.m 0 1784 fcvt.xf FR_N = FR_int_N // log(x) 1785 nop.i 0 1786};; 1787{ .mfi 1788 nop.m 0 1789 fma.s1 FR_rf3 = FR_rf2,FR_rf,f0 // log({x}) 1790 nop.i 0 1791} 1792{ .mfi 1793 nop.m 0 1794 fma.s1 FR_P54f = FR_P54f,FR_rf2,FR_P32f // log({x}) 1795 nop.i 0 1796};; 1797{ .mfi 1798 nop.m 0 1799 fma.s1 FR_S28 = FR_S28,FR_Xf8,FR_S20 1800 nop.i 0 1801} 1802{ .mfi 1803 nop.m 0 1804 fma.s1 FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf // log({x}) 1805 nop.i 0 1806};; 1807{ .mfi 1808 nop.m 0 1809 fma.s1 FR_CN = FR_C01,FR_CN,f0 1810 nop.i 0 1811} 1812{ .mfi 1813 nop.m 0 1814 fma.s1 FR_C21 = FR_C21,FR_C71,f0 1815 nop.i 0
1816};; 1817{ .mfi 1818 nop.m 0 1819 fma.s1 FR_P54 = FR_P54,FR_r3,FR_P10 // log(x) 1820 nop.i 0 1821};; 1822{ .mfi 1823 nop.m 0 1824 fma.s1 FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // log(x) 1825 nop.i 0 1826};; 1827{ .mfi 1828 nop.m 0 1829 fma.s1 FR_P54f = FR_P54f,FR_rf3,FR_P10f // log({x}) 1830 nop.i 0 1831};; 1832{ .mfi 1833 nop.m 0 1834 fma.s1 FR_S28 = FR_S28,FR_Xf8,FR_S12 1835 nop.i 0 1836};; 1837{ .mfi 1838 nop.m 0 1839 fnma.s1 FR_C21 = FR_C21,FR_CN,f0 1840 nop.i 0 1841};; 1842{ .mfi 1843 nop.m 0 1844 fma.s1 FR_LnX = FR_TpNxLn2,f1,FR_P54 // log(x) 1845 nop.i 0 1846};; 1847{ .mfi 1848 nop.m 0 1849 fma.s1 FR_LnXf = FR_TpNxLn2f,f1,FR_P54f // log({x}) 1850 nop.i 0 1851};; 1852{ .mfi 1853 nop.m 0 1854 fma.s1 FR_S28 = FR_S28,FR_Xf4,FR_S4 1855 nop.i 0 1856};; 1857{ .mfi 1858 nop.m 0 1859 fma.s1 FR_LnX = FR_LnX,f1,FR_LnXf 1860 nop.i 0 1861};; 1862{ .mfi 1863 nop.m 0 1864 fnma.s1 FR_S28 = FR_S28,FR_Xf2,FR_C21 1865 nop.i 0 1866};; 1867{ .mfb 1868 nop.m 0 1869 fms.d.s0 f8 = FR_S28,f1,FR_LnX 1870 br.ret.sptk b0 1871};; 1872// branch for calculating of ln(GAMMA(x)) for x >= 512 1873//--------------------------------------------------------------------- 1874.align 32 1875lgamma_pstirling: 1876{ .mfi 1877 ldfpd FR_P5,FR_P4 = [GR_ad_1],16 1878 nop.f 0 1879 and GR_Exp = GR_SignExp,GR_ExpMask 1880} 1881{ .mfi 1882 ldfpd FR_P3,FR_P2 = [GR_ad_2],16 1883 fma.s1 FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1 1884 mov GR_ExpBias = 0xffff 1885};; 1886{ .mfi 1887 ldfe FR_Ln2 = [GR_ad_1],16 1888 nop.f 0 1889 sub GR_Exp = GR_Exp,GR_ExpBias 1890};; 1891{ .mfi 1892 ldfpd FR_W4,FR_OvfBound = [GR_ad_2],16 1893 nop.f 0 1894 nop.i 0 1895};; 1896{ .mfi 1897 setf.sig FR_int_N = GR_Exp 1898 fms.s1 FR_r = FR_C,f8,f1 1899 nop.i 0 1900};; 1901{ .mmf 1902 getf.sig GR_Sig = FR_NormX 1903 ldfe FR_LnSqrt2Pi = [GR_ad_1],16 1904 nop.f 0 1905};; 1906{ .mmf 1907 ldfe FR_W2 = [GR_ad_2],16 1908 nop.m 0 1909 fnma.s1 FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2 1910};; 1911{ .mfi 1912 add GR_ad_2 = 0x40,GR_ad_2 
1913 nop.f 0 1914 shl GR_Ind = GR_Sig,1 1915};; 1916{ .mfi 1917 mov GR_SignOfGamma = 1 1918 nop.f 0 1919 shr.u GR_Ind = GR_Ind,56 1920};; 1921{ .mfi 1922 shladd GR_ad_2 = GR_Ind,4,GR_ad_2 1923 fma.s1 FR_r2 = FR_r,FR_r,f0 1924 // set p9 if signgum is 32-bit int 1925 // set p10 if signgum is 64-bit int 1926 cmp.eq p10,p9 = 8,r34 1927};; 1928{ .mfi 1929 ldfe FR_T = [GR_ad_2] 1930 fma.s1 FR_P54 = FR_P5,FR_r,FR_P4 1931 nop.i 0 1932} 1933{ .mfi 1934 nop.m 0 1935 fma.s1 FR_P32 = FR_P3,FR_r,FR_P2 1936 nop.i 0 1937};; 1938{ .mfi 1939 nop.m 0 1940 fcmp.le.s1 p6,p0 = FR_OvfBound,FR_NormX 1941 nop.i 0 1942} 1943{ .mfi 1944 nop.m 0 1945 fma.s1 FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2 1946 nop.i 0 1947};; 1948{ .mfi 1949 nop.m 0 1950 fcvt.xf FR_N = FR_int_N 1951 nop.i 0 1952} 1953{ .mfb 1954 nop.m 0 1955 nop.f 0 1956 // jump if x is great than OVERFLOW_BOUNDARY 1957(p6) br.cond.spnt lgamma_overflow 1958};; 1959.pred.rel "mutex",p9,p10 1960{ .mfi 1961 // store sign of gamma(x) as 32-bit int 1962(p9) st4 [r33] = GR_SignOfGamma 1963 fma.s1 FR_r3 = FR_r2,FR_r,f0 1964 nop.i 0 1965} 1966{ .mfi 1967 // store sign of gamma(x) as 64-bit int 1968(p10) st8 [r33] = GR_SignOfGamma 1969 fnma.s1 FR_P10 = FR_r2,FR_05,FR_r 1970 nop.i 0 1971};; 1972{ .mfi 1973 nop.m 0 1974 fma.s1 FR_P54 = FR_P54,FR_r2,FR_P32 1975 nop.i 0 1976};; 1977{ .mfi 1978 nop.m 0 1979 fnma.s1 FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3 1980 nop.i 0 1981};; 1982{ .mfi 1983 nop.m 0 1984 fms.s1 FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2) 1985 nop.i 0 1986};; 1987{ .mfi 1988 nop.m 0 1989 fma.s1 FR_TpNxLn2 = FR_N,FR_Ln2,FR_T 1990 nop.i 0 1991};; 1992{ .mfi 1993 nop.m 0 1994 fma.s1 FR_P54 = FR_P54,FR_r3,FR_P10 1995 nop.i 0 1996};; 1997{ .mfi 1998 nop.m 0 1999 fma.s1 FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3 2000 nop.i 0 2001} 2002{ .mfi 2003 nop.m 0 2004 fms.s1 FR_LnSqrt2Pi = FR_LnSqrt2Pi,f1,FR_NormX // ln(sqrt(2*Pi))-x 2005 nop.i 0 2006};; 2007{ .mfi 2008 nop.m 0 2009 fma.s1 FR_LnX = 
FR_TpNxLn2,f1,FR_P54 2010 nop.i 0 2011};; 2012{ .mfi 2013 nop.m 0 2014 fma.s1 FR_InvX2 = FR_InvX,FR_InvX,f0 2015 nop.i 0 2016};; 2017{ .mfi 2018 nop.m 0 2019 // (x-1/2)*ln(x)+ln(sqrt(2*Pi))-x 2020 fma.s1 FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi 2021 nop.i 0 2022};; 2023{ .mfi 2024 nop.m 0 2025 fma.s1 FR_W2 = FR_W4,FR_InvX2,FR_W2 // W2 + W4/x^2 2026 nop.i 0 2027};; 2028{ .mfb 2029 nop.m 0 2030 fma.d.s0 f8 = FR_InvX,FR_W2,FR_LnX 2031 br.ret.sptk b0 2032};; 2033// branch for calculating of ln(GAMMA(x)) for x < -512 2034//--------------------------------------------------------------------- 2035.align 32 2036lgamma_negstirling: 2037{ .mfi 2038 ldfpd FR_P5,FR_P4 = [GR_ad_1],16 2039 fms.s1 FR_Xf = FR_NormX,f1,FR_N // xf = x - [x] 2040 and GR_Exp = GR_SignExp,GR_ExpMask 2041} 2042{ .mfi 2043 ldfpd FR_P3,FR_P2 = [GR_ad_2],16 2044 fma.s1 FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1 2045 mov GR_0x30033 = 0x30033 2046};; 2047{ .mfi 2048 ldfe FR_Ln2 = [GR_ad_1],16 2049 nop.f 0 2050 extr.u GR_Ind = GR_ArgAsIs,44,8 2051} 2052{ .mib 2053 ldfd FR_W4 = [GR_ad_2],16 2054 // jump if x is less or equal to -2^52, i.e. 
x is big negative integer 2055 cmp.leu.unc p7,p0 = GR_0x30033,GR_SignExp 2056(p7) br.cond.spnt lgamma_singularity 2057};; 2058{ .mfi 2059 ldfpd FR_S28,FR_S26 = [GR_ad_Co7],16 2060 nop.f 0 2061 add GR_ad_LnT = 0x50,GR_ad_2 2062} 2063{ .mfi 2064 ldfpd FR_S24,FR_S22 = [GR_ad_Ce7],16 2065 nop.f 0 2066 mov GR_ExpBias = 0xffff 2067};; 2068{ .mfi 2069 ldfpd FR_S20,FR_S18 = [GR_ad_Co7],16 2070 nop.f 0 2071 shladd GR_ad_T = GR_Ind,4,GR_ad_LnT 2072} 2073{ .mfi 2074 ldfpd FR_S16,FR_S14 = [GR_ad_Ce7],16 2075 nop.f 0 2076 sub GR_Exp = GR_Exp,GR_ExpBias 2077};; 2078{ .mfi 2079 ldfe FR_S12 = [GR_ad_Co7],16 2080 nop.f 0 2081 nop.i 0 2082} 2083{ .mfi 2084 ldfe FR_S10 = [GR_ad_Ce7],16 2085 fms.s1 FR_r = FR_C,f8,f1 2086 nop.i 0 2087};; 2088{ .mmf 2089 ldfe FR_S8 = [GR_ad_Co7],16 2090 ldfe FR_S6 = [GR_ad_Ce7],16 2091 nop.f 0 2092};; 2093{ .mfi 2094 ldfe FR_S4 = [GR_ad_Co7],16 2095 fma.s1 FR_Xf2 = FR_Xf,FR_Xf,f0 2096 nop.i 0 2097} 2098{ .mfi 2099 ldfe FR_S2 = [GR_ad_Ce7],16 2100 fnma.s1 FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2 2101 nop.i 0 2102};; 2103{ .mfi 2104 setf.sig FR_int_N = GR_Exp 2105 frcpa.s1 FR_InvXf,p9 = f1,FR_Xf // 1/xf 2106 nop.i 0 2107} 2108{ .mfi 2109 ldfe FR_LnSqrt2Pi = [GR_ad_1],16 2110 nop.f 0 2111 nop.i 0 2112};; 2113{ .mfi 2114 getf.exp GR_SignExp = FR_Xf 2115 nop.f 0 2116 nop.i 0 2117} 2118{ .mfi 2119 ldfe FR_W2 = [GR_ad_2],16 2120 nop.f 0 2121 nop.i 0 2122};; 2123{ .mfi 2124 getf.sig GR_Sig = FR_Xf 2125 fma.s1 FR_P54 = FR_P5,FR_r,FR_P4 2126 nop.i 0 2127} 2128{ .mfi 2129 ldfe FR_T = [GR_ad_T] 2130 fma.s1 FR_P32 = FR_P3,FR_r,FR_P2 2131 nop.i 0 2132};; 2133{ .mfi 2134 and GR_Exp = GR_SignExp,GR_ExpMask 2135 fma.s1 FR_r2 = FR_r,FR_r,f0 2136 nop.i 0 2137} 2138{ .mfi 2139 nop.m 0 2140 fms.s1 FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2) 2141 nop.i 0 2142};; 2143{ .mfi 2144 nop.m 0 2145 fma.s1 FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2 2146 extr.u GR_Ind = GR_Sig,55,8 2147} 2148{ .mfi 2149 sub GR_Exp = GR_Exp,GR_ExpBias 2150 fma.s1 FR_Xf4 = 
FR_Xf2,FR_Xf2,f0 2151 cmp.eq p6,p0 = 0,GR_Sig 2152};; 2153{ .mfi 2154 setf.sig FR_int_Nf = GR_Exp 2155 fma.s1 FR_S28 = FR_S28,FR_Xf2,FR_S26 2156 shladd GR_ad_T = GR_Ind,4,GR_ad_LnT 2157} 2158{ .mfb 2159 nop.m 0 2160 fma.s1 FR_S24 = FR_S24,FR_Xf2,FR_S22 2161 // jump if the input argument is integer number from range (-512.0;-17.0] 2162(p6) br.cond.spnt lgamma_singularity 2163};; 2164{ .mfi 2165 getf.sig GR_Sig = FR_int_Ntrunc 2166 fma.s1 FR_S20 = FR_S20,FR_Xf2,FR_S18 2167 nop.i 0 2168} 2169{ .mfi 2170 nop.m 0 2171 fma.s1 FR_S16 = FR_S16,FR_Xf2,FR_S14 2172 nop.i 0 2173};; 2174{ .mfi 2175 ldfe FR_Tf = [GR_ad_T] 2176 fma.s1 FR_S12 = FR_S12,FR_Xf2,FR_S10 2177 nop.i 0 2178} 2179{ .mfi 2180 nop.m 0 2181 fma.s1 FR_S8 = FR_S8,FR_Xf2,FR_S6 2182 mov GR_SignOfGamma = 1 2183};; 2184{ .mfi 2185 nop.m 0 2186 fms.s1 FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg rf 2187 tbit.z p8,p0 = GR_Sig,0 2188} 2189{ .mfi 2190 nop.m 0 2191 fma.s1 FR_r3 = FR_r2,FR_r,f0 2192 // set p9 if signgum is 32-bit int 2193 // set p10 if signgum is 64-bit int 2194 cmp.eq p10,p9 = 8,r34 2195};; 2196{ .mfi 2197 nop.m 0 2198 fcvt.xf FR_N = FR_int_N 2199(p8) sub GR_SignOfGamma = r0,GR_SignOfGamma 2200} 2201{ .mfi 2202 nop.m 0 2203 fnma.s1 FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3 2204 nop.i 0 2205};; 2206.pred.rel "mutex",p9,p10 2207{ .mfi 2208 // store sign of gamma(x) as 32-bit int 2209(p9) st4 [r33] = GR_SignOfGamma 2210 fma.s1 FR_P54 = FR_P54,FR_r2,FR_P32 2211 nop.i 0 2212} 2213{ .mfi 2214 // store sign of gamma(x) as 64-bit int 2215(p10) st8 [r33] = GR_SignOfGamma 2216 fnma.s1 FR_P10 = FR_r2,FR_05,FR_r 2217 nop.i 0 2218};; 2219{ .mfi 2220 nop.m 0 2221 fma.s1 FR_Xf8 = FR_Xf4,FR_Xf4,f0 2222 nop.i 0 2223} 2224{ .mfi 2225 nop.m 0 2226 fma.s1 FR_S28 = FR_S28,FR_Xf4,FR_S24 2227 nop.i 0 2228};; 2229{ .mfi 2230 nop.m 0 2231 fma.s1 FR_S20 = FR_S20,FR_Xf4,FR_S16 2232 nop.i 0 2233} 2234{ .mfi 2235 nop.m 0 2236 fma.s1 FR_S12 = FR_S12,FR_Xf4,FR_S8 2237 nop.i 0 2238};; 2239{ .mfi 2240 nop.m 0 2241 fma.s1 FR_rf2 = 
FR_rf,FR_rf,f0 2242 nop.i 0 2243} 2244{ .mfi 2245 nop.m 0 2246 fma.s1 FR_P54f = FR_P5,FR_rf,FR_P4 2247 nop.i 0 2248};; 2249{ .mfi 2250 nop.m 0 2251 fma.s1 FR_P32f = FR_P3,FR_rf,FR_P2 2252 nop.i 0 2253} 2254{ .mfi 2255 nop.m 0 2256 fma.s1 FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3 2257 nop.i 0 2258};; 2259{ .mfi 2260 nop.m 0 2261 fcvt.xf FR_Nf = FR_int_Nf 2262 nop.i 0 2263} 2264{ .mfi 2265 nop.m 0 2266 fma.s1 FR_LnSqrt2Pi = FR_NormX,f1,FR_LnSqrt2Pi // x+ln(sqrt(2*Pi)) 2267 nop.i 0 2268};; 2269{ .mfi 2270 nop.m 0 2271 fma.s1 FR_P54 = FR_P54,FR_r3,FR_P10 2272 nop.i 0 2273};; 2274{ .mfi 2275 nop.m 0 2276 fma.s1 FR_S28 = FR_S28,FR_Xf8,FR_S20 2277 nop.i 0 2278};; 2279{ .mfi 2280 nop.m 0 2281 fma.s1 FR_rf3 = FR_rf2,FR_rf,f0 2282 nop.i 0 2283} 2284{ .mfi 2285 nop.m 0 2286 fnma.s1 FR_P10f = FR_rf2,FR_05,FR_rf 2287 nop.i 0 2288};; 2289{ .mfi 2290 nop.m 0 2291 fma.s1 FR_TpNxLn2 = FR_N,FR_Ln2,FR_T 2292 nop.i 0 2293} 2294{ .mfi 2295 nop.m 0 2296 fma.s1 FR_P54f = FR_P54f,FR_rf2,FR_P32f 2297 nop.i 0 2298};; 2299{ .mfi 2300 nop.m 0 2301 fma.s1 FR_InvX2 = FR_InvX,FR_InvX,f0 2302 nop.i 0 2303};; 2304{ .mfi 2305 nop.m 0 2306 fma.s1 FR_S28 = FR_S28,FR_Xf8,FR_S12 2307 nop.i 0 2308} 2309{ .mfi 2310 nop.m 0 2311 fma.s1 FR_S4 = FR_S4,FR_Xf2,FR_S2 2312 nop.i 0 2313};; 2314{ .mfi 2315 nop.m 0 2316 fma.s1 FR_P54f = FR_P54f,FR_rf3,FR_P10f 2317 nop.i 0 2318} 2319{ .mfi 2320 nop.m 0 2321 fma.s1 FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf 2322 nop.i 0 2323};; 2324{ .mfi 2325 nop.m 0 2326 fma.s1 FR_LnX = FR_TpNxLn2,f1,FR_P54 2327 nop.i 0 2328} 2329{ .mfi 2330 nop.m 0 2331 fma.s1 FR_W2 = FR_W4,FR_InvX2,FR_W2 2332 nop.i 0 2333};; 2334{ .mfi 2335 nop.m 0 2336 fma.s1 FR_S28 = FR_S28,FR_Xf4,FR_S4 2337 nop.i 0 2338};; 2339{ .mfi 2340 nop.m 0 2341 fma.s1 FR_LnXf = FR_TpNxLn2f,f1,FR_P54f 2342 nop.i 0 2343};; 2344{ .mfi 2345 nop.m 0 2346 fms.s1 FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi 2347 nop.i 0 2348};; 2349{ .mfi 2350 nop.m 0 2351 fma.s1 FR_LnX = FR_InvX,FR_W2,FR_LnX 2352 nop.i 0 2353};; 2354{ .mfi 2355 
nop.m 0 2356 fnma.s1 FR_LnX = FR_S28,FR_Xf2,FR_LnX 2357 nop.i 0 2358};; 2359{ .mfb 2360 nop.m 0 2361 fms.d.s0 f8 = FR_LnX,f1,FR_LnXf 2362 br.ret.sptk b0 2363};; 2364// branch for calculating of ln(GAMMA(x)) for 0 <= x < 1 2365//--------------------------------------------------------------------- 2366.align 32 2367lgamma_0_1: 2368{ .mfi 2369 ldfpd FR_P5,FR_P4 = [GR_ad_1],16 2370 fms.s1 FR_x = FR_NormX,f1,f0 // x 2371 mov GR_Arg025 = 0x3FD0 2372} 2373{ .mfi 2374 ldfpd FR_P3,FR_P2 = [GR_ad_2],16 2375 nop.f 0 2376 add GR_ad_Co = 0x1C40,GR_ad_Data 2377};; 2378{ .mfi 2379 ldfe FR_Ln2 = [GR_ad_1],0x50 2380 nop.f 0 2381 // p6 if arg < 0.25 2382 cmp.lt p6,p9 = GR_Arg,GR_Arg025 2383} 2384{ .mfi 2385 add GR_ad_2 = 0x40,GR_ad_2 2386 nop.f 0 2387 mov GR_Arg075 = 0x3FE8 2388};; 2389{ .mfi 2390 ldfpd FR_Q8,FR_Q7 = [GR_ad_1],16 2391 fma.s1 FR_w2 = FR_w,FR_w,f0 2392 // p7 if 0.25 <= arg < 0.75 2393 // p8 if 0.75 <= arg < 1.0 2394(p9) cmp.lt.unc p7,p8 = GR_Arg,GR_Arg075 2395} 2396{ .mfi 2397 mov GR_Arg0875 = 0x3FEC 2398 nop.f 0 2399 sub GR_Exp = GR_Exp,GR_ExpBias 2400};; 2401{ .mfi 2402 ldfpd FR_Q6,FR_Q5 = [GR_ad_2],16 2403 nop.f 0 2404(p8) cmp.lt p9,p0 = GR_Arg,GR_Arg0875 2405} 2406{ .mfi 2407 ldfpd FR_Q4,FR_Q3 = [GR_ad_1],16 2408 nop.f 0 2409 add GR_ad_Ce = 0x60,GR_ad_Co 2410};; 2411.pred.rel "mutex",p7,p8 2412{ .mfi 2413 ldfd FR_Q2 = [GR_ad_2],16 2414 fms.s1 FR_r = FR_C,f8,f1 2415(p7) mov GR_Offs = 0xC0 2416} 2417{ .mfi 2418 setf.sig FR_int_N = GR_Exp 2419 nop.f 0 2420(p8) mov GR_Offs = 0x180 2421};; 2422.pred.rel "mutex",p6,p7 2423{ .mfi 2424(p9) add GR_ad_Co = GR_Offs,GR_ad_Co 2425(p8) fms.s1 FR_x = FR_NormX,f1,f1 // x-1 2426 nop.i 0 2427} 2428{ .mfi 2429(p9) add GR_ad_Ce = GR_Offs,GR_ad_Ce 2430(p7) fms.s1 FR_x = FR_NormX,f1,FR_LocalMin // x-LocalMin 2431 cmp.lt p10,p0 = GR_Arg,GR_Arg0875 2432};; 2433lgamma_common_0_2: 2434{ .mfi 2435 ldfpd FR_A17,FR_A16 = [GR_ad_Co],16 2436 nop.f 0 2437 nop.i 0 2438} 2439{ .mfi 2440 ldfpd FR_A15,FR_A14 = [GR_ad_Ce],16 2441 nop.f 0 2442 nop.i 
0 2443};; 2444{ .mfi 2445 ldfpd FR_A13,FR_A12 = [GR_ad_Co],16 2446 nop.f 0 2447(p10) extr.u GR_Ind = GR_ArgAsIs,44,8 2448} 2449{ .mfi 2450 ldfpd FR_A11,FR_A10 = [GR_ad_Ce],16 2451 nop.f 0 2452 nop.i 0 2453};; 2454{ .mfi 2455 ldfpd FR_A9,FR_A8 = [GR_ad_Co],16 2456(p10) fnma.s1 FR_Q1 = FR_05,FR_w2,FR_w 2457 nop.i 0 2458} 2459{ .mfi 2460 ldfpd FR_A7,FR_A6 = [GR_ad_Ce],16 2461(p10) fma.s1 FR_w3 = FR_w2,FR_w,f0 2462 nop.i 0 2463};; 2464{ .mfi 2465(p10) getf.exp GR_SignExp_w = FR_w 2466(p10) fma.s1 FR_w4 = FR_w2,FR_w2,f0 2467 nop.i 0 2468} 2469{ .mfi 2470(p10) shladd GR_ad_2 = GR_Ind,4,GR_ad_2 2471(p10) fma.s1 FR_r2 = FR_r,FR_r,f0 2472 nop.i 0 2473};; 2474{ .mfi 2475(p10) ldfe FR_T = [GR_ad_2] 2476(p10) fma.s1 FR_P54 = FR_P5,FR_r,FR_P4 2477 nop.i 0 2478} 2479{ .mfi 2480 ldfe FR_A5 = [GR_ad_Co],16 2481(p10) fma.s1 FR_P32 = FR_P3,FR_r,FR_P2 2482 nop.i 0 2483};; 2484{ .mfi 2485 ldfe FR_A4 = [GR_ad_Ce],16 2486 fma.s1 FR_x2 = FR_x,FR_x,f0 2487(p10) and GR_Exp_w = GR_ExpMask, GR_SignExp_w 2488} 2489{ .mfi 2490 ldfe FR_A3 = [GR_ad_Co],16 2491 nop.f 0 2492(p10) mov GR_fff9 = 0xfff9 2493};; 2494// p13 <== large w __libm_lgamma 2495// p14 <== small w __libm_lgamma 2496{ .mfi 2497 ldfe FR_A2 = [GR_ad_Ce],16 2498(p10) fma.s1 FR_Q8 = FR_Q8,FR_w,FR_Q7 2499(p10) cmp.ge.unc p13,p14 = GR_Exp_w,GR_fff9 2500} 2501{ .mfi 2502 ldfe FR_A1 = [GR_ad_Co],16 2503(p10) fma.s1 FR_Q6 = FR_Q6,FR_w,FR_Q5 2504 nop.i 0 2505};; 2506{ .mfi 2507 ldfe FR_A0 = [GR_ad_Ce],16 2508(p10) fma.s1 FR_Q4 = FR_Q4,FR_w,FR_Q3 2509 nop.i 0 2510} 2511{ .mfi 2512 nop.m 0 2513(p10) fma.s1 FR_Q2 = FR_Q2,FR_w3,FR_Q1 2514 nop.i 0 2515};; 2516{ .mfi 2517 // set p11 if signgum is 32-bit int 2518 // set p12 if signgum is 64-bit int 2519 cmp.eq p12,p11 = 8,r34 2520(p10) fma.s1 FR_r3 = FR_r2,FR_r,f0 2521 nop.i 0 2522} 2523{ .mfi 2524 nop.m 0 2525(p10) fnma.s1 FR_P10 = FR_r2,FR_05,FR_r 2526 mov GR_SignOfGamma = 1 2527};; 2528.pred.rel "mutex",p11,p12 2529{ .mfi 2530 // store sign of gamma(x) as 32-bit int 2531(p11) st4 [r33] = 
GR_SignOfGamma 2532 fma.s1 FR_A17 = FR_A17,FR_x,FR_A16 2533 nop.i 0 2534} 2535{ .mfi 2536 // store sign of gamma(x) as 64-bit int 2537(p12) st8 [r33] = GR_SignOfGamma 2538 fma.s1 FR_A15 = FR_A15,FR_x,FR_A14 2539 nop.i 0 2540};; 2541{ .mfi 2542 nop.m 0 2543(p10) fcvt.xf FR_N = FR_int_N 2544 nop.i 0 2545} 2546{ .mfi 2547 nop.m 0 2548(p10) fma.s1 FR_P54 = FR_P54,FR_r2,FR_P32 2549 nop.i 0 2550};; 2551{ .mfi 2552 nop.m 0 2553 fma.s1 FR_A13 = FR_A13,FR_x,FR_A12 2554 nop.i 0 2555} 2556{ .mfi 2557 nop.m 0 2558 fma.s1 FR_A11 = FR_A11,FR_x,FR_A10 2559 nop.i 0 2560};; 2561{ .mfi 2562 nop.m 0 2563 fma.s1 FR_A9 = FR_A9,FR_x,FR_A8 2564 nop.i 0 2565} 2566{ .mfi 2567 nop.m 0 2568 fma.s1 FR_A7 = FR_A7,FR_x,FR_A6 2569 nop.i 0 2570};; 2571{ .mfi 2572 nop.m 0 2573(p10) fma.s1 FR_Qlo = FR_Q8,FR_w2,FR_Q6 2574 nop.i 0 2575} 2576{ .mfi 2577 nop.m 0 2578(p10) fma.s1 FR_w6 = FR_w3,FR_w3,f0 2579 nop.i 0 2580};; 2581{ .mfi 2582 nop.m 0 2583(p10) fma.s1 FR_Qhi = FR_Q4,FR_w4,FR_Q2 2584 nop.i 0 2585} 2586{ .mfi 2587 nop.m 0 2588 fma.s1 FR_A5 = FR_A5,FR_x,FR_A4 2589 nop.i 0 2590};; 2591{ .mfi 2592 nop.m 0 2593(p10) fma.s1 FR_TpNxLn2 = FR_N,FR_Ln2,FR_T 2594 nop.i 0 2595} 2596{ .mfi 2597 nop.m 0 2598 fma.s1 FR_A3 = FR_A3,FR_x,FR_A2 2599 nop.i 0 2600};; 2601{ .mfi 2602 nop.m 0 2603(p10) fma.s1 FR_P54 = FR_P54,FR_r3,FR_P10 2604 nop.i 0 2605} 2606{ .mfi 2607 nop.m 0 2608 fma.s1 FR_A1 = FR_A1,FR_x,FR_A0 2609 nop.i 0 2610};; 2611{ .mfi 2612 nop.m 0 2613 fma.s1 FR_A17 = FR_A17,FR_x2,FR_A15 2614 nop.i 0 2615} 2616{ .mfi 2617 nop.m 0 2618 fma.s1 FR_A13 = FR_A13,FR_x2,FR_A11 2619 nop.i 0 2620};; 2621{ .mfi 2622 nop.m 0 2623 fma.s1 FR_A9 = FR_A9,FR_x2,FR_A7 2624 nop.i 0 2625} 2626{ .mfi 2627 nop.m 0 2628 fma.s1 FR_x4 = FR_x2,FR_x2,f0 2629 nop.i 0 2630};; 2631{ .mfi 2632 nop.m 0 2633(p14) fma.s1 FR_LnX = FR_Qlo,FR_w6,FR_Qhi 2634 nop.i 0 2635};; 2636{ .mfi 2637 nop.m 0 2638 fma.s1 FR_A5 = FR_A5,FR_x2,FR_A3 2639 nop.i 0 2640};; 2641{ .mfi 2642 nop.m 0 2643(p13) fma.s1 FR_LnX = FR_TpNxLn2,f1,FR_P54 2644 nop.i 0 
2645};; 2646{ .mfi 2647 nop.m 0 2648 fma.s1 FR_A17 = FR_A17,FR_x4,FR_A13 2649 nop.i 0 2650} 2651{ .mfi 2652 nop.m 0 2653 fma.s1 FR_x8 = FR_x4,FR_x4,f0 2654 nop.i 0 2655};; 2656{ .mfi 2657 nop.m 0 2658 fma.s1 FR_A9 = FR_A9,FR_x4,FR_A5 2659 nop.i 0 2660};; 2661{ .mfi 2662 nop.m 0 2663 fma.s1 FR_A17 = FR_A17,FR_x8,FR_A9 2664 nop.i 0 2665};; 2666{ .mfi 2667 nop.m 0 2668(p10) fms.s1 FR_A1 = FR_A1,f1,FR_LnX 2669 nop.i 0 2670};; 2671{ .mfb 2672 nop.m 0 2673 fma.d.s0 f8 = FR_A17,FR_x2,FR_A1 2674 br.ret.sptk b0 2675};; 2676// branch for calculating of ln(GAMMA(x)) for 1.0 <= x < 2.25 2677//--------------------------------------------------------------------- 2678.align 32 2679lgamma_1_2: 2680{ .mfi 2681 add GR_ad_Co = 0x10B0,GR_ad_1 2682 fcmp.eq.s1 p12,p0 = f1,FR_w 2683 mov GR_Arg125 = 0x3FF4 2684} 2685{ .mfi 2686 add GR_ad_Ce = 0x1110,GR_ad_1 2687 nop.f 0 2688 mov GR_Arg175 = 0x3FFC 2689};; 2690{ .mfi 2691 mov GR_SignOfGamma = 1 2692 fcmp.eq.s1 p13,p0 = f1,FR_NormX 2693 cmp.lt p6,p9 = GR_Arg,GR_Arg125 // 1.0 <= x < 1.25 2694} 2695{ .mfi 2696 // set p10 if signgum is 32-bit int 2697 // set p11 if signgum is 64-bit int 2698 cmp.eq p11,p10 = 8,r34 2699 nop.f 0 2700 cmp.ge p8,p0 = GR_Arg,GR_Arg175 // x >= 1.75 2701};; 2702.pred.rel "mutex",p10,p11 2703{ .mfi 2704 // store sign of gamma(x) as 32-bit int 2705(p10) st4 [r33] = GR_SignOfGamma 2706(p12) fma.d.s0 f8 = f0,f0,f0 2707(p9) cmp.lt.unc p7,p0 = GR_Arg,GR_Arg175 // 1.25 <= x < 1.75 2708} 2709{ .mib 2710 // store sign of gamma(x) as 64-bit int 2711(p11) st8 [r33] = GR_SignOfGamma 2712 mov GR_Offs = 0 2713(p12) br.ret.spnt b0 // fast exit for 2.0 2714};; 2715.pred.rel "mutex",p7,p8 2716{ .mfi 2717(p7) mov GR_Offs = 0xC0 2718(p7) fms.s1 FR_x = FR_w,f1,FR_LocalMin 2719 nop.i 0 2720} 2721{ .mfb 2722(p8) mov GR_Offs = 0x180 2723(p13) fma.d.s0 f8 = f0,f0,f0 2724(p13) br.ret.spnt b0 // fast exit for 1.0 2725};; 2726.pred.rel "mutex",p6,p8 2727{ .mfi 2728 add GR_ad_Co = GR_ad_Co,GR_Offs 2729(p8) fms.s1 FR_x = FR_w,f1,f1 2730 cmp.eq 
p0,p10 = r0,r0 2731} 2732{ .mfb 2733 add GR_ad_Ce = GR_ad_Ce,GR_Offs 2734(p6) fma.s1 FR_x = f0,f0,FR_w 2735 br.cond.sptk lgamma_common_0_2 2736};; 2737// branch for calculating of ln(GAMMA(x)) for -17 < x < 0 2738//--------------------------------------------------------------------- 2739.align 32 2740lgamma_negrecursion: 2741{ .mfi 2742 getf.d GR_ArgXfrAsIs = FR_Xfr 2743 fma.s1 FR_Xp2 = FR_2,f1,FR_NormX 2744 mov GR_Arg05 = 0x3FE 2745} 2746{ .mfi 2747 add GR_ad_Roots = 0x1390,GR_ad_1 2748 fma.s1 FR_NormX = FR_NormX,FR_Xfr,f0 2749 mov GR_Arg075 = 0x3FE8 2750};; 2751{ .mfi 2752 getf.sig GR_Sig = FR_int_Ntrunc 2753 fma.s1 FR_Xp3 = FR_2,f1,FR_Xp1 2754 shl GR_Arg05 = GR_Arg05,52 2755} 2756{ .mfi 2757 mov GR_Arg025 = 0x3FD0 2758 fma.s1 FR_Xp6 = FR_5,f1,FR_Xp1 2759 add GR_ad_Co = 0x1C40,GR_ad_Data 2760};; 2761{ .mfi 2762 add GR_ad_Dx = 8,GR_ad_Roots 2763 fma.s1 FR_Xp7 = FR_2,f1,FR_Xp5 2764 shr.u GR_ArgXfr = GR_ArgXfrAsIs,48 2765} 2766{ .mfi 2767 add GR_ad_Ce = 0x60,GR_ad_Co 2768 fma.s1 FR_Xp8 = FR_3,f1,FR_Xp5 2769 cmp.lt p6,p0 = GR_ArgXfrAsIs,GR_Arg05 2770};; 2771{ .mfi 2772 and GR_RootInd = 0xF,GR_Sig 2773 fma.s1 FR_Xp9 = FR_2,FR_2,FR_Xp5 2774 // p10 if arg < 0.25 2775 cmp.lt p10,p14 = GR_ArgXfr,GR_Arg025 2776} 2777{ .mfi 2778(p6) add GR_ad_Roots = 0x120,GR_ad_Roots 2779 fma.s1 FR_Xp11 = f1,f1,FR_Xp10 2780(p6) add GR_ad_Dx = 0x120,GR_ad_Dx 2781};; 2782{ .mfi 2783 shladd GR_ad_Root = GR_RootInd,4,GR_ad_Roots 2784 fma.s1 FR_Xp12 = FR_2,f1,FR_Xp10 2785 // p11 if 0.25 <= arg < 0.75 2786 // p12 if 0.75 <= arg < 1.0 2787(p14) cmp.lt.unc p11,p12 = GR_ArgXfr,GR_Arg075 2788} 2789{ .mfi 2790 shladd GR_ad_Dx = GR_RootInd,4,GR_ad_Dx 2791 fma.s1 FR_Xp13 = FR_3,f1,FR_Xp10 2792 cmp.eq p0,p13 = 0,GR_Sig 2793};; 2794{ .mfi 2795 ld8 GR_Root = [GR_ad_Root] 2796 fma.s1 FR_Xp14 = FR_2,FR_2,FR_Xp10 2797(p12) mov GR_Offs = 0x180 2798} 2799{ .mfi 2800 ldfd FR_Root = [GR_ad_Root] 2801 fma.s1 FR_Xp15 = FR_5,f1,FR_Xp10 2802 and GR_Sig = 0xF,GR_Sig 2803};; 2804{ .mfi 2805 ld8 GR_Dx = [GR_ad_Dx] 
2806 fma.s1 FR_Xp16 = FR_3,FR_2,FR_Xp10 2807(p13) cmp.ge.unc p6,p0 = 0xD,GR_Sig 2808} 2809{ .mfi 2810(p11) mov GR_Offs = 0xC0 2811(p13) fma.s1 FR_NormX = FR_NormX,FR_Xp1,f0 2812(p13) cmp.ge.unc p7,p0 = 0xB,GR_Sig 2813};; 2814{ .mfi 2815(p14) add GR_ad_Co = GR_Offs,GR_ad_Co 2816(p6) fma.s1 FR_Xp2 = FR_Xp2,FR_Xp3,f0 2817(p13) cmp.ge.unc p8,p0 = 0x9,GR_Sig 2818} 2819{ .mfi 2820(p14) add GR_ad_Ce = GR_Offs,GR_ad_Ce 2821(p7) fma.s1 FR_Xp4 = FR_Xp4,FR_Xp5,f0 2822(p13) cmp.ge.unc p9,p0 = 0x7,GR_Sig 2823};; 2824{ .mfi 2825 ldfpd FR_B17,FR_B16 = [GR_ad_Co],16 2826(p8) fma.s1 FR_Xp6 = FR_Xp6,FR_Xp7,f0 2827(p13) cmp.ge.unc p6,p0 = 0x5,GR_Sig 2828} 2829{ .mfi 2830 ldfpd FR_B15,FR_B14 = [GR_ad_Ce],16 2831(p9) fma.s1 FR_Xp8 = FR_Xp8,FR_Xp9,f0 2832(p13) cmp.ge.unc p7,p0 = 0x3,GR_Sig 2833};; 2834{ .mfi 2835 ldfpd FR_B13,FR_B12 = [GR_ad_Co],16 2836(p6) fma.s1 FR_Xp10 = FR_Xp10,FR_Xp11,f0 2837(p13) cmp.ge.unc p8,p0 = 0x1,GR_Sig 2838} 2839{ .mfi 2840 ldfpd FR_B11,FR_B10 = [GR_ad_Ce],16 2841(p7) fma.s1 FR_Xp12 = FR_Xp12,FR_Xp13,f0 2842(p13) cmp.eq.unc p9,p0 = 0,GR_Sig 2843};; 2844{ .mfi 2845 ldfpd FR_B9,FR_B8 = [GR_ad_Co],16 2846(p8) fma.s1 FR_Xp14 = FR_Xp14,FR_Xp15,f0 2847 mov GR_Arg15 = 0xC02E // -15 2848} 2849{ .mfi 2850 ldfpd FR_B7,FR_B6 = [GR_ad_Ce],16 2851 fcmp.eq.s1 p15,p0 = f0,FR_Xf 2852(p13) cmp.ge.unc p6,p0 = 0xC,GR_Sig 2853};; 2854{ .mfi 2855 ldfe FR_B5 = [GR_ad_Co],16 2856(p9) fma.s1 FR_NormX = FR_NormX,FR_Xp16,f0 2857 sub GR_Root = GR_ArgAsIs,GR_Root 2858} 2859{ .mfi 2860 sub GR_RootInd = 0xE,GR_RootInd 2861(p11) fms.s1 FR_x = FR_Xfr,f1,FR_LocalMin // x-LocalMin 2862(p13) cmp.ge.unc p7,p0 = 0x8,GR_Sig 2863};; 2864.pred.rel "mutex",p10,p12 2865{ .mfi 2866 ldfe FR_B4 = [GR_ad_Ce],16 2867(p10) fms.s1 FR_x = FR_Xfr,f1,f0 // x 2868 add GR_Root = GR_Root,GR_Dx 2869} 2870{ .mfb 2871 cmp.gtu p14,p0 = 0xE,GR_RootInd 2872(p12) fms.s1 FR_x = FR_Xfr,f1,f1 // x-1 2873(p15) br.cond.spnt lgamma_singularity 2874};; 2875{ .mfi 2876 ldfe FR_B3 = [GR_ad_Co],16 2877(p6) fma.s1 FR_Xp2 = 
FR_Xp2,FR_Xp4,f0 2878(p14) cmp.lt.unc p11,p0 = GR_Arg,GR_Arg15 2879} 2880{ .mfi 2881 ldfe FR_B2 = [GR_ad_Ce],16 2882(p7) fma.s1 FR_Xp6 = FR_Xp6,FR_Xp8,f0 2883 add GR_2xDx = GR_Dx,GR_Dx 2884};; 2885{ .mfi 2886 ldfe FR_B1 = [GR_ad_Co],16 2887 fms.s1 FR_r = f8,f1,FR_Root 2888(p13) cmp.ge.unc p6,p0 = 0x4,GR_Sig 2889} 2890{ .mib 2891 ldfe FR_B0 = [GR_ad_Ce],16 2892(p11) cmp.leu.unc p10,p0 = GR_Root,GR_2xDx 2893(p10) br.cond.spnt lgamma_negroots 2894};; 2895{ .mfi 2896 ldfpd FR_P5,FR_P4 = [GR_ad_1],16 2897(p6) fma.s1 FR_Xp10 = FR_Xp10,FR_Xp12,f0 2898 tbit.z p14,p15 = GR_Sig,0 2899} 2900{ .mfi 2901 ldfpd FR_P3,FR_P2 = [GR_ad_2],16 2902 fnma.d.s0 FR_T = f1,f1,f8 // nop.f 0 2903 2904(p13) cmp.ge.unc p7,p0 = 0x2,GR_Sig 2905};; 2906{ .mfi 2907 ldfe FR_Ln2 = [GR_ad_1],0x50 2908(p7) fma.s1 FR_NormX = FR_NormX,FR_Xp14,f0 2909 mov GR_PseudoRoot = 0xBFFBC 2910} 2911{ .mlx 2912 add GR_ad_2 = 0x40,GR_ad_2 2913 movl GR_2xDx = 0x00002346DC5D6389 2914};; 2915{ .mfi 2916 ldfpd FR_Q8,FR_Q7 = [GR_ad_1],16 2917 fma.s1 FR_x2 = FR_x,FR_x,f0 2918 shl GR_PseudoRoot = GR_PseudoRoot,44 2919} 2920{ .mfi 2921 ldfpd FR_Q6,FR_Q5 = [GR_ad_2],16 2922 fma.s1 FR_B17 = FR_B17,FR_x,FR_B16 2923(p13) cmp.ge.unc p6,p0 = 0xA,GR_Sig 2924};; 2925{ .mfi 2926 ldfpd FR_Q4,FR_Q3 = [GR_ad_1],16 2927(p6) fma.s1 FR_Xp2 = FR_Xp2,FR_Xp6,f0 2928 sub GR_PseudoRoot = GR_ArgAsIs,GR_PseudoRoot 2929} 2930{ .mfi 2931 ldfpd FR_Q2,FR_Q1 = [GR_ad_2],16 2932 fma.s1 FR_B15 = FR_B15,FR_x,FR_B14 2933(p13) cmp.ge.unc p7,p0 = 0x6,GR_Sig 2934};; 2935{ .mfi 2936 add GR_ad_Co = 0x12F0,GR_ad_2 2937 fma.s1 FR_B13 = FR_B13,FR_x,FR_B12 2938 cmp.leu.unc p10,p0 = GR_PseudoRoot,GR_2xDx 2939} 2940{ .mfi 2941 add GR_ad_Ce = 0x1300,GR_ad_2 2942 fma.s1 FR_B11 = FR_B11,FR_x,FR_B10 2943 mov GR_ExpMask = 0x1ffff 2944};; 2945{ .mfi 2946(p10) ldfe FR_PR01 = [GR_ad_Co],0xF0 2947 fma.s1 FR_B9 = FR_B9,FR_x,FR_B8 2948 mov GR_ExpBias = 0xFFFF 2949} 2950{ .mfb 2951(p10) ldfe FR_PR11 = [GR_ad_Ce],0xF0 2952 fma.s1 FR_B7 = FR_B7,FR_x,FR_B6 2953(p10) br.cond.spnt 
lgamma_pseudoroot 2954};; 2955{ .mfi 2956(p13) cmp.ge.unc p6,p0 = 0xE,GR_Sig 2957(p7) fma.s1 FR_NormX = FR_NormX,FR_Xp10,f0 2958 tbit.z.unc p8,p0 = GR_Sig,0 2959} 2960{ .mfi 2961 mov GR_SignOfGamma = 1 2962 fma.s1 FR_B5 = FR_B5,FR_x,FR_B4 2963 // set p9 if signgum is 32-bit int 2964 // set p10 if signgum is 64-bit int 2965 cmp.eq p10,p9 = 8,r34 2966};; 2967{ .mfi 2968 nop.m 0 2969 fma.s1 FR_B3 = FR_B3,FR_x,FR_B2 2970(p8) sub GR_SignOfGamma = r0,GR_SignOfGamma 2971} 2972{ .mfi 2973 nop.m 0 2974(p14) fms.s1 FR_w = f0,f0,f1 2975 nop.i 0 2976};; 2977.pred.rel "mutex",p9,p10 2978{ .mfi 2979 // store sign of gamma(x) as 32-bit int 2980(p9) st4 [r33] = GR_SignOfGamma 2981 fma.s1 FR_B1 = FR_B1,FR_x,FR_B0 2982 nop.i 0 2983} 2984{ .mfi 2985 // store sign of gamma(x) as 64-bit int 2986(p10) st8 [r33] = GR_SignOfGamma 2987 fma.s1 FR_B17 = FR_B17,FR_x2,FR_B15 2988 nop.i 0 2989};; 2990{ .mfi 2991 nop.m 0 2992 fma.s1 FR_B13 = FR_B13,FR_x2,FR_B11 2993 nop.i 0 2994};; 2995{ .mfi 2996 nop.m 0 2997 fma.s1 FR_B9 = FR_B9,FR_x2,FR_B7 2998 nop.i 0 2999} 3000{ .mfi 3001 nop.m 0 3002 fma.s1 FR_x4 = FR_x2,FR_x2,f0 3003 nop.i 0 3004};; 3005{ .mfi 3006 nop.m 0 3007(p6) fma.s1 FR_NormX = FR_NormX,FR_Xp2,f0 3008 nop.i 0 3009};; 3010{ .mfi 3011 nop.m 0 3012 fma.s1 FR_B5 = FR_B5,FR_x2,FR_B3 3013 nop.i 0 3014};; 3015{ .mfi 3016 nop.m 0 3017 fma.s1 FR_B17 = FR_B17,FR_x4,FR_B13 3018 nop.i 0 3019} 3020{ .mfi 3021 nop.m 0 3022 fma.s1 FR_x8 = FR_x4,FR_x4,f0 3023 nop.i 0 3024};; 3025.pred.rel "mutex",p14,p15 3026{ .mfi 3027 nop.m 0 3028(p15) fms.s1 FR_w = FR_NormX,f1,f1 3029 nop.i 0 3030} 3031{ .mfi 3032 nop.m 0 3033(p14) fnma.s1 FR_w = FR_NormX,f1,FR_w 3034 nop.i 0 3035};; 3036{ .mfi 3037 nop.m 0 3038 fma.s1 FR_B9 = FR_B9,FR_x4,FR_B5 3039 nop.i 0 3040};; 3041{ .mfi 3042 nop.m 0 3043 frcpa.s1 FR_C,p0 = f1,FR_NormX 3044 nop.i 0 3045};; 3046{ .mfi 3047 getf.exp GR_Exp = FR_NormX 3048 nop.f 0 3049 nop.i 0 3050};; 3051{ .mfi 3052 getf.d GR_ArgAsIs = FR_NormX 3053 nop.f 0 3054 nop.i 0 3055};; 3056{ .mfi 3057 
nop.m 0 3058 fma.s1 FR_w2 = FR_w,FR_w,f0 3059 nop.i 0 3060} 3061{ .mfi 3062 and GR_Exp = GR_Exp,GR_ExpMask 3063 fma.s1 FR_Q8 = FR_Q8,FR_w,FR_Q7 3064 nop.i 0 3065};; 3066{ .mfi 3067 sub GR_Exp = GR_Exp,GR_ExpBias 3068 fma.s1 FR_B17 = FR_B17,FR_x8,FR_B9 3069 extr.u GR_Ind = GR_ArgAsIs,44,8 3070} 3071{ .mfi 3072 nop.m 0 3073 fma.s1 FR_Q6 = FR_Q6,FR_w,FR_Q5 3074 nop.i 0 3075};; 3076{ .mfi 3077 setf.sig FR_int_N = GR_Exp 3078 fms.s1 FR_r = FR_C,FR_NormX,f1 3079 nop.i 0 3080} 3081{ .mfi 3082 shladd GR_ad_2 = GR_Ind,4,GR_ad_2 3083 nop.f 0 3084 nop.i 0 3085};; 3086{ .mfi 3087 getf.exp GR_SignExp_w = FR_w 3088 fma.s1 FR_Q4 = FR_Q4,FR_w,FR_Q3 3089 nop.i 0 3090} 3091{ .mfi 3092 ldfe FR_T = [GR_ad_2] 3093 nop.f 0 3094 nop.i 0 3095};; 3096{ .mfi 3097 and GR_Exp_w = GR_ExpMask, GR_SignExp_w 3098 fnma.s1 FR_Q1 = FR_05,FR_w2,FR_w 3099 mov GR_fff9 = 0xfff9 3100} 3101{ .mfi 3102 nop.m 0 3103 fma.s1 FR_w3 = FR_w2,FR_w,f0 3104 nop.i 0 3105};; 3106{ .mfi 3107 nop.m 0 3108 fma.s1 FR_w4 = FR_w2,FR_w2,f0 3109// p13 <== large w __libm_lgamma 3110// p14 <== small w __libm_lgamma 3111 cmp.ge p13,p14 = GR_Exp_w,GR_fff9 3112} 3113{ .mfi 3114 nop.m 0 3115 fma.s1 FR_Qlo = FR_Q8,FR_w2,FR_Q6 3116 nop.i 0 3117};; 3118{ .mfi 3119 nop.m 0 3120(p13) fma.s1 FR_r2 = FR_r,FR_r,f0 3121 nop.i 0 3122} 3123{ .mfi 3124 nop.m 0 3125 fma.s1 FR_B17 = FR_B17,FR_x2,FR_B1 3126 nop.i 0 3127};; 3128{ .mfi 3129 nop.m 0 3130(p13) fma.s1 FR_P32 = FR_P3,FR_r,FR_P2 3131 nop.i 0 3132} 3133{ .mfi 3134 nop.m 0 3135(p13) fma.s1 FR_P54 = FR_P5,FR_r,FR_P4 3136 nop.i 0 3137};; 3138{ .mfi 3139 nop.m 0 3140(p14) fma.s1 FR_Q2 = FR_Q2,FR_w3,FR_Q1 3141 nop.i 0 3142} 3143{ .mfi 3144 nop.m 0 3145(p14) fma.s1 FR_w6 = FR_w3,FR_w3,f0 3146 nop.i 0 3147};; 3148{ .mfi 3149 nop.m 0 3150(p13) fcvt.xf FR_N = FR_int_N 3151 nop.i 0 3152};; 3153{ .mfi 3154 nop.m 0 3155(p13) fma.s1 FR_r3 = FR_r2,FR_r,f0 3156 nop.i 0 3157} 3158{ .mfi 3159 nop.m 0 3160(p13) fnma.s1 FR_P10 = FR_r2,FR_05,FR_r 3161 nop.i 0 3162};; 3163{ .mfi 3164 nop.m 0 3165(p13) 
fma.s1 FR_P54 = FR_P54,FR_r2,FR_P32 3166 nop.i 0 3167};; 3168{ .mfi 3169 nop.m 0 3170(p14) fma.s1 FR_Qhi = FR_Q4,FR_w4,FR_Q2 3171 nop.i 0 3172} 3173{ .mfi 3174 nop.m 0 3175(p14) fnma.s1 FR_Qlo = FR_Qlo,FR_w6,FR_B17 3176 nop.i 0 3177};; 3178{ .mfi 3179 nop.m 0 3180(p13) fma.s1 FR_TpNxLn2 = FR_N,FR_Ln2,FR_T 3181 nop.i 0 3182};; 3183{ .mfi 3184 nop.m 0 3185(p13) fma.s1 FR_P54 = FR_P54,FR_r3,FR_P10 3186 nop.i 0 3187};; 3188.pred.rel "mutex",p13,p14 3189{ .mfi 3190 nop.m 0 3191(p14) fms.d.s0 f8 = FR_Qlo,f1,FR_Qhi 3192 nop.i 0 3193} 3194{ .mfi 3195 nop.m 0 3196(p13) fma.s1 FR_LnX = FR_TpNxLn2,f1,FR_P54 3197 nop.i 0 3198};; 3199{ .mfb 3200 nop.m 0 3201(p13) fms.d.s0 f8 = FR_B17,f1,FR_LnX 3202 br.ret.sptk b0 3203};; 3204// branch for calculating of ln(GAMMA(x)) near negative roots 3205//--------------------------------------------------------------------- 3206.align 32 3207lgamma_negroots: 3208{ .mfi 3209 shladd GR_Offs = GR_RootInd,3,r0 //GR_RootInd*8 3210 fma.s1 FR_r2 = FR_r,FR_r,f0 3211 add GR_ad_Co = 0x15C0,GR_ad_1//0x1590,GR_ad_1 3212} 3213{ .mfi 3214 add GR_ad_Ce = 0x1610,GR_ad_1//0x15E0,GR_ad_1 3215 nop.f 0 3216 cmp.lt p6,p0 = GR_ArgXfrAsIs,GR_Arg05 3217};; 3218{ .mfi 3219 add GR_ad_Roots = 0x10A0,GR_ad_1 3220 nop.f 0 3221(p6) add GR_ad_Co = 0x820,GR_ad_Co 3222} 3223{ .mfi 3224(p6) add GR_ad_Ce = 0x820,GR_ad_Ce 3225 nop.f 0 3226 shladd GR_Offs = GR_RootInd,1,GR_Offs //GR_RootInd*10 3227};; 3228{ .mmi 3229 shladd GR_ad_Co = GR_Offs,4,GR_ad_Co 3230 shladd GR_ad_Ce = GR_Offs,4,GR_ad_Ce 3231 cmp.eq p8,p7 = r0,r0 3232};; 3233{ .mmi 3234 ldfpd FR_A15,FR_A14 = [GR_ad_Co],16 3235 ldfpd FR_A13,FR_A12 = [GR_ad_Ce],16 3236 mov GR_SignOfGamma = 1 3237};; 3238{ .mmi 3239 ldfpd FR_A11,FR_A10 = [GR_ad_Co],16 3240 ldfpd FR_A9,FR_A8 = [GR_ad_Ce],16 3241(p6) cmp.eq p7,p8 = r0,GR_RootInd 3242};; 3243{ .mmi 3244 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16 3245 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16 3246 tbit.z p11,p0 = GR_Sig,0 3247};; 3248{ .mmi 3249 ldfe FR_A3 = [GR_ad_Co],16 3250 ldfe FR_A2 = 
[GR_ad_Ce],16 3251 // set p9 if signgum is 32-bit int 3252 // set p10 if signgum is 64-bit int 3253 cmp.eq p10,p9 = 8,r34 3254};; 3255{ .mmi 3256 ldfe FR_A1 = [GR_ad_Co],16 3257 ldfe FR_A0 = [GR_ad_Ce],16 3258(p11) sub GR_SignOfGamma = r0,GR_SignOfGamma 3259};; 3260{ .mfi 3261 ldfe FR_A00 = [GR_ad_Roots] 3262 fma.s1 FR_r4 = FR_r2,FR_r2,f0 3263 nop.i 0 3264};; 3265{ .mfi 3266 nop.m 0 3267 fma.s1 FR_A15 = FR_A15,FR_r,FR_A14 3268 nop.i 0 3269} 3270{ .mfi 3271 nop.m 0 3272 fma.s1 FR_A13 = FR_A13,FR_r,FR_A12 3273 nop.i 0 3274};; 3275.pred.rel "mutex",p9,p10 3276{ .mfi 3277 // store sign of gamma(x) as 32-bit int 3278(p9) st4 [r33] = GR_SignOfGamma 3279 fma.s1 FR_A11 = FR_A11,FR_r,FR_A10 3280 nop.i 0 3281} 3282{ .mfi 3283 // store sign of gamma(x) as 64-bit int 3284(p10) st8 [r33] = GR_SignOfGamma 3285 fma.s1 FR_A9 = FR_A9,FR_r,FR_A8 3286 nop.i 0 3287};; 3288{ .mfi 3289 nop.m 0 3290 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6 3291 nop.i 0 3292} 3293{ .mfi 3294 nop.m 0 3295 fma.s1 FR_A5 = FR_A5,FR_r,FR_A4 3296 nop.i 0 3297};; 3298{ .mfi 3299 nop.m 0 3300 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2 3301 nop.i 0 3302} 3303{ .mfi 3304 nop.m 0 3305 fma.s1 FR_r8 = FR_r4,FR_r4,f0 3306 nop.i 0 3307};; 3308{ .mfi 3309 nop.m 0 3310 fma.s1 FR_A1 = FR_A1,FR_r,FR_A0 3311 nop.i 0 3312};; 3313{ .mfi 3314 nop.m 0 3315 fma.s1 FR_A15 = FR_A15,FR_r2,FR_A13 3316 nop.i 0 3317};; 3318{ .mfi 3319 nop.m 0 3320 fma.s1 FR_A11 = FR_A11,FR_r2,FR_A9 3321 nop.i 0 3322};; 3323{ .mfi 3324 nop.m 0 3325 fma.s1 FR_A7 = FR_A7,FR_r2,FR_A5 3326 nop.i 0 3327};; 3328{ .mfi 3329 nop.m 0 3330 fma.s1 FR_A3 = FR_A3,FR_r2,FR_A1 3331 nop.i 0 3332};; 3333{ .mfi 3334 nop.m 0 3335 fma.s1 FR_A15 = FR_A15,FR_r4,FR_A11 3336 nop.i 0 3337};; 3338{ .mfi 3339 nop.m 0 3340 fma.s1 FR_A7 = FR_A7,FR_r4,FR_A3 3341 nop.i 0 3342};; 3343.pred.rel "mutex",p7,p8 3344{ .mfi 3345 nop.m 0 3346(p7) fma.s1 FR_A1 = FR_A15,FR_r8,FR_A7 3347 nop.i 0 3348} 3349{ .mfi 3350 nop.m 0 3351(p8) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7 3352 nop.i 0 3353};; 3354{ .mfb 3355 nop.m 0 
3356(p7) fma.d.s0 f8 = FR_A1,FR_r,FR_A00 3357 br.ret.sptk b0 3358};; 3359// branch for handling pseudo root on (-2;-1) 3360//--------------------------------------------------------------------- 3361.align 32 3362lgamma_pseudoroot: 3363{ .mmi 3364 ldfe FR_PR21 = [GR_ad_Co],32 3365 ldfe FR_PR31 = [GR_ad_Ce],32 3366 // set p9 if signgum is 32-bit int 3367 // set p10 if signgum is 64-bit int 3368 cmp.eq p10,p9 = 8,r34 3369};; 3370{ .mmi 3371 ldfe FR_PR00 = [GR_ad_Co],32 3372 ldfe FR_PR10 = [GR_ad_Ce],0xF0 3373 mov GR_SignOfGamma = 1 3374};; 3375{ .mmi 3376 ldfe FR_PR20 = [GR_ad_Co],0xF0 3377 ldfe FR_PR30 = [GR_ad_Ce] 3378 tbit.z p8,p0 = GR_Sig,0 3379};; 3380{ .mfi 3381 ldfe FR_PRN = [GR_ad_Co] 3382 fma.s1 FR_PR01 = f8,f1,FR_PR01 3383 nop.i 0 3384} 3385{ .mfi 3386 nop.m 0 3387 fma.s1 FR_PR11 = f8,f1,FR_PR11 3388(p8) sub GR_SignOfGamma = r0,GR_SignOfGamma 3389};; 3390.pred.rel "mutex",p9,p10 3391{ .mfi 3392 // store sign of gamma(x) as 32-bit int 3393(p9) st4 [r33] = GR_SignOfGamma 3394 fma.s1 FR_PR21 = f8,f1,FR_PR21 3395 nop.i 0 3396} 3397{ .mfi 3398 // store sign of gamma(x) as 64-bit int 3399(p10) st8 [r33] = GR_SignOfGamma 3400 fma.s1 FR_PR31 = f8,f1,FR_PR31 3401 nop.i 0 3402};; 3403{ .mfi 3404 nop.m 0 3405 fma.s1 FR_PR01 = f8,FR_PR01,FR_PR00 3406 nop.i 0 3407} 3408{ .mfi 3409 nop.m 0 3410 fma.s1 FR_PR11 = f8,FR_PR11,FR_PR10 3411 nop.i 0 3412};; 3413{ .mfi 3414 nop.m 0 3415 fma.s1 FR_PR21 = f8,FR_PR21,FR_PR20 3416 nop.i 0 3417} 3418{ .mfi 3419 nop.m 0 3420 fma.s1 FR_PR31 = f8,FR_PR31,FR_PR30 3421 nop.i 0 3422};; 3423{ .mfi 3424 nop.m 0 3425 fma.s1 FR_PR01 = FR_PR11,FR_PR01,f0 3426 nop.i 0 3427};; 3428{ .mfi 3429 nop.m 0 3430 fma.s1 FR_PR21 = FR_PR31,FR_PR21,f0 3431 nop.i 0 3432};; 3433{ .mfi 3434 nop.m 0 3435 fma.s1 FR_PR01 = FR_PR21,FR_PR01,f0 3436 nop.i 0 3437};; 3438{ .mfb 3439 nop.m 0 3440 fma.d.s0 f8 = FR_PR01,FR_PRN,f0 3441 br.ret.sptk b0 3442};; 3443// branch for handling +/-0, NaT, QNaN, +/-INF and denormalised numbers 
//---------------------------------------------------------------------
// Special-operand dispatch. Exits taken, in order:
//   +INF        -> return f8 unchanged
//   NaT / NaN   -> return f8*1 - f8
//   denormal    -> jump to lgamma_common
//   -INF        -> return f8 with the sign of f1
//   anything else falls through into lgamma_singularity
// On every path the sign +1 is first written through r33 (4 bytes, or
// 8 bytes when r34 == 8).
.align 32
lgamma_spec:
{ .mfi
      getf.exp           GR_SignExp = FR_NormX
      fclass.m           p6,p0 = f8,0x21    // p6 <- x is +INF
      mov                GR_SignOfGamma = 1
};;
{ .mfi
      getf.sig           GR_ArgAsIs = FR_NormX
      fclass.m           p7,p0 = f8,0xB     // p7 <- x is denormal
      // p12 <- signgam is a 64-bit int (r34 == 8)
      // p11 <- signgam is a 32-bit int (otherwise)
      cmp.eq             p12,p11 = 8,r34
};;
.pred.rel "mutex",p11,p12
{ .mfi
      // 32-bit signgam: write the sign as a 4-byte value
(p11) st4                [r33] = GR_SignOfGamma
      fclass.m           p8,p0 = f8,0x1C0   // p8 <- x is NaT or NaN
      // GR_Ind = low 4 bits of GR_SignExp placed at bit 8, zero elsewhere
      dep.z              GR_Ind = GR_SignExp,8,4
}
{ .mib
      // 64-bit signgam: write the sign as an 8-byte value
(p12) st8                [r33] = GR_SignOfGamma
      cmp.lt             p10,p0 = GR_SignExp,GR_ExpBias
(p6)  br.ret.spnt        b0                 // +INF: result is x itself
};;
{ .mfi
      and                GR_Exp = GR_SignExp,GR_ExpMask
      fclass.m           p9,p0 = f8,0x22    // p9 <- x is -INF
      nop.i              0
};;
{ .mfi
      add                GR_ad_Co = GR_Ind,GR_ad_Data
      // FR_tmp is not read in this branch; presumably this operation is
      // issued only for its status-flag side effect - TODO confirm
(p7)  fma.s0             FR_tmp = f8,f8,f8
      // keep the 52 bits of the significand that start at bit 11
      extr.u             GR_ArgAsIs = GR_ArgAsIs,11,52
}
{ .mfb
      nop.m              0
(p8)  fms.d.s0           f8 = f8,f1,f8      // x*1 - x
(p8)  br.ret.spnt        b0                 // NaT / NaN: done
};;
{ .mib
      nop.m              0
      shr.u              GR_Arg = GR_ArgAsIs,48
(p7)  br.cond.sptk       lgamma_common      // denormal: rejoin lgamma_common
};;
{ .mfb
      nop.m              0
      // sign taken from f1, exponent and significand from x
      // NOTE(review): the file header lists lgamma(-inf) = QNaN, yet this
      // returns an infinity carrying f1's sign - confirm which is intended
(p9)  fmerge.s           f8 = f1,f8
(p9)  br.ret.spnt        b0                 // -INF: done
};;
// branch for handling negative integers and +/-0
//---------------------------------------------------------------------
// Saves x in FR_X, replaces f8 with frcpa(f1/f0), selects error tag 106
// and continues at lgamma_libm_err. The sign is +1, or -1 when x is -0.
.align 32
lgamma_singularity:
{ .mfi
      mov                GR_ad_SignGam = r33
      fclass.m           p6,p0 = f8, 0x6    // p6 <- x is -0
      mov                GR_SignOfGamma = 1
}
{ .mfi
      // p10 <- signgam is a 64-bit int (r34 == 8)
      // p9  <- signgam is a 32-bit int (otherwise)
      cmp.eq             p10,p9 = 8,r34
      fma.s1             FR_X = f0,f0,f8    // FR_X = 0*0 + x, copy of x
      nop.i              0
};;
{ .mfi
      nop.m              0
      frcpa.s0           f8,p0 = f1,f0      // result from f1/f0
      mov                GR_TAG = 106       // negative
}
{ .mib
      nop.m              0
(p6)  sub                GR_SignOfGamma = r0,GR_SignOfGamma // -0: sign = -1
      br.cond.sptk       lgamma_libm_err
};;
// overflow: x is above the overflow boundary
//---------------------------------------------------------------------
// Saves x in FR_X, forms f8 = f9*f9 in double precision with f9 built
// from exponent field 0x1FFFE, selects error tag 105 and falls through
// into lgamma_libm_err. The sign is +1.
.align 32
lgamma_overflow:
{ .mfi
      mov                GR_SignOfGamma = 1
      nop.f              0
      mov                r8 = 0x1FFFE
};;
{ .mfi
      setf.exp           f9 = r8
      fmerge.s           FR_X = f8,f8       // copy of x
      mov                GR_TAG = 105       // overflow
};;
{ .mfi
      mov                GR_ad_SignGam = r33
      nop.f              0
      // p10 <- signgam is a 64-bit int (r34 == 8)
      // p9  <- signgam is a 32-bit int (otherwise)
      cmp.eq             p10,p9 = 8,r34
}
{ .mfi
      nop.m              0
      fma.d.s0           f8 = f9,f9,f0      // Set I,O and +INF result
      nop.i              0
};;
// Common tail of the error paths: allocates the register frame used for
// the call below (1 input, 4 locals, 4 outputs), passes the tag on,
// stores the sign through GR_ad_SignGam and falls through into
// __libm_error_region. Expects p9/p10, GR_TAG, GR_SignOfGamma and
// GR_ad_SignGam to be set by the branch that arrives here.
//---------------------------------------------------------------------
.align 32
lgamma_libm_err:
{ .mmi
      alloc              r32 = ar.pfs,1,4,4,0
      mov                GR_Parameter_TAG = GR_TAG
      nop.i              0
};;
.pred.rel "mutex",p9,p10
{ .mmi
      // 32-bit signgam: write the sign as a 4-byte value
(p9)  st4                [GR_ad_SignGam] = GR_SignOfGamma
      // 64-bit signgam: write the sign as an 8-byte value
(p10) st8                [GR_ad_SignGam] = GR_SignOfGamma
      nop.i              0
};;
GLOBAL_LIBM_END(__libm_lgamma)


// Builds a 64-byte stack frame, spills FR_X, FR_Y and FR_RESULT into it,
// calls __libm_error_support and returns the double found in the result
// slot afterwards.
// Frame layout relative to the new sp:
//   sp+16  FR_X        (GR_Parameter_X)
//   sp+32  FR_Y        (GR_Parameter_Y)
//   sp+48  FR_RESULT   (GR_Parameter_RESULT)
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add   GR_Parameter_Y=-32,sp             // old sp - 32 == new sp + 32
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // keep ar.pfs across the call
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // open the 64-byte frame
        nop.f 0
        mov GR_SAVE_GP=gp                       // keep gp across the call
};;
{ .mmi
        stfd [GR_Parameter_Y] = FR_Y,16         // spill FR_Y, advance to sp+48
        add GR_Parameter_X = 16,sp              // address of the FR_X slot
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // keep the return address
};;
.body
{ .mib
        stfd [GR_Parameter_X] = FR_X            // spill FR_X
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // result slot, sp+48
        nop.b 0
}
{ .mib
        stfd [GR_Parameter_Y] = FR_RESULT       // spill FR_RESULT to sp+48
        add   GR_Parameter_Y = -16,GR_Parameter_Y     // back to sp+32
        br.call.sptk b0=__libm_error_support#   // invoke the error handler
};;
{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp       // recompute result slot address
};;
{ .mmi
        ldfd  f8 = [GR_Parameter_RESULT]        // return value from the slot
.restore sp
        add   sp = 64,sp                        // close the frame
        mov   b0 = GR_SAVE_B0                   // restore the return address
};;
{ .mib
        mov   gp = GR_SAVE_GP                   // restore gp
        mov   ar.pfs = GR_SAVE_PFS              // restore ar.pfs
        br.ret.sptk     b0                      // back to the caller
};;

LOCAL_LIBM_END(__libm_error_region)
.type   __libm_error_support#,@function
.global __libm_error_support#