1.file "tgamma.s" 2 3 4// Copyright (c) 2001 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 25// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, 28// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code,and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39//********************************************************************* 40// 41// History: 42// 10/12/01 Initial version 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 04/04/03 Changed error codes for overflow and negative integers 46// 04/10/03 Changed code for overflow near zero handling 47// 03/31/05 Reformatted delimiters between data tables 48// 49//********************************************************************* 50// 51//********************************************************************* 52// 53// Function: tgamma(x) computes the principle value of the GAMMA 54// function of x. 55// 56//********************************************************************* 57// 58// Resources Used: 59// 60// Floating-Point Registers: f8-f15 61// f33-f87 62// 63// General Purpose Registers: 64// r8-r11 65// r14-r28 66// r32-r36 67// r37-r40 (Used to pass arguments to error handling routine) 68// 69// Predicate Registers: p6-p15 70// 71//********************************************************************* 72// 73// IEEE Special Conditions: 74// 75// tgamma(+inf) = +inf 76// tgamma(-inf) = QNaN 77// tgamma(+/-0) = +/-inf 78// tgamma(x<0, x - integer) = QNaN 79// tgamma(SNaN) = QNaN 80// tgamma(QNaN) = QNaN 81// 82//********************************************************************* 83// 84// Overview 85// 86// The method consists of three cases. 87// 88// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular; 89// else if 0 < x < 2 use case tgamma_from_0_to_2; 90// else if -(i+1) < x < -i, i = 0...184 use case tgamma_negatives; 91// 92// Case 2 <= x < OVERFLOW_BOUNDARY 93// ------------------------------- 94// Here we use algorithm based on the recursive formula 95// GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval 96// [2; OVERFLOW_BOUNDARY] into intervals [16*n; 16*(n+1)] and 97// approximate GAMMA(x) by polynomial of 22th degree on each 98// [16*n; 16*n+1], recursive formula is used to expand GAMMA(x) 99// to [16*n; 16*n+1]. In other words we need to find n, i and r 100// such that x = 16 * n + i + r where n and i are integer numbers 101// and r is fractional part of x. So GAMMA(x) = GAMMA(16*n+i+r) = 102// = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) = 103// = (x-1)*(x-2)*...*(x-i)*GAMMA(16*n+r) ~ 104// ~ (x-1)*(x-2)*...*(x-i)*P22n(r). 105// 106// Step 1: Reduction 107// ----------------- 108// N = [x] with truncate 109// r = x - N, note 0 <= r < 1 110// 111// n = N & ~0xF - index of table that contains coefficient of 112// polynomial approximation 113// i = N & 0xF - is used in recursive formula 114// 115// 116// Step 2: Approximation 117// --------------------- 118// We use factorized minimax approximation polynomials 119// P22n(r) = A22*(r^2+C01(n)*R+C00(n))* 120// *(r^2+C11(n)*R+C10(n))*...*(r^2+CA1(n)*R+CA0(n)) 121// 122// Step 3: Recursion 123// ----------------- 124// In case when i > 0 we need to multiply P22n(r) by product 125// R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions 126// we can calculate R as follow: 127// R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is 128// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))* 129// *(i-1) if i is odd. In both cases we need to calculate 130// R2(i) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) = 131// = (x^2-3*x+2)*(x^2-7*x+12)*...*((x^2+x)+2*j*(2*(j-1)+(1-2*x))) = 132// = (RA+2*(2-RB))*(RA+4*(4-RB))*...*(RA+2*j*(2*(j-1)+RB)) 133// where j = 1..[i/2], RA = x^2+x, RB = 1-2*x. 134// 135// Step 4: Reconstruction 136// ---------------------- 137// Reconstruction is just simple multiplication i.e. 138// GAMMA(x) = P22n(r)*R(i) 139// 140// Case 0 < x < 2 141// -------------- 142// To calculate GAMMA(x) on this interval we do following 143// if 1 <= x < 1.25 than GAMMA(x) = P15(x-1) 144// if 1.25 <= x < 1.5 than GAMMA(x) = P15(x-x_min) where 145// x_min is point of local minimum on [1; 2] interval. 146// if 1.5 <= x < 2.0 than GAMMA(x) = P15(x-1.5) 147// and 148// if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x 149// 150// Case -(i+1) < x < -i, i = 0...184 151// ---------------------------------- 152// Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and 153// so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of 154// GAMMA(x) is described above. 155// 156// Step 1: Reduction 157// ----------------- 158// Note that period of sin(PI*x) is 2 and range reduction for 159// sin(PI*x) is like to range reduction for GAMMA(x) 160// i.e r = x - [x] with exception of cases 161// when r > 0.5 (in such cases r = 1 - (x - [x])). 162// 163// Step 2: Approximation 164// --------------------- 165// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI = 166// = (-1)^n*sin(PI*r)/PI Taylor series is used. 167// sin(PI*r)/PI ~ S21(r). 168// 169// Step 3: Division 170// ---------------- 171// To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction 172// with following Newton-Raphson interations. 173// 174// 175//********************************************************************* 176 177GR_Sig = r8 178GR_TAG = r8 179GR_ad_Data = r9 180GR_SigRqLin = r10 181GR_iSig = r11 182GR_ExpOf1 = r11 183GR_ExpOf8 = r11 184 185 186GR_Sig2 = r14 187GR_Addr_Mask1 = r15 188GR_Sign_Exp = r16 189GR_Tbl_Offs = r17 190GR_Addr_Mask2 = r18 191GR_ad_Co = r19 192GR_Bit2 = r19 193GR_ad_Ce = r20 194GR_ad_Co7 = r21 195GR_NzOvfBound = r21 196GR_ad_Ce7 = r22 197GR_Tbl_Ind = r23 198GR_Tbl_16xInd = r24 199GR_ExpOf025 = r24 200GR_ExpOf05 = r25 201GR_0x30033 = r26 202GR_10 = r26 203GR_12 = r27 204GR_185 = r27 205GR_14 = r28 206GR_2 = r28 207GR_fpsr = r28 208 209GR_SAVE_B0 = r33 210GR_SAVE_PFS = r34 211GR_SAVE_GP = r35 212GR_SAVE_SP = r36 213 214GR_Parameter_X = r37 215GR_Parameter_Y = r38 216GR_Parameter_RESULT = r39 217GR_Parameter_TAG = r40 218 219 220 221FR_X = f10 222FR_Y = f1 // tgamma is single argument function 223FR_RESULT = f8 224 225FR_AbsX = f9 226FR_NormX = f9 227FR_r02 = f11 228FR_AbsXp1 = f12 229FR_X2pX = f13 230FR_1m2X = f14 231FR_Rq1 = f14 232FR_Xt = f15 233 234FR_r = f33 235FR_OvfBound = f34 236FR_Xmin = f35 237FR_2 = f36 238FR_Rcp1 = f36 239FR_Rcp3 = f36 240FR_4 = f37 241FR_5 = f38 242FR_6 = f39 243FR_8 = f40 244FR_10 = f41 245FR_12 = f42 246FR_14 = f43 247FR_GAMMA = f43 248FR_05 = f44 249 250FR_Rq2 = f45 251FR_Rq3 = f46 252FR_Rq4 = f47 253FR_Rq5 = f48 254FR_Rq6 = f49 255FR_Rq7 = f50 256FR_RqLin = f51 257 258FR_InvAn = f52 259 260FR_C01 = f53 261FR_A15 = f53 262FR_C11 = f54 263FR_A14 = f54 264FR_C21 = f55 265FR_A13 = f55 266FR_C31 = f56 267FR_A12 = f56 268FR_C41 = f57 269FR_A11 = f57 270FR_C51 = f58 271FR_A10 = f58 272FR_C61 = f59 273FR_A9 = f59 274FR_C71 = f60 275FR_A8 = f60 276FR_C81 = f61 277FR_A7 = f61 278FR_C91 = f62 279FR_A6 = f62 280FR_CA1 = f63 281FR_A5 = f63 282FR_C00 = f64 283FR_A4 = f64 284FR_rs2 = f64 285FR_C10 = f65 286FR_A3 = f65 287FR_rs3 = f65 288FR_C20 = f66 289FR_A2 = f66 290FR_rs4 = f66 291FR_C30 = f67 292FR_A1 = f67 293FR_rs7 = f67 294FR_C40 = f68 295FR_A0 = f68 296FR_rs8 = f68 297FR_C50 = f69 298FR_r2 = f69 299FR_C60 = f70 300FR_r3 = f70 301FR_C70 = f71 302FR_r4 = f71 303FR_C80 = f72 304FR_r7 = f72 305FR_C90 = f73 306FR_r8 = f73 307FR_CA0 = f74 308FR_An = f75 309 310FR_S21 = f76 311FR_S19 = f77 312FR_Rcp0 = f77 313FR_Rcp2 = f77 314FR_S17 = f78 315FR_S15 = f79 316FR_S13 = f80 317FR_S11 = f81 318FR_S9 = f82 319FR_S7 = f83 320FR_S5 = f84 321FR_S3 = f85 322 323FR_iXt = f86 324FR_rs = f87 325 326 327// Data tables 328//============================================================== 329RODATA 330.align 16 331 332LOCAL_OBJECT_START(tgamma_data) 333data8 0x406573FAE561F648 // overflow boundary (171.624376956302739927196) 334data8 0x3FDD8B618D5AF8FE // point of local minium (0.461632144968362356785) 335// 336//[2; 3] 337data8 0xEF0E85C9AE40ABE2,0x00004000 // C01 338data8 0xCA2049DDB4096DD8,0x00004000 // C11 339data8 0x99A203B4DC2D1A8C,0x00004000 // C21 340data8 0xBF5D9D9C0C295570,0x00003FFF // C31 341data8 0xE8DD037DEB833BAB,0x00003FFD // C41 342data8 0xB6AE39A2A36AA03A,0x0000BFFE // C51 343data8 0x804960DC2850277B,0x0000C000 // C61 344data8 0xD9F3973841C09F80,0x0000C000 // C71 345data8 0x9C198A676F8A2239,0x0000C001 // C81 346data8 0xC98B7DAE02BE3226,0x0000C001 // C91 347data8 0xE9CAF31AC69301BA,0x0000C001 // CA1 348data8 0xFBBDD58608A0D172,0x00004000 // C00 349data8 0xFDD0316D1E078301,0x00004000 // C10 350data8 0x8630B760468C15E4,0x00004001 // C20 351data8 0x93EDE20E47D9152E,0x00004001 // C30 352data8 0xA86F3A38C77D6B19,0x00004001 // C40 353//[16; 17] 354data8 0xF87F757F365EE813,0x00004000 // C01 355data8 0xECA84FBA92759DA4,0x00004000 // C11 356data8 0xD4E0A55E07A8E913,0x00004000 // C21 357data8 0xB0EB45E94C8A5F7B,0x00004000 // C31 358data8 0x8050D6B4F7C8617D,0x00004000 // C41 359data8 0x8471B111AA691E5A,0x00003FFF // C51 360data8 0xADAF462AF96585C9,0x0000BFFC // C61 361data8 0xD327C7A587A8C32B,0x0000BFFF // C71 362data8 0xDEF5192B4CF5E0F1,0x0000C000 // C81 363data8 0xBADD64BB205AEF02,0x0000C001 // C91 364data8 0x9330A24AA67D6860,0x0000C002 // CA1 365data8 0xF57EEAF36D8C47BE,0x00004000 // C00 366data8 0x807092E12A251B38,0x00004001 // C10 367data8 0x8C458F80DEE7ED1C,0x00004001 // C20 368data8 0x9F30C731DC77F1A6,0x00004001 // C30 369data8 0xBAC4E7E099C3A373,0x00004001 // C40 370//[32; 33] 371data8 0xC3059A415F142DEF,0x00004000 // C01 372data8 0xB9C1DAC24664587A,0x00004000 // C11 373data8 0xA7101D910992FFB2,0x00004000 // C21 374data8 0x8A9522B8E4AA0AB4,0x00004000 // C31 375data8 0xC76A271E4BA95DCC,0x00003FFF // C41 376data8 0xC5D6DE2A38DB7FF2,0x00003FFE // C51 377data8 0xDBA42086997818B2,0x0000BFFC // C61 378data8 0xB8EDDB1424C1C996,0x0000BFFF // C71 379data8 0xBF7372FB45524B5D,0x0000C000 // C81 380data8 0xA03DDE759131580A,0x0000C001 // C91 381data8 0xFDA6FC4022C1FFE3,0x0000C001 // CA1 382data8 0x9759ABF797B2533D,0x00004000 // C00 383data8 0x9FA160C6CF18CEC5,0x00004000 // C10 384data8 0xB0EFF1E3530E0FCD,0x00004000 // C20 385data8 0xCCD60D5C470165D1,0x00004000 // C30 386data8 0xF5E53F6307B0B1C1,0x00004000 // C40 387//[48; 49] 388data8 0xAABE577FBCE37F5E,0x00004000 // C01 389data8 0xA274CAEEB5DF7172,0x00004000 // C11 390data8 0x91B90B6646C1B924,0x00004000 // C21 391data8 0xF06718519CA256D9,0x00003FFF // C31 392data8 0xAA9EE181C0E30263,0x00003FFF // C41 393data8 0xA07BDB5325CB28D2,0x00003FFE // C51 394data8 0x86C8B873204F9219,0x0000BFFD // C61 395data8 0xB0192C5D3E4787D6,0x0000BFFF // C71 396data8 0xB1E0A6263D4C19EF,0x0000C000 // C81 397data8 0x93BA32A118EAC9AE,0x0000C001 // C91 398data8 0xE942A39CD9BEE887,0x0000C001 // CA1 399data8 0xE838B0957B0D3D0D,0x00003FFF // C00 400data8 0xF60E0F00074FCF34,0x00003FFF // C10 401data8 0x89869936AE00C2A5,0x00004000 // C20 402data8 0xA0FE4E8AA611207F,0x00004000 // C30 403data8 0xC3B1229CFF1DDAFE,0x00004000 // C40 404//[64; 65] 405data8 0x9C00DDF75CDC6183,0x00004000 // C01 406data8 0x9446AE9C0F6A833E,0x00004000 // C11 407data8 0x84ABC5083310B774,0x00004000 // C21 408data8 0xD9BA3A0977B1ED83,0x00003FFF // C31 409data8 0x989B18C99411D300,0x00003FFF // C41 410data8 0x886E66402318CE6F,0x00003FFE // C51 411data8 0x99028C2468F18F38,0x0000BFFD // C61 412data8 0xAB72D17DCD40CCE1,0x0000BFFF // C71 413data8 0xA9D9AC9BE42C2EF9,0x0000C000 // C81 414data8 0x8C11D983AA177AD2,0x0000C001 // C91 415data8 0xDC779E981C1F0F06,0x0000C001 // CA1 416data8 0xC1FD4AC85965E8D6,0x00003FFF // C00 417data8 0xCE3D2D909D389EC2,0x00003FFF // C10 418data8 0xE7F79980AD06F5D8,0x00003FFF // C20 419data8 0x88DD9F73C8680B5D,0x00004000 // C30 420data8 0xA7D6CB2CB2D46F9D,0x00004000 // C40 421//[80; 81] 422data8 0x91C7FF4E993430D0,0x00004000 // C01 423data8 0x8A6E7AB83E45A7E9,0x00004000 // C11 424data8 0xF72D6382E427BEA9,0x00003FFF // C21 425data8 0xC9E2E4F9B3B23ED6,0x00003FFF // C31 426data8 0x8BEFEF56AE05D775,0x00003FFF // C41 427data8 0xEE9666AB6A185560,0x00003FFD // C51 428data8 0xA6AFAF5CEFAEE04D,0x0000BFFD // C61 429data8 0xA877EAFEF1F9C880,0x0000BFFF // C71 430data8 0xA45BD433048ECA15,0x0000C000 // C81 431data8 0x86BD1636B774CC2E,0x0000C001 // C91 432data8 0xD3721BE006E10823,0x0000C001 // CA1 433data8 0xA97EFABA91854208,0x00003FFF // C00 434data8 0xB4AF0AEBB3F97737,0x00003FFF // C10 435data8 0xCC38241936851B0B,0x00003FFF // C20 436data8 0xF282A6261006EA84,0x00003FFF // C30 437data8 0x95B8E9DB1BD45BAF,0x00004000 // C40 438//[96; 97] 439data8 0x8A1FA3171B35A106,0x00004000 // C01 440data8 0x830D5B8843890F21,0x00004000 // C11 441data8 0xE98B0F1616677A23,0x00003FFF // C21 442data8 0xBDF8347F5F67D4EC,0x00003FFF // C31 443data8 0x825F15DE34EC055D,0x00003FFF // C41 444data8 0xD4846186B8AAC7BE,0x00003FFD // C51 445data8 0xB161093AB14919B1,0x0000BFFD // C61 446data8 0xA65758EEA4800EF4,0x0000BFFF // C71 447data8 0xA046B67536FA329C,0x0000C000 // C81 448data8 0x82BBEC1BCB9E9068,0x0000C001 // C91 449data8 0xCC9DE2B23BA91B0B,0x0000C001 // CA1 450data8 0x983B16148AF77F94,0x00003FFF // C00 451data8 0xA2A4D8EE90FEE5DD,0x00003FFF // C10 452data8 0xB89446FA37FF481C,0x00003FFF // C20 453data8 0xDC5572648485FB01,0x00003FFF // C30 454data8 0x88CD5D7DB976129A,0x00004000 // C40 455//[112; 113] 456data8 0x8417098FD62AC5E3,0x00004000 // C01 457data8 0xFA7896486B779CBB,0x00003FFF // C11 458data8 0xDEC98B14AF5EEBD1,0x00003FFF // C21 459data8 0xB48E153C6BF0B5A3,0x00003FFF // C31 460data8 0xF597B038BC957582,0x00003FFE // C41 461data8 0xBFC6F0884A415694,0x00003FFD // C51 462data8 0xBA075A1392BDB5E5,0x0000BFFD // C61 463data8 0xA4B79E01B44C7DB4,0x0000BFFF // C71 464data8 0x9D12FA7711BFAB0F,0x0000C000 // C81 465data8 0xFF24C47C8E108AB4,0x0000C000 // C91 466data8 0xC7325EC86562606A,0x0000C001 // CA1 467data8 0x8B47DCD9E1610938,0x00003FFF // C00 468data8 0x9518B111B70F88B8,0x00003FFF // C10 469data8 0xA9CC197206F68682,0x00003FFF // C20 470data8 0xCB98294CC0D7A6A6,0x00003FFF // C30 471data8 0xFE09493EA9165181,0x00003FFF // C40 472//[128; 129] 473data8 0xFE53D03442270D90,0x00003FFF // C01 474data8 0xF0F857BAEC1993E4,0x00003FFF // C11 475data8 0xD5FF6D70DBBC2FD3,0x00003FFF // C21 476data8 0xACDAA5F4988B1074,0x00003FFF // C31 477data8 0xE92E069F8AD75B54,0x00003FFE // C41 478data8 0xAEBB64645BD94234,0x00003FFD // C51 479data8 0xC13746249F39B43C,0x0000BFFD // C61 480data8 0xA36B74F5B6297A1F,0x0000BFFF // C71 481data8 0x9A77860DF180F6E5,0x0000C000 // C81 482data8 0xF9F8457D84410A0C,0x0000C000 // C91 483data8 0xC2BF44C649EB8597,0x0000C001 // CA1 484data8 0x81225E7489BCDC0E,0x00003FFF // C00 485data8 0x8A788A09CE0EED11,0x00003FFF // C10 486data8 0x9E2E6F86D1B1D89C,0x00003FFF // C20 487data8 0xBE6866B21CF6CCB5,0x00003FFF // C30 488data8 0xEE94426EC1486AAE,0x00003FFF // C40 489//[144; 145] 490data8 0xF6113E09732A6497,0x00003FFF // C01 491data8 0xE900D45931B04FC8,0x00003FFF // C11 492data8 0xCE9FD58F745EBA5D,0x00003FFF // C21 493data8 0xA663A9636C864C86,0x00003FFF // C31 494data8 0xDEBF5315896CE629,0x00003FFE // C41 495data8 0xA05FEA415EBD7737,0x00003FFD // C51 496data8 0xC750F112BD9C4031,0x0000BFFD // C61 497data8 0xA2593A35C51C6F6C,0x0000BFFF // C71 498data8 0x9848E1DA7FB40C8C,0x0000C000 // C81 499data8 0xF59FEE87A5759A4B,0x0000C000 // C91 500data8 0xBF00203909E45A1D,0x0000C001 // CA1 501data8 0xF1D8E157200127E5,0x00003FFE // C00 502data8 0x81DD5397CB08D487,0x00003FFF // C10 503data8 0x94C1DC271A8B766F,0x00003FFF // C20 504data8 0xB3AFAF9B5D6EDDCF,0x00003FFF // C30 505data8 0xE1FB4C57CA81BE1E,0x00003FFF // C40 506//[160; 161] 507data8 0xEEFFE5122AC72FFD,0x00003FFF // C01 508data8 0xE22F70BB52AD54B3,0x00003FFF // C11 509data8 0xC84FF021FE993EEA,0x00003FFF // C21 510data8 0xA0DA2208EB5B2752,0x00003FFF // C31 511data8 0xD5CDD2FCF8AD2DF5,0x00003FFE // C41 512data8 0x940BEC6DCD811A59,0x00003FFD // C51 513data8 0xCC954EF4FD4EBB81,0x0000BFFD // C61 514data8 0xA1712E29A8C04554,0x0000BFFF // C71 515data8 0x966B55DFB243521A,0x0000C000 // C81 516data8 0xF1E6A2B9CEDD0C4C,0x0000C000 // C91 517data8 0xBBC87BCC031012DB,0x0000C001 // CA1 518data8 0xE43974E6D2818583,0x00003FFE // C00 519data8 0xF5702A516B64C5B7,0x00003FFE // C10 520data8 0x8CEBCB1B32E19471,0x00003FFF // C20 521data8 0xAAC10F05BB77E0AF,0x00003FFF // C30 522data8 0xD776EFCAB205CC58,0x00003FFF // C40 523//[176; 177] 524data8 0xE8DA614119811E5D,0x00003FFF // C01 525data8 0xDC415E0288B223D8,0x00003FFF // C11 526data8 0xC2D2243E44EC970E,0x00003FFF // C21 527data8 0x9C086664B5307BEA,0x00003FFF // C31 528data8 0xCE03D7A08B461156,0x00003FFE // C41 529data8 0x894BE3BAAAB66ADC,0x00003FFD // C51 530data8 0xD131EDD71A702D4D,0x0000BFFD // C61 531data8 0xA0A907CDDBE10898,0x0000BFFF // C71 532data8 0x94CC3CD9C765C808,0x0000C000 // C81 533data8 0xEEA85F237815FC0D,0x0000C000 // C91 534data8 0xB8FA04B023E43F91,0x0000C001 // CA1 535data8 0xD8B2C7D9FCBD7EF9,0x00003FFE // C00 536data8 0xE9566E93AAE7E38F,0x00003FFE // C10 537data8 0x8646E78AABEF0255,0x00003FFF // C20 538data8 0xA32AEDB62E304345,0x00003FFF // C30 539data8 0xCE83E40280EE7DF0,0x00003FFF // C40 540// 541//[2; 3] 542data8 0xC44FB47E90584083,0x00004001 // C50 543data8 0xE863EE77E1C45981,0x00004001 // C60 544data8 0x8AC15BE238B9D70E,0x00004002 // C70 545data8 0xA5D94B6592350EF4,0x00004002 // C80 546data8 0xC379DB3E20A148B3,0x00004002 // C90 547data8 0xDACA49B73974F6C9,0x00004002 // CA0 548data8 0x810E496A1AFEC895,0x00003FE1 // An 549//[16; 17] 550data8 0xE17C0357AAF3F817,0x00004001 // C50 551data8 0x8BA8804750FBFBFE,0x00004002 // C60 552data8 0xB18EAB3CB64BEBEE,0x00004002 // C70 553data8 0xE90AB7015AF1C28F,0x00004002 // C80 554data8 0xA0AB97CE9E259196,0x00004003 // C90 555data8 0xF5E0E0A000C2D720,0x00004003 // CA0 556data8 0xD97F0F87EC791954,0x00004005 // An 557//[32; 33] 558data8 0x980C293F3696040D,0x00004001 // C50 559data8 0xC0DBFFBB948A9A4E,0x00004001 // C60 560data8 0xFAB54625E9A588A2,0x00004001 // C70 561data8 0xA7E08176D6050FBF,0x00004002 // C80 562data8 0xEBAAEC4952270A9F,0x00004002 // C90 563data8 0xB7479CDAD20550FE,0x00004003 // CA0 564data8 0xAACD45931C3FF634,0x00004054 // An 565//[48; 49] 566data8 0xF5180F0000419AD5,0x00004000 // C50 567data8 0x9D507D07BFBB2273,0x00004001 // C60 568data8 0xCEB53F7A13A383E3,0x00004001 // C70 569data8 0x8BAFEF9E0A49128F,0x00004002 // C80 570data8 0xC58EF912D39E228C,0x00004002 // C90 571data8 0x9A88118422BA208E,0x00004003 // CA0 572data8 0xBD6C0E2477EC12CB,0x000040AC // An 573//[64; 65] 574data8 0xD410AC48BF7748DA,0x00004000 // C50 575data8 0x89399B90AFEBD931,0x00004001 // C60 576data8 0xB596DF8F77EB8560,0x00004001 // C70 577data8 0xF6D9445A047FB4A6,0x00004001 // C80 578data8 0xAF52F0DD65221357,0x00004002 // C90 579data8 0x8989B45BFC881989,0x00004003 // CA0 580data8 0xB7FCAE86E6E10D5A,0x0000410B // An 581//[80; 81] 582data8 0xBE759740E3B5AA84,0x00004000 // C50 583data8 0xF8037B1B07D27609,0x00004000 // C60 584data8 0xA4F6F6C7F0977D4F,0x00004001 // C70 585data8 0xE131960233BF02C4,0x00004001 // C80 586data8 0xA06DF43D3922BBE2,0x00004002 // C90 587data8 0xFC266AB27255A360,0x00004002 // CA0 588data8 0xD9F4B012EDAFEF2F,0x0000416F // An 589//[96; 97] 590data8 0xAEFC84CDA8E1EAA6,0x00004000 // C50 591data8 0xE5009110DB5F3C8A,0x00004000 // C60 592data8 0x98F5F48738E7B232,0x00004001 // C70 593data8 0xD17EE64E21FFDC6B,0x00004001 // C80 594data8 0x9596F7A7E36145CC,0x00004002 // C90 595data8 0xEB64DBE50E125CAF,0x00004002 // CA0 596data8 0xA090530D79E32D2E,0x000041D8 // An 597//[112; 113] 598data8 0xA33AEA22A16B2655,0x00004000 // C50 599data8 0xD682B93BD7D7945C,0x00004000 // C60 600data8 0x8FC854C6E6E30CC3,0x00004001 // C70 601data8 0xC5754D828AFFDC7A,0x00004001 // C80 602data8 0x8D41216B397139C2,0x00004002 // C90 603data8 0xDE78D746848116E5,0x00004002 // CA0 604data8 0xB8A297A2DC0630DB,0x00004244 // An 605//[128; 129] 606data8 0x99EB00F11D95E292,0x00004000 // C50 607data8 0xCB005CB911EB779A,0x00004000 // C60 608data8 0x8879AA2FDFF3A37A,0x00004001 // C70 609data8 0xBBDA538AD40CAC2C,0x00004001 // C80 610data8 0x8696D849D311B9DE,0x00004002 // C90 611data8 0xD41E1C041481199F,0x00004002 // CA0 612data8 0xEBA1A43D34EE61EE,0x000042B3 // An 613//[144; 145] 614data8 0x924F822578AA9F3D,0x00004000 // C50 615data8 0xC193FAF9D3B36960,0x00004000 // C60 616data8 0x827AE3A6B68ED0CA,0x00004001 // C70 617data8 0xB3F52A27EED23F0B,0x00004001 // C80 618data8 0x811A079FB3C94D79,0x00004002 // C90 619data8 0xCB94415470B6F8D2,0x00004002 // CA0 620data8 0x80A0260DCB3EC9AC,0x00004326 // An 621//[160; 161] 622data8 0x8BF24091E88B331D,0x00004000 // C50 623data8 0xB9ADE01187E65201,0x00004000 // C60 624data8 0xFAE4508F6E7625FE,0x00004000 // C70 625data8 0xAD516668AD6D7367,0x00004001 // C80 626data8 0xF8F5FF171154F637,0x00004001 // C90 627data8 0xC461321268990C82,0x00004002 // CA0 628data8 0xC3B693F344B0E6FE,0x0000439A // An 629// 630//[176; 177] 631data8 0x868545EB42A258ED,0x00004000 // C50 632data8 0xB2EF04ACE8BA0E6E,0x00004000 // C60 633data8 0xF247D22C22E69230,0x00004000 // C70 634data8 0xA7A1AB93E3981A90,0x00004001 // C80 635data8 0xF10951733E2C697F,0x00004001 // C90 636data8 0xBE3359BFAD128322,0x00004002 // CA0 637data8 0x8000000000000000,0x00003fff 638// 639//[160; 161] for negatives 640data8 0xA76DBD55B2E32D71,0x00003C63 // 1/An 641// 642// sin(pi*x)/pi 643data8 0xBCBC4342112F52A2,0x00003FDE // S21 644data8 0xFAFCECB86536F655,0x0000BFE3 // S19 645data8 0x87E4C97F9CF09B92,0x00003FE9 // S17 646data8 0xEA124C68E704C5CB,0x0000BFED // S15 647data8 0x9BA38CFD59C8AA1D,0x00003FF2 // S13 648data8 0x99C0B552303D5B21,0x0000BFF6 // S11 649// 650//[176; 177] for negatives 651data8 0xBA5D5869211696FF,0x00003BEC // 1/An 652// 653// sin(pi*x)/pi 654data8 0xD63402E79A853175,0x00003FF9 // S9 655data8 0xC354723906DB36BA,0x0000BFFC // S7 656data8 0xCFCE5A015E236291,0x00003FFE // S5 657data8 0xD28D3312983E9918,0x0000BFFF // S3 658// 659// 660// [1.0;1.25] 661data8 0xA405530B067ECD3C,0x0000BFFC // A15 662data8 0xF5B5413F95E1C282,0x00003FFD // A14 663data8 0xC4DED71C782F76C8,0x0000BFFE // A13 664data8 0xECF7DDDFD27C9223,0x00003FFE // A12 665data8 0xFB73D31793068463,0x0000BFFE // A11 666data8 0xFF173B7E66FD1D61,0x00003FFE // A10 667data8 0xFFA5EF3959089E94,0x0000BFFE // A9 668data8 0xFF8153BD42E71A4F,0x00003FFE // A8 669data8 0xFEF9CAEE2CB5B533,0x0000BFFE // A7 670data8 0xFE3F02E5EDB6811E,0x00003FFE // A6 671data8 0xFB64074CED2658FB,0x0000BFFE // A5 672data8 0xFB52882A095B18A4,0x00003FFE // A4 673data8 0xE8508C7990A0DAC0,0x0000BFFE // A3 674data8 0xFD32C611D8A881D0,0x00003FFE // A2 675data8 0x93C467E37DB0C536,0x0000BFFE // A1 676data8 0x8000000000000000,0x00003FFF // A0 677// 678// [1.25;1.5] 679data8 0xD038092400619677,0x0000BFF7 // A15 680data8 0xEA6DE925E6EB8C8F,0x00003FF3 // A14 681data8 0xC53F83645D4597FC,0x0000BFF7 // A13 682data8 0xE366DB2FB27B7ECD,0x00003FF7 // A12 683data8 0xAC8FD5E11F6EEAD8,0x0000BFF8 // A11 684data8 0xFB14010FB3697785,0x00003FF8 // A10 685data8 0xB6F91CB5C371177B,0x0000BFF9 // A9 686data8 0x85A262C6F8FEEF71,0x00003FFA // A8 687data8 0xC038E6E3261568F9,0x0000BFFA // A7 688data8 0x8F4BDE8883232364,0x00003FFB // A6 689data8 0xBCFBBD5786537E9A,0x0000BFFB // A5 690data8 0xA4C08BAF0A559479,0x00003FFC // A4 691data8 0x85D74FA063E81476,0x0000BFFC // A3 692data8 0xDB629FB9BBDC1C4E,0x00003FFD // A2 693data8 0xF4F8FBC7C0C9D317,0x00003FC6 // A1 694data8 0xE2B6E4153A57746C,0x00003FFE // A0 695// 696// [1.25;1.5] 697data8 0x9533F9D3723B448C,0x0000BFF2 // A15 698data8 0xF1F75D3C561CBBAF,0x00003FF5 // A14 699data8 0xBA55A9A1FC883523,0x0000BFF8 // A13 700data8 0xB5D5E9E5104FA995,0x00003FFA // A12 701data8 0xFD84F35B70CD9AE2,0x0000BFFB // A11 702data8 0x87445235F4688CC5,0x00003FFD // A10 703data8 0xE7F236EBFB9F774E,0x0000BFFD // A9 704data8 0xA6605F2721F787CE,0x00003FFE // A8 705data8 0xCF579312AD7EAD72,0x0000BFFE // A7 706data8 0xE96254A2407A5EAC,0x00003FFE // A6 707data8 0xF41312A8572ED346,0x0000BFFE // A5 708data8 0xF9535027C1B1F795,0x00003FFE // A4 709data8 0xE7E82D0C613A8DE4,0x0000BFFE // A3 710data8 0xFD23CD9741B460B8,0x00003FFE // A2 711data8 0x93C30FD9781DBA88,0x0000BFFE // A1 712data8 0xFFFFF1781FDBEE84,0x00003FFE // A0 713LOCAL_OBJECT_END(tgamma_data) 714 715 716//============================================================== 717// Code 718//============================================================== 719 720.section .text 721GLOBAL_LIBM_ENTRY(tgamma) 722{ .mfi 723 getf.exp GR_Sign_Exp = f8 724 fma.s1 FR_1m2X = f8,f1,f8 // 2x 725 addl GR_ad_Data = @ltoff(tgamma_data), gp 726} 727{ .mfi 728 mov GR_ExpOf8 = 0x10002 // 8 729 fcvt.fx.trunc.s1 FR_iXt = f8 // [x] 730 mov GR_ExpOf05 = 0xFFFE // 0.5 731};; 732{ .mfi 733 getf.sig GR_Sig = f8 734 fma.s1 FR_2 = f1,f1,f1 // 2 735 mov GR_Addr_Mask1 = 0x780 736} 737{ .mlx 738 setf.exp FR_8 = GR_ExpOf8 739 movl GR_10 = 0x4024000000000000 740};; 741{ .mfi 742 ld8 GR_ad_Data = [GR_ad_Data] 743 fcmp.lt.s1 p14,p15 = f8,f0 744 tbit.z p12,p13 = GR_Sign_Exp,0x10 // p13 if x >= 2 745} 746{ .mlx 747 and GR_Bit2 = 4,GR_Sign_Exp 748 movl GR_12 = 0x4028000000000000 749};; 750{ .mfi 751 setf.d FR_10 = GR_10 752 fma.s1 FR_r02 = f8,f1,f0 753 extr.u GR_Tbl_Offs = GR_Sig,58,6 754} 755{ .mfi 756(p12) mov GR_Addr_Mask1 = r0 757 fma.s1 FR_NormX = f8,f1,f0 758 cmp.ne p8,p0 = GR_Bit2,r0 759};; 760{ .mfi 761(p8) shladd GR_Tbl_Offs = GR_Tbl_Offs,4,r0 762 fclass.m p10,p0 = f8,0x1E7 // Test x for NaTVal, NaN, +/-0, +/-INF 763 tbit.nz p11,p0 = GR_Sign_Exp,1 764} 765{ .mlx 766 add GR_Addr_Mask2 = GR_Addr_Mask1,GR_Addr_Mask1 767 movl GR_14 = 0x402C000000000000 768};; 769.pred.rel "mutex",p14,p15 770{ .mfi 771 setf.d FR_12 = GR_12 772(p14) fma.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x| 773 tbit.nz p8,p9 = GR_Sign_Exp,0 774} 775{ .mfi 776 ldfpd FR_OvfBound,FR_Xmin = [GR_ad_Data],16 777(p15) fms.s1 FR_1m2X = f1,f1,FR_1m2X // RB=1-2|x| 778(p11) shladd GR_Tbl_Offs = GR_Tbl_Offs,2,r0 779};; 780.pred.rel "mutex",p9,p8 781{ .mfi 782 setf.d FR_14 = GR_14 783 fma.s1 FR_4 = FR_2,FR_2,f0 784(p8) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask1 785} 786{ .mfi 787 setf.exp FR_05 = GR_ExpOf05 788 fma.s1 FR_6 = FR_2,FR_2,FR_2 789(p9) and GR_Tbl_Offs = GR_Tbl_Offs, GR_Addr_Mask2 790};; 791.pred.rel "mutex",p9,p8 792{ .mfi 793(p8) shladd GR_ad_Co = GR_Tbl_Offs,1,GR_ad_Data 794 fcvt.xf FR_Xt = FR_iXt // [x] 795(p15) tbit.z.unc p11,p0 = GR_Sign_Exp,0x10 // p11 if 0 < x < 2 796} 797{ .mfi 798(p9) add GR_ad_Co = GR_ad_Data,GR_Tbl_Offs 799 fma.s1 FR_5 = FR_2,FR_2,f1 800(p15) cmp.lt.unc p7,p6 = GR_ExpOf05,GR_Sign_Exp // p7 if 0 < x < 1 801};; 802{ .mfi 803 add GR_ad_Ce = 16,GR_ad_Co 804(p11) frcpa.s1 FR_Rcp0,p0 = f1,f8 805 sub GR_Tbl_Offs = GR_ad_Co,GR_ad_Data 806} 807{ .mfb 808 ldfe FR_C01 = [GR_ad_Co],32 809(p7) fms.s1 FR_r02 = FR_r02,f1,f1 810 // jump if x is NaTVal, NaN, +/-0, +/-INF 811(p10) br.cond.spnt tgamma_spec 812};; 813.pred.rel "mutex",p14,p15 814{ .mfi 815 ldfe FR_C11 = [GR_ad_Ce],32 816(p14) fms.s1 FR_X2pX = f8,f8,f8 // RA=x^2+|x| 817 shr GR_Tbl_Ind = GR_Tbl_Offs,8 818} 819{ .mfb 820 ldfe FR_C21 = [GR_ad_Co],32 821(p15) fma.s1 FR_X2pX = f8,f8,f8 // RA=x^2+x 822 // jump if 0 < x < 2 823(p11) br.cond.spnt tgamma_from_0_to_2 824};; 825{ .mfi 826 ldfe FR_C31 = [GR_ad_Ce],32 827 fma.s1 FR_Rq2 = FR_2,f1,FR_1m2X // 2 + B 828 cmp.ltu p7,p0=0xB,GR_Tbl_Ind 829} 830{ .mfb 831 ldfe FR_C41 = [GR_ad_Co],32 832 fma.s1 FR_Rq3 = FR_2,FR_2,FR_1m2X // 4 + B 833 // jump if GR_Tbl_Ind > 11, i.e |x| is more than 192 834(p7) br.cond.spnt tgamma_spec_res 835};; 836{ .mfi 837 ldfe FR_C51 = [GR_ad_Ce],32 838 fma.s1 FR_Rq4 = FR_6,f1,FR_1m2X // 6 + B 839 shr GR_Tbl_Offs = GR_Tbl_Offs,1 840} 841{ .mfi 842 ldfe FR_C61 = [GR_ad_Co],32 843 fma.s1 FR_Rq5 = FR_4,FR_2,FR_1m2X // 8 + B 844 nop.i 0 845};; 846{ .mfi 847 ldfe FR_C71 = [GR_ad_Ce],32 848(p14) fms.s1 FR_r = FR_Xt,f1,f8 // r = |x| - [|x|] 849 shr GR_Tbl_16xInd = GR_Tbl_Offs,3 850} 851{ .mfi 852 ldfe FR_C81 = [GR_ad_Co],32 853(p15) fms.s1 FR_r = f8,f1,FR_Xt // r = x - [x] 854 add GR_ad_Data = 0xC00,GR_ad_Data 855};; 856{ .mfi 857 ldfe FR_C91 = [GR_ad_Ce],32 858 fma.s1 FR_Rq6 = FR_5,FR_2,FR_1m2X // 10 + B 859(p14) mov GR_0x30033 = 0x30033 860} 861{ .mfi 862 ldfe FR_CA1 = [GR_ad_Co],32 863 fma.s1 FR_Rq7 = FR_6,FR_2,FR_1m2X // 12 + B 864 sub GR_Tbl_Offs = GR_Tbl_Offs,GR_Tbl_16xInd 865};; 866{ .mfi 867 ldfe FR_C00 = [GR_ad_Ce],32 868 fma.s1 FR_Rq1 = FR_Rq1,FR_2,FR_X2pX // (x-1)*(x-2) 869(p13) cmp.eq.unc p8,p0 = r0,GR_Tbl_16xInd // index is 0 i.e. arg from [2;16) 870} 871{ .mfi 872 ldfe FR_C10 = [GR_ad_Co],32 873(p14) fms.s1 FR_AbsX = f0,f0,FR_NormX // absolute value of argument 874 add GR_ad_Co7 = GR_ad_Data,GR_Tbl_Offs 875};; 876{ .mfi 877 ldfe FR_C20 = [GR_ad_Ce],32 878 fma.s1 FR_Rq2 = FR_Rq2,FR_4,FR_X2pX // (x-3)*(x-4) 879 add GR_ad_Ce7 = 16,GR_ad_Co7 880} 881{ .mfi 882 ldfe FR_C30 = [GR_ad_Co],32 883 fma.s1 FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6) 884 nop.i 0 885};; 886{ .mfi 887 ldfe FR_C40 = [GR_ad_Ce],32 888 fma.s1 FR_Rq4 = FR_Rq4,FR_8,FR_X2pX // (x-7)*(x-8) 889(p14) cmp.leu.unc p7,p0 = GR_0x30033,GR_Sign_Exp 890} 891{ .mfb 892 ldfe FR_C50 = [GR_ad_Co7],32 893 fma.s1 FR_Rq5 = FR_Rq5,FR_10,FR_X2pX // (x-9)*(x-10) 894 // jump if x is less or equal to -2^52, i.e. x is big negative integer 895(p7) br.cond.spnt tgamma_singularity 896};; 897{ .mfi 898 ldfe FR_C60 = [GR_ad_Ce7],32 899 fma.s1 FR_C01 = FR_C01,f1,FR_r 900 add GR_ad_Ce = 0x560,GR_ad_Data 901} 902{ .mfi 903 ldfe FR_C70 = [GR_ad_Co7],32 904 fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x) 905 add GR_ad_Co = 0x550,GR_ad_Data 906};; 907{ .mfi 908 ldfe FR_C80 = [GR_ad_Ce7],32 909 fma.s1 FR_C11 = FR_C11,f1,FR_r 910 nop.i 0 911} 912{ .mfi 913 ldfe FR_C90 = [GR_ad_Co7],32 914 fma.s1 FR_C21 = FR_C21,f1,FR_r 915 nop.i 0 916};; 917.pred.rel "mutex",p12,p13 918{ .mfi 919(p13) getf.sig GR_iSig = FR_iXt 920 fcmp.lt.s1 p11,p0 = FR_05,FR_r 921 mov GR_185 = 185 922} 923{ .mfi 924 nop.m 0 925 fma.s1 FR_Rq6 = FR_Rq6,FR_12,FR_X2pX // (x-11)*(x-12) 926 nop.i 0 927};; 928{ .mfi 929 ldfe FR_CA0 = [GR_ad_Ce7],32 930 fma.s1 FR_C31 = FR_C31,f1,FR_r 931(p12) mov GR_iSig = 0 932} 933{ .mfi 934 ldfe FR_An = [GR_ad_Co7],0x80 935 fma.s1 FR_C41 = FR_C41,f1,FR_r 936 nop.i 0 937};; 938{ .mfi 939(p14) getf.sig GR_Sig = FR_r 940 fma.s1 FR_C51 = FR_C51,f1,FR_r 941(p14) sub GR_iSig = r0,GR_iSig 942} 943{ .mfi 944 ldfe FR_S21 = [GR_ad_Co],32 945 fma.s1 FR_C61 = FR_C61,f1,FR_r 946 nop.i 0 947};; 948{ .mfi 949 ldfe FR_S19 = [GR_ad_Ce],32 950 fma.s1 FR_C71 = FR_C71,f1,FR_r 951 and GR_SigRqLin = 0xF,GR_iSig 952} 953{ .mfi 954 ldfe FR_S17 = [GR_ad_Co],32 955 fma.s1 FR_C81 = FR_C81,f1,FR_r 956 mov GR_2 = 2 957};; 958{ .mfi 959(p14) ldfe FR_InvAn = [GR_ad_Co7] 960 fma.s1 FR_C91 = FR_C91,f1,FR_r 961 // if significand of r is 0 tnan argument is negative integer 962(p14) cmp.eq.unc p12,p0 = r0,GR_Sig 963} 964{ .mfb 965(p8) sub GR_SigRqLin = GR_SigRqLin,GR_2 // subtract 2 if 2 <= x < 16 966 fma.s1 FR_CA1 = FR_CA1,f1,FR_r 967 // jump if x is negative integer such that -2^52 < x < -185 968(p12) br.cond.spnt tgamma_singularity 969};; 970{ .mfi 971 setf.sig FR_Xt = GR_SigRqLin 972(p11) fms.s1 FR_rs = f1,f1,FR_r 973(p14) cmp.ltu.unc p7,p0 = GR_185,GR_iSig 974} 975{ .mfb 976 ldfe FR_S15 = [GR_ad_Ce],32 977 fma.s1 FR_Rq7 = FR_Rq7,FR_14,FR_X2pX // (x-13)*(x-14) 978 // jump if x is noninteger such that -2^52 < x < -185 979(p7) br.cond.spnt tgamma_underflow 980};; 981{ .mfi 982 ldfe FR_S13 = [GR_ad_Co],48 983 fma.s1 FR_C01 = FR_C01,FR_r,FR_C00 984 and GR_Sig2 = 0xE,GR_SigRqLin 985} 986{ .mfi 987 ldfe FR_S11 = [GR_ad_Ce],48 988 fma.s1 FR_C11 = FR_C11,FR_r,FR_C10 989 nop.i 0 990};; 991{ .mfi 992 ldfe FR_S9 = [GR_ad_Co],32 993 fma.s1 FR_C21 = FR_C21,FR_r,FR_C20 994 // should we mul by polynomial of recursion? 995 cmp.eq p13,p12 = r0,GR_SigRqLin 996} 997{ .mfi 998 ldfe FR_S7 = [GR_ad_Ce],32 999 fma.s1 FR_C31 = FR_C31,FR_r,FR_C30 1000 nop.i 0 1001};; 1002{ .mfi 1003 ldfe FR_S5 = [GR_ad_Co],32 1004 fma.s1 FR_C41 = FR_C41,FR_r,FR_C40 1005 nop.i 0 1006} 1007{ .mfi 1008 ldfe FR_S3 = [GR_ad_Ce],32 1009 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50 1010 nop.i 0 1011};; 1012{ .mfi 1013 nop.m 0 1014 fma.s1 FR_C61 = FR_C61,FR_r,FR_C60 1015 nop.i 0 1016} 1017{ .mfi 1018 nop.m 0 1019 fma.s1 FR_C71 = FR_C71,FR_r,FR_C70 1020 nop.i 0 1021};; 1022{ .mfi 1023 nop.m 0 1024 fma.s1 FR_C81 = FR_C81,FR_r,FR_C80 1025 nop.i 0 1026} 1027{ .mfi 1028 nop.m 0 1029 fma.s1 FR_C91 = FR_C91,FR_r,FR_C90 1030 nop.i 0 1031};; 1032{ .mfi 1033 nop.m 0 1034 fma.s1 FR_CA1 = FR_CA1,FR_r,FR_CA0 1035 nop.i 0 1036} 1037{ .mfi 1038 nop.m 0 1039 fma.s1 FR_C01 = FR_C01,FR_C11,f0 1040 nop.i 0 1041};; 1042{ .mfi 1043 nop.m 0 1044 fma.s1 FR_C21 = FR_C21,FR_C31,f0 1045 nop.i 0 1046} 1047{ .mfi 1048 nop.m 0 1049 fma.s1 FR_rs2 = FR_rs,FR_rs,f0 1050(p12) cmp.lt.unc p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2? 1051};; 1052{ .mfi 1053 nop.m 0 1054 fma.s1 FR_C41 = FR_C41,FR_C51,f0 1055 nop.i 0 1056} 1057{ .mfi 1058 nop.m 0 1059(p7) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0 1060(p12) cmp.lt.unc p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4? 1061};; 1062{ .mfi 1063 nop.m 0 1064 fma.s1 FR_C61 = FR_C61,FR_C71,f0 1065(p15) cmp.eq p11,p0 = r0,r0 1066} 1067{ .mfi 1068 nop.m 0 1069(p9) fma.s1 FR_Rq3 = FR_Rq3,FR_Rq4,f0 1070(p12) cmp.lt.unc p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6? 1071};; 1072{ .mfi 1073 nop.m 0 1074 fma.s1 FR_C81 = FR_C81,FR_C91,f0 1075 nop.i 0 1076} 1077{ .mfi 1078 nop.m 0 1079(p8) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq6,f0 1080(p14) cmp.ltu p0,p11 = 0x9,GR_Tbl_Ind 1081};; 1082{ .mfi 1083 nop.m 0 1084 fcvt.xf FR_RqLin = FR_Xt 1085 nop.i 0 1086} 1087{ .mfi 1088 nop.m 0 1089(p11) fma.s1 FR_CA1 = FR_CA1,FR_An,f0 1090 nop.i 0 1091};; 1092{ .mfi 1093 nop.m 0 1094 fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S19 1095 nop.i 0 1096} 1097{ .mfi 1098 nop.m 0 1099 fma.s1 FR_S17 = FR_S17,FR_rs2,FR_S15 1100 nop.i 0 1101};; 1102{ .mfi 1103 nop.m 0 1104 fma.s1 FR_C01 = FR_C01,FR_C21,f0 1105 nop.i 0 1106} 1107{ .mfi 1108 nop.m 0 1109 fma.s1 FR_rs4 = FR_rs2,FR_rs2,f0 1110(p12) cmp.lt.unc p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3? 1111};; 1112{ .mfi 1113 nop.m 0 1114(p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq3,f0 1115 nop.i 0 1116} 1117{ .mfi 1118 nop.m 0 1119 fma.s1 FR_S13 = FR_S13,FR_rs2,FR_S11 1120(p12) cmp.lt.unc p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7? 1121};; 1122{ .mfi 1123 nop.m 0 1124 fma.s1 FR_C41 = FR_C41,FR_C61,f0 1125 nop.i 0 1126} 1127{ .mfi 1128 nop.m 0 1129(p9) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq7,f0 1130 nop.i 0 1131};; 1132{ .mfi 1133 nop.m 0 1134 fma.s1 FR_C81 = FR_C81,FR_CA1,f0 1135 nop.i 0 1136} 1137{ .mfi 1138 nop.m 0 1139 fma.s1 FR_S9 = FR_S9,FR_rs2,FR_S7 1140 nop.i 0 1141};; 1142{ .mfi 1143 nop.m 0 1144 fma.s1 FR_S5 = FR_S5,FR_rs2,FR_S3 1145 nop.i 0 1146};; 1147{ .mfi 1148 nop.m 0 1149 fma.s1 FR_rs3 = FR_rs2,FR_rs,f0 1150(p12) tbit.nz.unc p6,p0 = GR_SigRqLin,0 1151} 1152{ .mfi 1153 nop.m 0 1154 fma.s1 FR_rs8 = FR_rs4,FR_rs4,f0 1155 nop.i 0 1156};; 1157{ .mfi 1158 nop.m 0 1159 fma.s1 FR_S21 = FR_S21,FR_rs4,FR_S17 1160 mov GR_ExpOf1 = 0x2FFFF 1161} 1162{ .mfi 1163 nop.m 0 1164(p6) fms.s1 FR_RqLin = FR_AbsX,f1,FR_RqLin 1165(p12) cmp.lt.unc p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5? 1166};; 1167{ .mfi 1168 nop.m 0 1169 fma.s1 FR_C01 = FR_C01,FR_C41,f0 1170 nop.i 0 1171} 1172{ .mfi 1173 nop.m 0 1174(p8) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq5,f0 1175(p14) cmp.gtu.unc p7,p0 = GR_Sign_Exp,GR_ExpOf1 1176};; 1177{ .mfi 1178 nop.m 0 1179 fma.s1 FR_S13 = FR_S13,FR_rs4,FR_S9 1180 nop.i 0 1181} 1182{ .mfi 1183 nop.m 0 1184(p7) fma.s1 FR_C81 = FR_C81,FR_AbsX,f0 1185 nop.i 0 1186};; 1187{ .mfi 1188 nop.m 0 1189(p14) fma.s1 FR_AbsXp1 = f1,f1,FR_AbsX // |x|+1 1190 nop.i 0 1191} 1192{ .mfi 1193 nop.m 0 1194(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary 1195 nop.i 0 1196};; 1197{ .mfi 1198 nop.m 0 1199 fma.s1 FR_rs7 = FR_rs4,FR_rs3,f0 1200 nop.i 0 1201} 1202{ .mfi 1203 nop.m 0 1204 fma.s1 FR_S5 = FR_S5,FR_rs3,FR_rs 1205 nop.i 0 1206};; 1207{ .mib 1208(p14) cmp.lt p13,p0 = r0,r0 // set p13 to 0 if x < 0 1209(p12) cmp.eq.unc p8,p9 = 1,GR_SigRqLin 1210(p10) br.cond.spnt tgamma_spec_res 1211};; 1212{ .mfi 1213 getf.sig GR_Sig = FR_iXt 1214(p6) fma.s1 FR_Rq1 = FR_Rq1,FR_RqLin,f0 1215 // should we mul by polynomial of recursion? 1216(p15) cmp.eq.unc p0,p11 = r0,GR_SigRqLin 1217} 1218{ .mfb 1219 nop.m 0 1220 fma.s1 FR_GAMMA = FR_C01,FR_C81,f0 1221(p11) br.cond.spnt tgamma_positives 1222};; 1223{ .mfi 1224 nop.m 0 1225 fma.s1 FR_S21 = FR_S21,FR_rs8,FR_S13 1226 nop.i 0 1227} 1228{ .mfb 1229 nop.m 0 1230(p13) fma.d.s0 f8 = FR_C01,FR_C81,f0 1231(p13) br.ret.spnt b0 1232};; 1233.pred.rel "mutex",p8,p9 1234{ .mfi 1235 nop.m 0 1236(p9) fma.s1 FR_GAMMA = FR_GAMMA,FR_Rq1,f0 1237 tbit.z p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0 1238} 1239{ .mfi 1240 nop.m 0 1241(p8) fma.s1 FR_GAMMA = FR_GAMMA,FR_RqLin,f0 1242 nop.i 0 1243};; 1244{ .mfi 1245 nop.m 0 1246 fma.s1 FR_S21 = FR_S21,FR_rs7,FR_S5 1247 nop.i 0 1248};; 1249.pred.rel "mutex",p6,p7 1250{ .mfi 1251 nop.m 0 1252(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 1253 nop.i 0 1254} 1255{ .mfi 1256 nop.m 0 1257(p7) fma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 1258 mov GR_Sig2 = 1 1259};; 1260{ .mfi 1261 nop.m 0 1262 frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA 1263 cmp.ltu p13,p0 = GR_Sign_Exp,GR_ExpOf1 1264};; 1265// NR method: ineration #1 1266{ .mfi 1267(p13) getf.exp GR_Sign_Exp = FR_AbsX 1268 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1 // t = 1 - r0*x 1269(p13) shl GR_Sig2 = GR_Sig2,63 1270};; 1271{ .mfi 1272(p13) getf.sig GR_Sig = FR_AbsX 1273 nop.f 0 1274(p13) mov GR_NzOvfBound = 0xFBFF 1275};; 1276{ .mfi 1277(p13) cmp.ltu.unc p8,p0 = GR_Sign_Exp,GR_NzOvfBound // p8 <- overflow 1278 nop.f 0 1279(p13) cmp.eq.unc p9,p0 = GR_Sign_Exp,GR_NzOvfBound 1280};; 1281{ .mfb 1282 nop.m 0 1283(p13) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags 1284(p8) br.cond.spnt tgamma_ovf_near_0 //tgamma_neg_overflow 1285};; 1286{ .mib 1287 nop.m 0 1288(p9) cmp.eq.unc p8,p0 = GR_Sig,GR_Sig2 1289(p8) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_neg_overflow 1290};; 1291{ .mfi 1292 nop.m 0 1293 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 1294 nop.i 0 1295};; 1296// NR method: ineration #2 1297{ .mfi 1298 nop.m 0 1299 fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1 // t = 1 - r1*x 1300 nop.i 0 1301};; 1302{ .mfi 1303 nop.m 0 1304 fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1 1305 nop.i 0 1306};; 1307// NR method: ineration #3 1308{ .mfi 1309 nop.m 0 1310 fnma.s1 FR_Rcp3 = FR_Rcp2,FR_GAMMA,f1 // t = 1 - r2*x 1311 nop.i 0 1312} 1313{ .mfi 1314 nop.m 0 1315(p13) fma.s1 FR_Rcp2 = FR_Rcp2,FR_AbsXp1,f0 1316(p14) cmp.ltu p10,p11 = 0x9,GR_Tbl_Ind 1317};; 1318.pred.rel "mutex",p10,p11 1319{ .mfi 1320 nop.m 0 1321(p10) fma.s1 FR_GAMMA = FR_Rcp2,FR_Rcp3,FR_Rcp2 1322 nop.i 0 1323} 1324{ .mfi 1325 nop.m 0 1326(p11) fma.d.s0 f8 = FR_Rcp2,FR_Rcp3,FR_Rcp2 1327 nop.i 0 1328};; 1329{ .mfb 1330 nop.m 0 1331(p10) fma.d.s0 f8 = FR_GAMMA,FR_InvAn,f0 1332 br.ret.sptk b0 1333};; 1334 1335 1336// here if x >= 3 1337//-------------------------------------------------------------------- 1338.align 32 1339tgamma_positives: 1340.pred.rel "mutex",p8,p9 1341{ .mfi 1342 nop.m 0 1343(p9) fma.d.s0 f8 = FR_GAMMA,FR_Rq1,f0 1344 nop.i 0 1345} 1346{ .mfb 1347 nop.m 0 1348(p8) fma.d.s0 f8 = FR_GAMMA,FR_RqLin,f0 1349 br.ret.sptk b0 1350};; 1351 1352// here if 0 < x < 1 1353//-------------------------------------------------------------------- 1354.align 32 1355tgamma_from_0_to_2: 1356{ .mfi 1357 getf.exp GR_Sign_Exp = FR_r02 1358 fms.s1 FR_r = FR_r02,f1,FR_Xmin 1359 mov GR_ExpOf025 = 0xFFFD 1360} 1361{ .mfi 1362 add GR_ad_Co = 0x1200,GR_ad_Data 1363(p6) fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1 // t = 1 - r0*x 1364(p6) mov GR_Sig2 = 1 1365};; 1366{ .mfi 1367(p6) getf.sig GR_Sig = FR_NormX 1368 nop.f 0 1369(p6) shl GR_Sig2 = GR_Sig2,63 1370} 1371{ .mfi 1372 add GR_ad_Ce = 0x1210,GR_ad_Data 1373 nop.f 0 1374(p6) mov GR_NzOvfBound = 0xFBFF 1375};; 1376{ .mfi 1377 cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2 1378 nop.f 0 1379 cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4 1380} 1381{ .mfi 1382(p6) cmp.ltu.unc p11,p0 = GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow 1383 nop.f 0 1384(p6) cmp.eq.unc p12,p0 = GR_Sign_Exp,GR_NzOvfBound 1385};; 1386.pred.rel "mutex",p8,p9 1387{ .mfi 1388(p8) add GR_ad_Co = 0x200,GR_ad_Co 1389(p6) fma.d.s0 FR_X = f1,f1,f8 // set deno & inexact flags 1390(p9) add GR_ad_Co = 0x100,GR_ad_Co 1391} 1392{ .mib 1393(p8) add GR_ad_Ce = 0x200,GR_ad_Ce 1394(p9) add GR_ad_Ce = 0x100,GR_ad_Ce 1395(p11) br.cond.spnt tgamma_ovf_near_0 //tgamma_spec_res 1396};; 1397{ .mfi 1398 ldfe FR_A15 = [GR_ad_Co],32 1399 nop.f 0 1400(p12) cmp.eq.unc p13,p0 = GR_Sig,GR_Sig2 1401} 1402{ .mfb 1403 ldfe FR_A14 = [GR_ad_Ce],32 1404 nop.f 0 1405(p13) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_spec_res 1406};; 1407{ .mfi 1408 ldfe FR_A13 = [GR_ad_Co],32 1409 nop.f 0 1410 nop.i 0 1411} 1412{ .mfi 1413 ldfe FR_A12 = [GR_ad_Ce],32 1414 nop.f 0 1415 nop.i 0 1416};; 1417.pred.rel "mutex",p9,p10 1418{ .mfi 1419 ldfe FR_A11 = [GR_ad_Co],32 1420(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0 1421 nop.i 0 1422} 1423{ .mfi 1424 ldfe FR_A10 = [GR_ad_Ce],32 1425(p9) fma.s1 FR_r2 = FR_r,FR_r,f0 1426 nop.i 0 1427};; 1428{ .mfi 1429 ldfe FR_A9 = [GR_ad_Co],32 1430(p6) fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 1431 nop.i 0 1432} 1433{ .mfi 1434 ldfe FR_A8 = [GR_ad_Ce],32 1435(p10) fma.s1 FR_r = f0,f0,FR_r02 1436 nop.i 0 1437};; 1438{ .mfi 1439 ldfe FR_A7 = [GR_ad_Co],32 1440 nop.f 0 1441 nop.i 0 1442} 1443{ .mfi 1444 ldfe FR_A6 = [GR_ad_Ce],32 1445 nop.f 0 1446 nop.i 0 1447};; 1448{ .mfi 1449 ldfe FR_A5 = [GR_ad_Co],32 1450 nop.f 0 1451 nop.i 0 1452} 1453{ .mfi 1454 ldfe FR_A4 = [GR_ad_Ce],32 1455 nop.f 0 1456 nop.i 0 1457};; 1458{ .mfi 1459 ldfe FR_A3 = [GR_ad_Co],32 1460 nop.f 0 1461 nop.i 0 1462} 1463{ .mfi 1464 ldfe FR_A2 = [GR_ad_Ce],32 1465 nop.f 0 1466 nop.i 0 1467};; 1468{ .mfi 1469 ldfe FR_A1 = [GR_ad_Co],32 1470 fma.s1 FR_r4 = FR_r2,FR_r2,f0 1471 nop.i 0 1472} 1473{ .mfi 1474 ldfe FR_A0 = [GR_ad_Ce],32 1475 nop.f 0 1476 nop.i 0 1477};; 1478{ .mfi 1479 nop.m 0 1480(p6) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1 // t = 1 - r1*x 1481 nop.i 0 1482};; 1483{ .mfi 1484 nop.m 0 1485 fma.s1 FR_A15 = FR_A15,FR_r,FR_A14 1486 nop.i 0 1487} 1488{ .mfi 1489 nop.m 0 1490 fma.s1 FR_A11 = FR_A11,FR_r,FR_A10 1491 nop.i 0 1492};; 1493{ .mfi 1494 nop.m 0 1495 fma.s1 FR_r8 = FR_r4,FR_r4,f0 1496 nop.i 0 1497};; 1498{ .mfi 1499 nop.m 0 1500(p6) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1 1501 nop.i 0 1502};; 1503{ .mfi 1504 nop.m 0 1505 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6 1506 nop.i 0 1507} 1508{ .mfi 1509 nop.m 0 1510 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2 1511 nop.i 0 1512};; 1513{ .mfi 1514 nop.m 0 1515 fma.s1 FR_A15 = FR_A15,FR_r,FR_A13 1516 nop.i 0 1517} 1518{ .mfi 1519 nop.m 0 1520 fma.s1 FR_A11 = FR_A11,FR_r,FR_A9 1521 nop.i 0 1522};; 1523{ .mfi 1524 nop.m 0 1525(p6) fnma.s1 FR_Rcp3 = FR_Rcp2,FR_NormX,f1 // t = 1 - r1*x 1526 nop.i 0 1527};; 1528{ .mfi 1529 nop.m 0 1530 fma.s1 FR_A7 = FR_A7,FR_r,FR_A5 1531 nop.i 0 1532} 1533{ .mfi 1534 nop.m 0 1535 fma.s1 FR_A3 = FR_A3,FR_r,FR_A1 1536 nop.i 0 1537};; 1538{ .mfi 1539 nop.m 0 1540 fma.s1 FR_A15 = FR_A15,FR_r,FR_A12 1541 nop.i 0 1542} 1543{ .mfi 1544 nop.m 0 1545 fma.s1 FR_A11 = FR_A11,FR_r,FR_A8 1546 nop.i 0 1547};; 1548{ .mfi 1549 nop.m 0 1550(p6) fma.s1 FR_Rcp3 = FR_Rcp2,FR_Rcp3,FR_Rcp2 1551 nop.i 0 1552};; 1553{ .mfi 1554 nop.m 0 1555 fma.s1 FR_A7 = FR_A7,FR_r,FR_A4 1556 nop.i 0 1557} 1558{ .mfi 1559 nop.m 0 1560 fma.s1 FR_A3 = FR_A3,FR_r,FR_A0 1561 nop.i 0 1562};; 1563{ .mfi 1564 nop.m 0 1565 fma.s1 FR_A15 = FR_A15,FR_r4,FR_A11 1566 nop.i 0 1567} 1568{ .mfi 1569 nop.m 0 1570 fma.s1 FR_A7 = FR_A7,FR_r4,FR_A3 1571 nop.i 0 1572};; 1573.pred.rel "mutex",p6,p7 1574{ .mfi 1575 nop.m 0 1576(p6) fma.s1 FR_A15 = FR_A15,FR_r8,FR_A7 1577 nop.i 0 1578} 1579{ .mfi 1580 nop.m 0 1581(p7) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7 1582 nop.i 0 1583};; 1584{ .mfb 1585 nop.m 0 1586(p6) fma.d.s0 f8 = FR_A15,FR_Rcp3,f0 1587 br.ret.sptk b0 1588};; 1589 1590// overflow 1591//-------------------------------------------------------------------- 1592.align 32 1593tgamma_ovf_near_0_boundary: 1594.pred.rel "mutex",p14,p15 1595{ .mfi 1596 mov GR_fpsr = ar.fpsr 1597 nop.f 0 1598(p15) mov r8 = 0x7ff 1599} 1600{ .mfi 1601 nop.m 0 1602 nop.f 0 1603(p14) mov r8 = 0xfff 1604};; 1605{ .mfi 1606 nop.m 0 1607 nop.f 0 1608 shl r8 = r8,52 1609};; 1610{ .mfi 1611 sub r8 = r8,r0,1 1612 nop.f 0 1613 extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode 1614};; 1615.pred.rel "mutex",p14,p15 1616{ .mfi 1617 // set p8 to 0 in case of overflow and to 1 otherwise 1618 // for negative arg: 1619 // no overflow if rounding mode either Z or +Inf, i.e. 1620 // GR_fpsr > 1 1621(p14) cmp.lt p8,p0 = 1,GR_fpsr 1622 nop.f 0 1623 // for positive arg: 1624 // no overflow if rounding mode either Z or -Inf, i.e. 1625 // (GR_fpsr & 1) == 0 1626(p15) tbit.z p0,p8 = GR_fpsr,0 1627};; 1628{ .mib 1629(p8) setf.d f8 = r8 // set result to 0x7fefffffffffffff without 1630 // OVERFLOW flag raising 1631 nop.i 0 1632(p8) br.ret.sptk b0 1633};; 1634.align 32 1635tgamma_ovf_near_0: 1636{ .mfi 1637 mov r8 = 0x1FFFE 1638 nop.f 0 1639 nop.i 0 1640};; 1641{ .mfi 1642 setf.exp f9 = r8 1643 fmerge.s FR_X = f8,f8 1644 mov GR_TAG = 258 // overflow 1645};; 1646.pred.rel "mutex",p14,p15 1647{ .mfi 1648 nop.m 0 1649(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result 1650 nop.i 0 1651} 1652{ .mfb 1653 nop.m 0 1654(p14) fnma.d.s0 f8 = f9,f9,f0 // Set I,O and -INF result 1655 br.cond.sptk tgamma_libm_err 1656};; 1657// overflow or absolute value of x is too big 1658//-------------------------------------------------------------------- 1659.align 32 1660tgamma_spec_res: 1661{ .mfi 1662 mov GR_0x30033 = 0x30033 1663(p14) fcmp.eq.unc.s1 p10,p11 = f8,FR_Xt 1664(p15) mov r8 = 0x1FFFE 1665};; 1666{ .mfi 1667(p15) setf.exp f9 = r8 1668 nop.f 0 1669 nop.i 0 1670};; 1671{ .mfb 1672(p11) cmp.ltu.unc p7,p8 = GR_0x30033,GR_Sign_Exp 1673 nop.f 0 1674(p10) br.cond.spnt tgamma_singularity 1675};; 1676.pred.rel "mutex",p7,p8 1677{ .mbb 1678 nop.m 0 1679(p7) br.cond.spnt tgamma_singularity 1680(p8) br.cond.spnt tgamma_underflow 1681};; 1682{ .mfi 1683 nop.m 0 1684 fmerge.s FR_X = f8,f8 1685 mov GR_TAG = 258 // overflow 1686} 1687{ .mfb 1688 nop.m 0 1689(p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result 1690 br.cond.sptk tgamma_libm_err 1691};; 1692 1693// x is negative integer or +/-0 1694//-------------------------------------------------------------------- 1695.align 32 1696tgamma_singularity: 1697{ .mfi 1698 nop.m 0 1699 fmerge.s FR_X = f8,f8 1700 mov GR_TAG = 259 // negative 1701} 1702{ .mfb 1703 nop.m 0 1704 frcpa.s0 f8,p0 = f0,f0 1705 br.cond.sptk tgamma_libm_err 1706};; 1707// x is negative noninteger with big absolute value 1708//-------------------------------------------------------------------- 1709.align 32 1710tgamma_underflow: 1711{ .mmi 1712 getf.sig GR_Sig = FR_iXt 1713 mov r11 = 0x00001 1714 nop.i 0 1715};; 1716{ .mfi 1717 setf.exp f9 = r11 1718 nop.f 0 1719 nop.i 0 1720};; 1721{ .mfi 1722 nop.m 0 1723 nop.f 0 1724 tbit.z p6,p7 = GR_Sig,0 1725};; 1726.pred.rel "mutex",p6,p7 1727{ .mfi 1728 nop.m 0 1729(p6) fms.d.s0 f8 = f9,f9,f9 1730 nop.i 0 1731} 1732{ .mfb 1733 nop.m 0 1734(p7) fma.d.s0 f8 = f9,f9,f9 1735 br.ret.sptk b0 1736};; 1737 1738// x for natval, nan, +/-inf or +/-0 1739//-------------------------------------------------------------------- 1740.align 32 1741tgamma_spec: 1742{ .mfi 1743 nop.m 0 1744 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf 1745 nop.i 0 1746};; 1747{ .mfi 1748 nop.m 0 1749 fclass.m p7,p8 = f8,0x7 // +/-0 1750 nop.i 0 1751};; 1752{ .mfi 1753 nop.m 0 1754 fmerge.s FR_X = f8,f8 1755 nop.i 0 1756} 1757{ .mfb 1758 nop.m 0 1759(p6) fma.d.s0 f8 = f8,f1,f8 1760(p6) br.ret.spnt b0 1761};; 1762.pred.rel "mutex",p7,p8 1763{ .mfi 1764(p7) mov GR_TAG = 259 // negative 1765(p7) frcpa.s0 f8,p0 = f1,f8 1766 nop.i 0 1767} 1768{ .mib 1769 nop.m 0 1770 nop.i 0 1771(p8) br.cond.spnt tgamma_singularity 1772};; 1773 1774.align 32 1775tgamma_libm_err: 1776{ .mfi 1777 alloc r32 = ar.pfs,1,4,4,0 1778 nop.f 0 1779 mov GR_Parameter_TAG = GR_TAG 1780};; 1781 1782GLOBAL_LIBM_END(tgamma) 1783libm_alias_double_other (tgamma, tgamma) 1784 1785LOCAL_LIBM_ENTRY(__libm_error_region) 1786.prologue 1787{ .mfi 1788 add GR_Parameter_Y=-32,sp // Parameter 2 value 1789 nop.f 0 1790.save ar.pfs,GR_SAVE_PFS 1791 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 1792} 1793{ .mfi 1794.fframe 64 1795 add sp=-64,sp // Create new stack 1796 nop.f 0 1797 mov GR_SAVE_GP=gp // Save gp 1798};; 1799{ .mmi 1800 stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack 1801 add GR_Parameter_X = 16,sp // Parameter 1 address 1802.save b0, GR_SAVE_B0 1803 mov GR_SAVE_B0=b0 // Save b0 1804};; 1805.body 1806{ .mib 1807 stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack 1808 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 1809 nop.b 0 1810} 1811{ .mib 1812 stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack 1813 add GR_Parameter_Y = -16,GR_Parameter_Y 1814 br.call.sptk b0=__libm_error_support# // Call error handling function 1815};; 1816{ .mmi 1817 nop.m 0 1818 nop.m 0 1819 add GR_Parameter_RESULT = 48,sp 1820};; 1821{ .mmi 1822 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack 1823.restore sp 1824 add sp = 64,sp // Restore stack pointer 1825 mov b0 = GR_SAVE_B0 // Restore return address 1826};; 1827{ .mib 1828 mov gp = GR_SAVE_GP // Restore gp 1829 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 1830 br.ret.sptk b0 // Return 1831};; 1832 1833LOCAL_LIBM_END(__libm_error_region) 1834.type __libm_error_support#,@function 1835.global __libm_error_support# 1836