1.file "tgammaf.s" 2 3 4// Copyright (c) 2001 - 2005, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT 25// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, 28// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code,and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 38// 39//********************************************************************* 40// 41// History: 42// 11/30/01 Initial version 43// 05/20/02 Cleaned up namespace and sf0 syntax 44// 02/10/03 Reordered header: .section, .global, .proc, .align 45// 04/04/03 Changed error codes for overflow and negative integers 46// 04/10/03 Changed code for overflow near zero handling 47// 12/16/03 Fixed parameter passing to/from error handling routine 48// 03/31/05 Reformatted delimiters between data tables 49// 50//********************************************************************* 51// 52//********************************************************************* 53// 54// Function: tgammaf(x) computes the principle value of the GAMMA 55// function of x. 56// 57//********************************************************************* 58// 59// Resources Used: 60// 61// Floating-Point Registers: f8-f15 62// f33-f75 63// 64// General Purpose Registers: 65// r8-r11 66// r14-r29 67// r32-r36 68// r37-r40 (Used to pass arguments to error handling routine) 69// 70// Predicate Registers: p6-p15 71// 72//********************************************************************* 73// 74// IEEE Special Conditions: 75// 76// tgammaf(+inf) = +inf 77// tgammaf(-inf) = QNaN 78// tgammaf(+/-0) = +/-inf 79// tgammaf(x<0, x - integer) = QNaN 80// tgammaf(SNaN) = QNaN 81// tgammaf(QNaN) = QNaN 82// 83//********************************************************************* 84// 85// Overview 86// 87// The method consists of three cases. 88// 89// If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular; 90// else if 0 < x < 2 use case tgamma_from_0_to_2; 91// else if -(i+1) < x < -i, i = 0...43 use case tgamma_negatives; 92// 93// Case 2 <= x < OVERFLOW_BOUNDARY 94// ------------------------------- 95// Here we use algorithm based on the recursive formula 96// GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval 97// [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and 98// approximate GAMMA(x) by polynomial of 22th degree on each 99// [8*n; 8*n+1], recursive formula is used to expand GAMMA(x) 100// to [8*n; 8*n+1]. In other words we need to find n, i and r 101// such that x = 8 * n + i + r where n and i are integer numbers 102// and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) = 103// = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) = 104// = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~ 105// ~ (x-1)*(x-2)*...*(x-i)*P12n(r). 106// 107// Step 1: Reduction 108// ----------------- 109// N = [x] with truncate 110// r = x - N, note 0 <= r < 1 111// 112// n = N & ~0xF - index of table that contains coefficient of 113// polynomial approximation 114// i = N & 0xF - is used in recursive formula 115// 116// 117// Step 2: Approximation 118// --------------------- 119// We use factorized minimax approximation polynomials 120// P12n(r) = A12*(r^2+C01(n)*r+C00(n))* 121// *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n)) 122// 123// Step 3: Recursion 124// ----------------- 125// In case when i > 0 we need to multiply P12n(r) by product 126// R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions 127// we can calculate R as follow: 128// R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is 129// even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))* 130// *(i-1) if i is odd. In both cases we need to calculate 131// R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) = 132// = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) = 133// = (RA+2*RB)*(RA+6*(1-RB))*...*(RA+2*(2*j-1)*(j-1+RB)) 134// where j = 1..[i/2], RA = x^2-x, RB = 1-x. 135// 136// Step 4: Reconstruction 137// ---------------------- 138// Reconstruction is just simple multiplication i.e. 139// GAMMA(x) = P12n(r)*R(i,x) 140// 141// Case 0 < x < 2 142// -------------- 143// To calculate GAMMA(x) on this interval we do following 144// if 1.0 <= x < 1.25 than GAMMA(x) = P7(x-1) 145// if 1.25 <= x < 1.5 than GAMMA(x) = P7(x-x_min) where 146// x_min is point of local minimum on [1; 2] interval. 147// if 1.5 <= x < 1.75 than GAMMA(x) = P7(x-1.5) 148// if 1.75 <= x < 2.0 than GAMMA(x) = P7(x-1.5) 149// and 150// if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x 151// 152// Case -(i+1) < x < -i, i = 0...43 153// ---------------------------------- 154// Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and 155// so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of 156// GAMMA(x) is described above. 157// 158// Step 1: Reduction 159// ----------------- 160// Note that period of sin(PI*x) is 2 and range reduction for 161// sin(PI*x) is like to range reduction for GAMMA(x) 162// i.e rs = x - round(x) and |rs| <= 0.5. 163// 164// Step 2: Approximation 165// --------------------- 166// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI = 167// = (-1)^n*sin(PI*rs)/PI Taylor series is used. 168// sin(PI*rs)/PI ~ S17(rs). 169// 170// Step 3: Division 171// ---------------- 172// To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa 173// instruction with following Newton-Raphson interations. 174// 175// 176//********************************************************************* 177 178GR_ad_Data = r8 179GR_TAG = r8 180GR_SignExp = r9 181GR_Sig = r10 182GR_ArgNz = r10 183GR_RqDeg = r11 184 185GR_NanBound = r14 186GR_ExpOf025 = r15 187GR_ExpOf05 = r16 188GR_ad_Co = r17 189GR_ad_Ce = r18 190GR_TblOffs = r19 191GR_Arg = r20 192GR_Exp2Ind = r21 193GR_TblOffsMask = r21 194GR_Offs = r22 195GR_OvfNzBound = r23 196GR_ZeroResBound = r24 197GR_ad_SinO = r25 198GR_ad_SinE = r26 199GR_Correction = r27 200GR_Tbl12Offs = r28 201GR_NzBound = r28 202GR_ExpOf1 = r29 203GR_fpsr = r29 204 205GR_SAVE_B0 = r33 206GR_SAVE_PFS = r34 207GR_SAVE_GP = r35 208GR_SAVE_SP = r36 209 210GR_Parameter_X = r37 211GR_Parameter_Y = r38 212GR_Parameter_RESULT = r39 213GR_Parameter_TAG = r40 214 215 216FR_X = f10 217FR_Y = f1 218FR_RESULT = f8 219 220FR_iXt = f11 221FR_Xt = f12 222FR_r = f13 223FR_r2 = f14 224FR_r4 = f15 225 226FR_C01 = f33 227FR_A7 = f33 228FR_C11 = f34 229FR_A6 = f34 230FR_C21 = f35 231FR_A5 = f35 232FR_C31 = f36 233FR_A4 = f36 234FR_C41 = f37 235FR_A3 = f37 236FR_C51 = f38 237FR_A2 = f38 238 239FR_C00 = f39 240FR_A1 = f39 241FR_C10 = f40 242FR_A0 = f40 243FR_C20 = f41 244FR_C30 = f42 245FR_C40 = f43 246FR_C50 = f44 247FR_An = f45 248FR_OvfBound = f46 249FR_InvAn = f47 250 251FR_Multplr = f48 252FR_NormX = f49 253FR_X2mX = f50 254FR_1mX = f51 255FR_Rq0 = f51 256FR_Rq1 = f52 257FR_Rq2 = f53 258FR_Rq3 = f54 259 260FR_Rcp0 = f55 261FR_Rcp1 = f56 262FR_Rcp2 = f57 263 264FR_InvNormX1 = f58 265FR_InvNormX2 = f59 266 267FR_rs = f60 268FR_rs2 = f61 269 270FR_LocalMin = f62 271FR_10 = f63 272 273FR_05 = f64 274 275FR_S32 = f65 276FR_S31 = f66 277FR_S01 = f67 278FR_S11 = f68 279FR_S21 = f69 280FR_S00 = f70 281FR_S10 = f71 282FR_S20 = f72 283 284FR_GAMMA = f73 285FR_2 = f74 286FR_6 = f75 287 288 289 290 291// Data tables 292//============================================================== 293RODATA 294.align 16 295LOCAL_OBJECT_START(tgammaf_data) 296data8 0x3FDD8B618D5AF8FE // local minimum (0.461632144968362356785) 297data8 0x4024000000000000 // 10.0 298data8 0x3E90FC992FF39E13 // S32 299data8 0xBEC144B2760626E2 // S31 300// 301//[2; 8) 302data8 0x4009EFD1BA0CB3B4 // C01 303data8 0x3FFFB35378FF4822 // C11 304data8 0xC01032270413B896 // C41 305data8 0xC01F171A4C0D6827 // C51 306data8 0x40148F8E197396AC // C20 307data8 0x401C601959F1249C // C30 308data8 0x3EE21AD881741977 // An 309data8 0x4041852200000000 // overflow boundary (35.04010009765625) 310data8 0x3FD9CE68F695B198 // C21 311data8 0xBFF8C30AC900DA03 // C31 312data8 0x400E17D2F0535C02 // C00 313data8 0x4010689240F7FAC8 // C10 314data8 0x402563147DDCCF8D // C40 315data8 0x4033406D0480A21C // C50 316// 317//[8; 16) 318data8 0x4006222BAE0B793B // C01 319data8 0x4002452733473EDA // C11 320data8 0xC0010EF3326FDDB3 // C41 321data8 0xC01492B817F99C0F // C51 322data8 0x40099C905A249B75 // C20 323data8 0x4012B972AE0E533D // C30 324data8 0x3FE6F6DB91D0D4CC // An 325data8 0x4041852200000000 // overflow boundary 326data8 0x3FF545828F7B73C5 // C21 327data8 0xBFBBD210578764DF // C31 328data8 0x4000542098F53CFC // C00 329data8 0x40032C1309AD6C81 // C10 330data8 0x401D7331E19BD2E1 // C40 331data8 0x402A06807295EF57 // C50 332// 333//[16; 24) 334data8 0x4000131002867596 // C01 335data8 0x3FFAA362D5D1B6F2 // C11 336data8 0xBFFCB6985697DB6D // C41 337data8 0xC0115BEE3BFC3B3B // C51 338data8 0x3FFE62FF83456F73 // C20 339data8 0x4007E33478A114C4 // C30 340data8 0x41E9B2B73795ED57 // An 341data8 0x4041852200000000 // overflow boundary 342data8 0x3FEEB1F345BC2769 // C21 343data8 0xBFC3BBE6E7F3316F // C31 344data8 0x3FF14E07DA5E9983 // C00 345data8 0x3FF53B76BF81E2C0 // C10 346data8 0x4014051E0269A3DC // C40 347data8 0x40229D4227468EDB // C50 348// 349//[24; 32) 350data8 0x3FFAF7BD498384DE // C01 351data8 0x3FF62AD8B4D1C3D2 // C11 352data8 0xBFFABCADCD004C32 // C41 353data8 0xC00FADE97C097EC9 // C51 354data8 0x3FF6DA9ED737707E // C20 355data8 0x4002A29E9E0C782C // C30 356data8 0x44329D5B5167C6C3 // An 357data8 0x4041852200000000 // overflow boundary 358data8 0x3FE8943CBBB4B727 // C21 359data8 0xBFCB39D466E11756 // C31 360data8 0x3FE879AF3243D8C1 // C00 361data8 0x3FEEC7DEBB14CE1E // C10 362data8 0x401017B79BA80BCB // C40 363data8 0x401E941DC3C4DE80 // C50 364// 365//[32; 40) 366data8 0x3FF7ECB3A0E8FE5C // C01 367data8 0x3FF3815A8516316B // C11 368data8 0xBFF9ABD8FCC000C3 // C41 369data8 0xC00DD89969A4195B // C51 370data8 0x3FF2E43139CBF563 // C20 371data8 0x3FFF96DC3474A606 // C30 372data8 0x46AFF4CA9B0DDDF0 // An 373data8 0x4041852200000000 // overflow boundary 374data8 0x3FE4CE76DA1B5783 // C21 375data8 0xBFD0524DB460BC4E // C31 376data8 0x3FE35852DF14E200 // C00 377data8 0x3FE8C7610359F642 // C10 378data8 0x400BCF750EC16173 // C40 379data8 0x401AC14E02EA701C // C50 380// 381//[40; 48) 382data8 0x3FF5DCE4D8193097 // C01 383data8 0x3FF1B0D8C4974FFA // C11 384data8 0xBFF8FB450194CAEA // C41 385data8 0xC00C9658E030A6C4 // C51 386data8 0x3FF068851118AB46 // C20 387data8 0x3FFBF7C7BB46BF7D // C30 388data8 0x3FF0000000000000 // An 389data8 0x4041852200000000 // overflow boundary 390data8 0x3FE231DEB11D847A // C21 391data8 0xBFD251ECAFD7E935 // C31 392data8 0x3FE0368AE288F6BF // C00 393data8 0x3FE513AE4215A70C // C10 394data8 0x4008F960F7141B8B // C40 395data8 0x40183BA08134397B // C50 396// 397//[1.0; 1.25) 398data8 0xBFD9909648921868 // A7 399data8 0x3FE96FFEEEA8520F // A6 400data8 0xBFED0800D93449B8 // A3 401data8 0x3FEFA648D144911C // A2 402data8 0xBFEE3720F7720B4D // A5 403data8 0x3FEF4857A010CA3B // A4 404data8 0xBFE2788CCD545AA4 // A1 405data8 0x3FEFFFFFFFE9209E // A0 406// 407//[1.25; 1.5) 408data8 0xBFB421236426936C // A7 409data8 0x3FAF237514F36691 // A6 410data8 0xBFC0BADE710A10B9 // A3 411data8 0x3FDB6C5465BBEF1F // A2 412data8 0xBFB7E7F83A546EBE // A5 413data8 0x3FC496A01A545163 // A4 414data8 0xBDEE86A39D8452EB // A1 415data8 0x3FEC56DC82A39AA2 // A0 416// 417//[1.5; 1.75) 418data8 0xBF94730B51795867 // A7 419data8 0x3FBF4203E3816C7B // A6 420data8 0xBFE85B427DBD23E4 // A3 421data8 0x3FEE65557AB26771 // A2 422data8 0xBFD59D31BE3AB42A // A5 423data8 0x3FE3C90CC8F09147 // A4 424data8 0xBFE245971DF735B8 // A1 425data8 0x3FEFFC613AE7FBC8 // A0 426// 427//[1.75; 2.0) 428data8 0xBF7746A85137617E // A7 429data8 0x3FA96E37D09735F3 // A6 430data8 0xBFE3C24AC40AC0BB // A3 431data8 0x3FEC56A80A977CA5 // A2 432data8 0xBFC6F0E707560916 // A5 433data8 0x3FDB262D949175BE // A4 434data8 0xBFE1C1AEDFB25495 // A1 435data8 0x3FEFEE1E644B2022 // A0 436// 437// sin(pi*x)/pi 438data8 0xC026FB0D377656CC // S01 439data8 0x3FFFB15F95A22324 // S11 440data8 0x406CE58F4A41C6E7 // S10 441data8 0x404453786302C61E // S20 442data8 0xC023D59A47DBFCD3 // S21 443data8 0x405541D7ABECEFCA // S00 444// 445// 1/An for [40; 48) 446data8 0xCAA7576DE621FCD5, 0x3F68 447LOCAL_OBJECT_END(tgammaf_data) 448 449//============================================================== 450// Code 451//============================================================== 452 453.section .text 454GLOBAL_LIBM_ENTRY(tgammaf) 455{ .mfi 456 getf.exp GR_SignExp = f8 457 fma.s1 FR_NormX = f8,f1,f0 458 addl GR_ad_Data = @ltoff(tgammaf_data), gp 459} 460{ .mfi 461 mov GR_ExpOf05 = 0xFFFE 462 fcvt.fx.trunc.s1 FR_iXt = f8 // [x] 463 mov GR_Offs = 0 // 2 <= x < 8 464};; 465{ .mfi 466 getf.d GR_Arg = f8 467 fcmp.lt.s1 p14,p15 = f8,f0 468 mov GR_Tbl12Offs = 0 469} 470{ .mfi 471 setf.exp FR_05 = GR_ExpOf05 472 fma.s1 FR_2 = f1,f1,f1 // 2 473 mov GR_Correction = 0 474};; 475{ .mfi 476 ld8 GR_ad_Data = [GR_ad_Data] 477 fclass.m p10,p0 = f8,0x1E7 // is x NaTVal, NaN, +/-0 or +/-INF? 478 tbit.z p12,p13 = GR_SignExp,16 // p13 if |x| >= 2 479} 480{ .mfi 481 mov GR_ExpOf1 = 0xFFFF 482 fcvt.fx.s1 FR_rs = f8 // round(x) 483 and GR_Exp2Ind = 7,GR_SignExp 484};; 485.pred.rel "mutex",p14,p15 486{ .mfi 487(p15) cmp.eq.unc p11,p0 = GR_ExpOf1,GR_SignExp // p11 if 1 <= x < 2 488(p14) fma.s1 FR_1mX = f1,f1,f8 // 1 - |x| 489 mov GR_Sig = 0 // if |x| < 2 490} 491{ .mfi 492(p13) cmp.eq.unc p7,p0 = 2,GR_Exp2Ind 493(p15) fms.s1 FR_1mX = f1,f1,f8 // 1 - |x| 494(p13) cmp.eq.unc p8,p0 = 3,GR_Exp2Ind 495};; 496.pred.rel "mutex",p7,p8 497{ .mfi 498(p7) mov GR_Offs = 0x7 // 8 <= |x| < 16 499 nop.f 0 500(p8) tbit.z.unc p0,p6 = GR_Arg,51 501} 502{ .mib 503(p13) cmp.lt.unc p9,p0 = 3,GR_Exp2Ind 504(p8) mov GR_Offs = 0xE // 16 <= |x| < 32 505 // jump if x is NaTVal, NaN, +/-0 or +/-INF? 506(p10) br.cond.spnt tgammaf_spec_args 507};; 508.pred.rel "mutex",p14,p15 509.pred.rel "mutex",p6,p9 510{ .mfi 511(p9) mov GR_Offs = 0x1C // 32 <= |x| 512(p14) fma.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x| 513(p9) tbit.z.unc p0,p8 = GR_Arg,50 514} 515{ .mfi 516 ldfpd FR_LocalMin,FR_10 = [GR_ad_Data],16 517(p15) fms.s1 FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x| 518(p6) add GR_Offs = 0x7,GR_Offs // 24 <= x < 32 519};; 520.pred.rel "mutex",p8,p12 521{ .mfi 522 add GR_ad_Ce = 0x50,GR_ad_Data 523(p15) fcmp.lt.unc.s1 p10,p0 = f8,f1 // p10 if 0 <= x < 1 524 mov GR_OvfNzBound = 2 525} 526{ .mib 527 ldfpd FR_S32,FR_S31 = [GR_ad_Data],16 528(p8) add GR_Offs = 0x7,GR_Offs // 40 <= |x| 529 // jump if 1 <= x < 2 530(p11) br.cond.spnt tgammaf_from_1_to_2 531};; 532{ .mfi 533 shladd GR_ad_Ce = GR_Offs,4,GR_ad_Ce 534 fcvt.xf FR_Xt = FR_iXt // [x] 535(p13) cmp.eq.unc p7,p0 = r0,GR_Offs // p7 if 2 <= |x| < 8 536} 537{ .mfi 538 shladd GR_ad_Co = GR_Offs,4,GR_ad_Data 539 fma.s1 FR_6 = FR_2,FR_2,FR_2 540 mov GR_ExpOf05 = 0x7FC 541};; 542{ .mfi 543(p13) getf.sig GR_Sig = FR_iXt // if |x| >= 2 544 frcpa.s1 FR_Rcp0,p0 = f1,FR_NormX 545(p10) shr GR_Arg = GR_Arg,51 546} 547{ .mib 548 ldfpd FR_C01,FR_C11 = [GR_ad_Co],16 549(p7) mov GR_Correction = 2 550 // jump if 0 < x < 1 551(p10) br.cond.spnt tgammaf_from_0_to_1 552};; 553{ .mfi 554 ldfpd FR_C21,FR_C31 = [GR_ad_Ce],16 555 fma.s1 FR_Rq2 = f1,f1,FR_1mX // 2 - |x| 556(p14) sub GR_Correction = r0,GR_Correction 557} 558{ .mfi 559 ldfpd FR_C41,FR_C51 = [GR_ad_Co],16 560(p14) fcvt.xf FR_rs = FR_rs 561(p14) add GR_ad_SinO = 0x3A0,GR_ad_Data 562};; 563.pred.rel "mutex",p14,p15 564{ .mfi 565 ldfpd FR_C00,FR_C10 = [GR_ad_Ce],16 566 nop.f 0 567(p14) sub GR_Sig = GR_Correction,GR_Sig 568} 569{ .mfi 570 ldfpd FR_C20,FR_C30 = [GR_ad_Co],16 571 fma.s1 FR_Rq1 = FR_1mX,FR_2,FR_X2mX // (x-1)*(x-2) 572(p15) sub GR_Sig = GR_Sig,GR_Correction 573};; 574{ .mfi 575(p14) ldfpd FR_S01,FR_S11 = [GR_ad_SinO],16 576 fma.s1 FR_Rq3 = FR_2,f1,FR_1mX // 3 - |x| 577 and GR_RqDeg = 0x6,GR_Sig 578} 579{ .mfi 580 ldfpd FR_C40,FR_C50 = [GR_ad_Ce],16 581(p14) fma.d.s0 FR_X = f0,f0,f8 // set deno flag 582 mov GR_NanBound = 0x30016 // -2^23 583};; 584.pred.rel "mutex",p14,p15 585{ .mfi 586(p14) add GR_ad_SinE = 0x3C0,GR_ad_Data 587(p15) fms.s1 FR_r = FR_NormX,f1,FR_Xt // r = x - [x] 588 cmp.eq p8,p0 = 2,GR_RqDeg 589} 590{ .mfi 591 ldfpd FR_An,FR_OvfBound = [GR_ad_Co] 592(p14) fms.s1 FR_r = FR_Xt,f1,FR_NormX // r = |x - [x]| 593 cmp.eq p9,p0 = 4,GR_RqDeg 594};; 595.pred.rel "mutex",p8,p9 596{ .mfi 597(p14) ldfpd FR_S21,FR_S00 = [GR_ad_SinE],16 598(p8) fma.s1 FR_Rq0 = FR_2,f1,FR_1mX // (3-x) 599 tbit.z p0,p6 = GR_Sig,0 600} 601{ .mfi 602(p14) ldfpd FR_S10,FR_S20 = [GR_ad_SinO],16 603(p9) fma.s1 FR_Rq0 = FR_2,FR_2,FR_1mX // (5-x) 604 cmp.eq p10,p0 = 6,GR_RqDeg 605};; 606{ .mfi 607(p14) getf.s GR_Arg = f8 608(p14) fcmp.eq.unc.s1 p13,p0 = FR_NormX,FR_Xt 609(p14) mov GR_ZeroResBound = 0xC22C // -43 610} 611{ .mfi 612(p14) ldfe FR_InvAn = [GR_ad_SinE] 613(p10) fma.s1 FR_Rq0 = FR_6,f1,FR_1mX // (7-x) 614 cmp.eq p7,p0 = r0,GR_RqDeg 615};; 616{ .mfi 617(p14) cmp.ge.unc p11,p0 = GR_SignExp,GR_NanBound 618 fma.s1 FR_Rq2 = FR_Rq2,FR_6,FR_X2mX // (x-3)*(x-4) 619(p14) shl GR_ZeroResBound = GR_ZeroResBound,16 620} 621{ .mfb 622(p14) mov GR_OvfNzBound = 0x802 623(p14) fms.s1 FR_rs = FR_rs,f1,FR_NormX // rs = round(x) - x 624 // jump if x < -2^23 i.e. x is negative integer 625(p11) br.cond.spnt tgammaf_singularity 626};; 627{ .mfi 628 nop.m 0 629(p7) fma.s1 FR_Rq1 = f0,f0,f1 630(p14) shl GR_OvfNzBound = GR_OvfNzBound,20 631} 632{ .mfb 633 nop.m 0 634 fma.s1 FR_Rq3 = FR_Rq3,FR_10,FR_X2mX // (x-5)*(x-6) 635 // jump if x is negative integer such that -2^23 < x < 0 636(p13) br.cond.spnt tgammaf_singularity 637};; 638{ .mfi 639 nop.m 0 640 fma.s1 FR_C01 = FR_C01,f1,FR_r 641(p14) mov GR_ExpOf05 = 0xFFFE 642} 643{ .mfi 644(p14) cmp.eq.unc p7,p0 = GR_Arg,GR_OvfNzBound 645 fma.s1 FR_C11 = FR_C11,f1,FR_r 646(p14) cmp.ltu.unc p11,p0 = GR_Arg,GR_OvfNzBound 647};; 648{ .mfi 649 nop.m 0 650 fma.s1 FR_C21 = FR_C21,f1,FR_r 651(p14) cmp.ltu.unc p9,p0 = GR_ZeroResBound,GR_Arg 652} 653{ .mfb 654 nop.m 0 655 fma.s1 FR_C31 = FR_C31,f1,FR_r 656 // jump if argument is close to 0 negative 657(p11) br.cond.spnt tgammaf_overflow 658};; 659{ .mfi 660 nop.m 0 661 fma.s1 FR_C41 = FR_C41,f1,FR_r 662 nop.i 0 663} 664{ .mfb 665 nop.m 0 666 fma.s1 FR_C51 = FR_C51,f1,FR_r 667 // jump if x is negative noninteger such that -2^23 < x < -43 668(p9) br.cond.spnt tgammaf_underflow 669};; 670{ .mfi 671 nop.m 0 672(p14) fma.s1 FR_rs2 = FR_rs,FR_rs,f0 673 nop.i 0 674} 675{ .mfb 676 nop.m 0 677(p14) fma.s1 FR_S01 = FR_rs,FR_rs,FR_S01 678 // jump if argument is 0x80200000 679(p7) br.cond.spnt tgammaf_overflow_near0_bound 680};; 681{ .mfi 682 nop.m 0 683(p6) fnma.s1 FR_Rq1 = FR_Rq1,FR_Rq0,f0 684 nop.i 0 685} 686{ .mfi 687 nop.m 0 688(p10) fma.s1 FR_Rq2 = FR_Rq2,FR_Rq3,f0 689 and GR_Sig = 0x7,GR_Sig 690};; 691{ .mfi 692 nop.m 0 693 fma.s1 FR_C01 = FR_C01,FR_r,FR_C00 694 nop.i 0 695} 696{ .mfi 697 nop.m 0 698 fma.s1 FR_C11 = FR_C11,FR_r,FR_C10 699 cmp.eq p6,p7 = r0,GR_Sig // p6 if |x| from one of base intervals 700};; 701{ .mfi 702 nop.m 0 703 fma.s1 FR_C21 = FR_C21,FR_r,FR_C20 704 nop.i 0 705} 706{ .mfi 707 nop.m 0 708 fma.s1 FR_C31 = FR_C31,FR_r,FR_C30 709(p7) cmp.lt.unc p9,p0 = 2,GR_RqDeg 710};; 711{ .mfi 712 nop.m 0 713(p14) fma.s1 FR_S11 = FR_rs,FR_rs,FR_S11 714 nop.i 0 715} 716{ .mfi 717 nop.m 0 718(p14) fma.s1 FR_S21 = FR_rs,FR_rs,FR_S21 719 nop.i 0 720};; 721{ .mfi 722 nop.m 0 723 fma.s1 FR_C41 = FR_C41,FR_r,FR_C40 724 nop.i 0 725} 726{ .mfi 727 nop.m 0 728(p14) fma.s1 FR_S32 = FR_rs2,FR_S32,FR_S31 729 nop.i 0 730};; 731{ .mfi 732 nop.m 0 733(p9) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0 734 nop.i 0 735} 736{ .mfi 737 nop.m 0 738 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50 739 nop.i 0 740};; 741{ .mfi 742(p14) getf.exp GR_SignExp = FR_rs 743 fma.s1 FR_C01 = FR_C01,FR_C11,f0 744 nop.i 0 745} 746{ .mfi 747 nop.m 0 748(p14) fma.s1 FR_S01 = FR_S01,FR_rs2,FR_S00 749 nop.i 0 750};; 751{ .mfi 752 nop.m 0 753 fma.s1 FR_C21 = FR_C21,FR_C31,f0 754 nop.i 0 755} 756{ .mfi 757 nop.m 0 758 // NR-iteration 759(p14) fnma.s1 FR_InvNormX1 = FR_Rcp0,FR_NormX,f1 760 nop.i 0 761};; 762{ .mfi 763 nop.m 0 764(p14) fma.s1 FR_S11 = FR_S11,FR_rs2,FR_S10 765(p14) tbit.z.unc p11,p12 = GR_SignExp,17 766} 767{ .mfi 768 nop.m 0 769(p14) fma.s1 FR_S21 = FR_S21,FR_rs2,FR_S20 770 nop.i 0 771};; 772{ .mfi 773 nop.m 0 774(p15) fcmp.lt.unc.s1 p0,p13 = FR_NormX,FR_OvfBound 775 nop.i 0 776} 777{ .mfi 778 nop.m 0 779(p14) fma.s1 FR_S32 = FR_rs2,FR_S32,f0 780 nop.i 0 781};; 782{ .mfi 783 nop.m 0 784 fma.s1 FR_C41 = FR_C41,FR_C51,f0 785 nop.i 0 786} 787{ .mfi 788 nop.m 0 789(p7) fma.s1 FR_An = FR_Rq1,FR_An,f0 790 nop.i 0 791};; 792{ .mfb 793 nop.m 0 794 nop.f 0 795 // jump if x > 35.04010009765625 796(p13) br.cond.spnt tgammaf_overflow 797};; 798{ .mfi 799 nop.m 0 800 // NR-iteration 801(p14) fma.s1 FR_InvNormX1 = FR_Rcp0,FR_InvNormX1,FR_Rcp0 802 nop.i 0 803};; 804{ .mfi 805 nop.m 0 806(p14) fma.s1 FR_S01 = FR_S01,FR_S11,f0 807 nop.i 0 808};; 809{ .mfi 810 nop.m 0 811(p14) fma.s1 FR_S21 = FR_S21,FR_S32,f0 812 nop.i 0 813};; 814{ .mfi 815(p14) getf.exp GR_SignExp = FR_NormX 816 fma.s1 FR_C01 = FR_C01,FR_C21,f0 817 nop.i 0 818} 819{ .mfi 820 nop.m 0 821 fma.s1 FR_C41 = FR_C41,FR_An,f0 822(p14) mov GR_ExpOf1 = 0x2FFFF 823};; 824{ .mfi 825 nop.m 0 826 // NR-iteration 827(p14) fnma.s1 FR_InvNormX2 = FR_InvNormX1,FR_NormX,f1 828 nop.i 0 829};; 830.pred.rel "mutex",p11,p12 831{ .mfi 832 nop.m 0 833(p12) fnma.s1 FR_S01 = FR_S01,FR_S21,f0 834 nop.i 0 835} 836{ .mfi 837 nop.m 0 838(p11) fma.s1 FR_S01 = FR_S01,FR_S21,f0 839 nop.i 0 840};; 841 842{ .mfi 843 nop.m 0 844(p14) fma.s1 FR_GAMMA = FR_C01,FR_C41,f0 845(p14) tbit.z.unc p6,p7 = GR_Sig,0 846} 847{ .mfb 848 nop.m 0 849(p15) fma.s.s0 f8 = FR_C01,FR_C41,f0 850(p15) br.ret.spnt b0 // exit for positives 851};; 852.pred.rel "mutex",p11,p12 853{ .mfi 854 nop.m 0 855(p12) fms.s1 FR_S01 = FR_rs,FR_S01,FR_rs 856 nop.i 0 857} 858{ .mfi 859 nop.m 0 860(p11) fma.s1 FR_S01 = FR_rs,FR_S01,FR_rs 861 nop.i 0 862};; 863{ .mfi 864 nop.m 0 865 // NR-iteration 866 fma.s1 FR_InvNormX2 = FR_InvNormX1,FR_InvNormX2,FR_InvNormX1 867 cmp.eq p10,p0 = 0x23,GR_Offs 868};; 869.pred.rel "mutex",p6,p7 870{ .mfi 871 nop.m 0 872(p6) fma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0 873 cmp.gtu p8,p0 = GR_SignExp,GR_ExpOf1 874} 875{ .mfi 876 nop.m 0 877(p7) fnma.s1 FR_GAMMA = FR_S01,FR_GAMMA,f0 878 cmp.eq p9,p0 = GR_SignExp,GR_ExpOf1 879};; 880{ .mfi 881 nop.m 0 882 // NR-iteration 883 fnma.s1 FR_InvNormX1 = FR_InvNormX2,FR_NormX,f1 884 nop.i 0 885} 886{ .mfi 887 nop.m 0 888(p10) fma.s1 FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0 889 nop.i 0 890};; 891{ .mfi 892 nop.m 0 893 frcpa.s1 FR_Rcp0,p0 = f1,FR_GAMMA 894 nop.i 0 895};; 896{ .mfi 897 nop.m 0 898 fms.s1 FR_Multplr = FR_NormX,f1,f1 // x - 1 899 nop.i 0 900};; 901{ .mfi 902 nop.m 0 903 // NR-iteration 904 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1 905 nop.i 0 906};; 907.pred.rel "mutex",p8,p9 908{ .mfi 909 nop.m 0 910 // 1/x or 1/(An*x) 911(p8) fma.s1 FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2 912 nop.i 0 913} 914{ .mfi 915 nop.m 0 916(p9) fma.s1 FR_Multplr = f1,f1,f0 917 nop.i 0 918};; 919{ .mfi 920 nop.m 0 921 // NR-iteration 922 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 923 nop.i 0 924};; 925{ .mfi 926 nop.m 0 927 // NR-iteration 928 fnma.s1 FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1 929 nop.i 0 930} 931{ .mfi 932 nop.m 0 933 // NR-iteration 934 fma.s1 FR_Rcp1 = FR_Rcp1,FR_Multplr,f0 935 nop.i 0 936};; 937{ .mfb 938 nop.m 0 939 fma.s.s0 f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1 940 br.ret.sptk b0 941};; 942 943// here if 0 < x < 1 944//-------------------------------------------------------------------- 945.align 32 946tgammaf_from_0_to_1: 947{ .mfi 948 cmp.lt p7,p0 = GR_Arg,GR_ExpOf05 949 // NR-iteration 950 fnma.s1 FR_Rcp1 = FR_Rcp0,FR_NormX,f1 951 cmp.eq p8,p0 = GR_Arg,GR_ExpOf05 952} 953{ .mfi 954 cmp.gt p9,p0 = GR_Arg,GR_ExpOf05 955 fma.s1 FR_r = f0,f0,FR_NormX // reduced arg for (0;1) 956 mov GR_ExpOf025 = 0x7FA 957};; 958{ .mfi 959 getf.s GR_ArgNz = f8 960 fma.d.s0 FR_X = f0,f0,f8 // set deno flag 961 shl GR_OvfNzBound = GR_OvfNzBound,20 962} 963{ .mfi 964(p8) mov GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75 965 nop.f 0 966(p7) cmp.ge.unc p6,p0 = GR_Arg,GR_ExpOf025 967};; 968.pred.rel "mutex",p6,p9 969{ .mfi 970(p9) mov GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1 971 nop.f 0 972(p6) mov GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5 973} 974{ .mfi 975 add GR_ad_Ce = 0x2C0,GR_ad_Data 976 nop.f 0 977 add GR_ad_Co = 0x2A0,GR_ad_Data 978};; 979{ .mfi 980 add GR_ad_Co = GR_ad_Co,GR_Tbl12Offs 981 nop.f 0 982 cmp.lt p12,p0 = GR_ArgNz,GR_OvfNzBound 983} 984{ .mib 985 add GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs 986 cmp.eq p7,p0 = GR_ArgNz,GR_OvfNzBound 987 // jump if argument is 0x00200000 988(p7) br.cond.spnt tgammaf_overflow_near0_bound 989};; 990{ .mmb 991 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16 992 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16 993 // jump if argument is close to 0 positive 994(p12) br.cond.spnt tgammaf_overflow 995};; 996{ .mfi 997 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16 998 // NR-iteration 999 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 1000 nop.i 0 1001} 1002{ .mfb 1003 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16 1004 nop.f 0 1005 br.cond.sptk tgamma_from_0_to_2 1006};; 1007 1008// here if 1 < x < 2 1009//-------------------------------------------------------------------- 1010.align 32 1011tgammaf_from_1_to_2: 1012{ .mfi 1013 add GR_ad_Co = 0x2A0,GR_ad_Data 1014 fms.s1 FR_r = f0,f0,FR_1mX 1015 shr GR_TblOffs = GR_Arg,47 1016} 1017{ .mfi 1018 add GR_ad_Ce = 0x2C0,GR_ad_Data 1019 nop.f 0 1020 mov GR_TblOffsMask = 0x18 1021};; 1022{ .mfi 1023 nop.m 0 1024 nop.f 0 1025 and GR_TblOffs = GR_TblOffs,GR_TblOffsMask 1026};; 1027{ .mfi 1028 shladd GR_ad_Co = GR_TblOffs,3,GR_ad_Co 1029 nop.f 0 1030 nop.i 0 1031} 1032{ .mfi 1033 shladd GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce 1034 nop.f 0 1035 cmp.eq p6,p7 = 8,GR_TblOffs 1036};; 1037{ .mmi 1038 ldfpd FR_A7,FR_A6 = [GR_ad_Co],16 1039 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16 1040 nop.i 0 1041};; 1042{ .mmi 1043 ldfpd FR_A3,FR_A2 = [GR_ad_Co],16 1044 ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16 1045 nop.i 0 1046};; 1047 1048.align 32 1049tgamma_from_0_to_2: 1050{ .mfi 1051 nop.m 0 1052(p6) fms.s1 FR_r = FR_r,f1,FR_LocalMin 1053 nop.i 0 1054};; 1055{ .mfi 1056 nop.m 0 1057 // NR-iteration 1058(p10) fnma.s1 FR_Rcp2 = FR_Rcp1,FR_NormX,f1 1059 nop.i 0 1060};; 1061{ .mfi 1062 nop.m 0 1063 fms.s1 FR_r2 = FR_r,FR_r,f0 1064 nop.i 0 1065};; 1066{ .mfi 1067 nop.m 0 1068 fma.s1 FR_A7 = FR_A7,FR_r,FR_A6 1069 nop.i 0 1070} 1071{ .mfi 1072 nop.m 0 1073 fma.s1 FR_A5 = FR_A5,FR_r,FR_A4 1074 nop.i 0 1075};; 1076{ .mfi 1077 nop.m 0 1078 fma.s1 FR_A3 = FR_A3,FR_r,FR_A2 1079 nop.i 0 1080} 1081{ .mfi 1082 nop.m 0 1083 fma.s1 FR_A1 = FR_A1,FR_r,FR_A0 1084 nop.i 0 1085};; 1086{ .mfi 1087 nop.m 0 1088 // NR-iteration 1089(p10) fma.s1 FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1 1090 nop.i 0 1091};; 1092{ .mfi 1093 nop.m 0 1094 fma.s1 FR_A7 = FR_A7,FR_r2,FR_A5 1095 nop.i 0 1096} 1097{ .mfi 1098 nop.m 0 1099 fma.s1 FR_r4 = FR_r2,FR_r2,f0 1100 nop.i 0 1101};; 1102{ .mfi 1103 nop.m 0 1104 fma.s1 FR_A3 = FR_A3,FR_r2,FR_A1 1105 nop.i 0 1106};; 1107{ .mfi 1108 nop.m 0 1109(p10) fma.s1 FR_GAMMA = FR_A7,FR_r4,FR_A3 1110 nop.i 0 1111} 1112{ .mfi 1113 nop.m 0 1114(p11) fma.s.s0 f8 = FR_A7,FR_r4,FR_A3 1115 nop.i 0 1116};; 1117{ .mfb 1118 nop.m 0 1119(p10) fma.s.s0 f8 = FR_GAMMA,FR_Rcp2,f0 1120 br.ret.sptk b0 1121};; 1122 1123 1124// overflow 1125//-------------------------------------------------------------------- 1126.align 32 1127tgammaf_overflow_near0_bound: 1128.pred.rel "mutex",p14,p15 1129{ .mfi 1130 mov GR_fpsr = ar.fpsr 1131 nop.f 0 1132(p15) mov r8 = 0x7f8 1133} 1134{ .mfi 1135 nop.m 0 1136 nop.f 0 1137(p14) mov r8 = 0xff8 1138};; 1139{ .mfi 1140 nop.m 0 1141 nop.f 0 1142 shl r8 = r8,20 1143};; 1144{ .mfi 1145 sub r8 = r8,r0,1 1146 nop.f 0 1147 extr.u GR_fpsr = GR_fpsr,10,2 // rounding mode 1148};; 1149.pred.rel "mutex",p14,p15 1150{ .mfi 1151 // set p8 to 0 in case of overflow and to 1 otherwise 1152 // for negative arg: 1153 // no overflow if rounding mode either Z or +Inf, i.e. 1154 // GR_fpsr > 1 1155(p14) cmp.lt p8,p0 = 1,GR_fpsr 1156 nop.f 0 1157 // for positive arg: 1158 // no overflow if rounding mode either Z or -Inf, i.e. 1159 // (GR_fpsr & 1) == 0 1160(p15) tbit.z p0,p8 = GR_fpsr,0 1161};; 1162{ .mib 1163(p8) setf.s f8 = r8 // set result to 0x7f7fffff without 1164 // OVERFLOW flag raising 1165 nop.i 0 1166(p8) br.ret.sptk b0 1167};; 1168 1169.align 32 1170tgammaf_overflow: 1171{ .mfi 1172 nop.m 0 1173 nop.f 0 1174 mov r8 = 0x1FFFE 1175};; 1176{ .mfi 1177 setf.exp f9 = r8 1178 fmerge.s FR_X = f8,f8 1179 nop.i 0 1180};; 1181.pred.rel "mutex",p14,p15 1182{ .mfi 1183 nop.m 0 1184(p14) fnma.s.s0 f8 = f9,f9,f0 // set I,O and -INF result 1185 mov GR_TAG = 261 // overflow 1186} 1187{ .mfb 1188 nop.m 0 1189(p15) fma.s.s0 f8 = f9,f9,f0 // set I,O and +INF result 1190 br.cond.sptk tgammaf_libm_err 1191};; 1192 1193// x is negative integer or +/-0 1194//-------------------------------------------------------------------- 1195.align 32 1196tgammaf_singularity: 1197{ .mfi 1198 nop.m 0 1199 fmerge.s FR_X = f8,f8 1200 mov GR_TAG = 262 // negative 1201} 1202{ .mfb 1203 nop.m 0 1204 frcpa.s0 f8,p0 = f0,f0 1205 br.cond.sptk tgammaf_libm_err 1206};; 1207// x is negative noninteger with big absolute value 1208//-------------------------------------------------------------------- 1209.align 32 1210tgammaf_underflow: 1211{ .mfi 1212 mov r8 = 0x00001 1213 nop.f 0 1214 tbit.z p6,p7 = GR_Sig,0 1215};; 1216{ .mfi 1217 setf.exp f9 = r8 1218 nop.f 0 1219 nop.i 0 1220};; 1221.pred.rel "mutex",p6,p7 1222{ .mfi 1223 nop.m 0 1224(p6) fms.s.s0 f8 = f9,f9,f9 1225 nop.i 0 1226} 1227{ .mfb 1228 nop.m 0 1229(p7) fma.s.s0 f8 = f9,f9,f9 1230 br.ret.sptk b0 1231};; 1232 1233// x for natval, nan, +/-inf or +/-0 1234//-------------------------------------------------------------------- 1235.align 32 1236tgammaf_spec_args: 1237{ .mfi 1238 nop.m 0 1239 fclass.m p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf 1240 nop.i 0 1241};; 1242{ .mfi 1243 nop.m 0 1244 fclass.m p7,p8 = f8,0x7 // +/-0 1245 nop.i 0 1246};; 1247{ .mfi 1248 nop.m 0 1249 fmerge.s FR_X = f8,f8 1250 nop.i 0 1251} 1252{ .mfb 1253 nop.m 0 1254(p6) fma.s.s0 f8 = f8,f1,f8 1255(p6) br.ret.spnt b0 1256};; 1257.pred.rel "mutex",p7,p8 1258{ .mfi 1259(p7) mov GR_TAG = 262 // negative 1260(p7) frcpa.s0 f8,p0 = f1,f8 1261 nop.i 0 1262} 1263{ .mib 1264 nop.m 0 1265 nop.i 0 1266(p8) br.cond.spnt tgammaf_singularity 1267};; 1268 1269.align 32 1270tgammaf_libm_err: 1271{ .mfi 1272 alloc r32 = ar.pfs,1,4,4,0 1273 nop.f 0 1274 mov GR_Parameter_TAG = GR_TAG 1275};; 1276 1277GLOBAL_LIBM_END(tgammaf) 1278libm_alias_float_other (tgamma, tgamma) 1279 1280LOCAL_LIBM_ENTRY(__libm_error_region) 1281.prologue 1282{ .mfi 1283 add GR_Parameter_Y=-32,sp // Parameter 2 value 1284 nop.f 0 1285.save ar.pfs,GR_SAVE_PFS 1286 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs 1287} 1288{ .mfi 1289.fframe 64 1290 add sp=-64,sp // Create new stack 1291 nop.f 0 1292 mov GR_SAVE_GP=gp // Save gp 1293};; 1294{ .mmi 1295 stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack 1296 add GR_Parameter_X = 16,sp // Parameter 1 address 1297.save b0, GR_SAVE_B0 1298 mov GR_SAVE_B0=b0 // Save b0 1299};; 1300.body 1301{ .mib 1302 stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack 1303 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address 1304 nop.b 0 1305} 1306{ .mib 1307 stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack 1308 add GR_Parameter_Y = -16,GR_Parameter_Y 1309 br.call.sptk b0=__libm_error_support# // Call error handling function 1310};; 1311{ .mmi 1312 nop.m 0 1313 nop.m 0 1314 add GR_Parameter_RESULT = 48,sp 1315};; 1316{ .mmi 1317 ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack 1318.restore sp 1319 add sp = 64,sp // Restore stack pointer 1320 mov b0 = GR_SAVE_B0 // Restore return address 1321};; 1322{ .mib 1323 mov gp = GR_SAVE_GP // Restore gp 1324 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs 1325 br.ret.sptk b0 // Return 1326};; 1327 1328LOCAL_LIBM_END(__libm_error_region) 1329.type __libm_error_support#,@function 1330.global __libm_error_support# 1331