1.file "libm_lgamma.s"
2
3
4// Copyright (c) 2002 - 2005, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
25// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
28// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code,and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39//*********************************************************************
40//
41// History:
42// 01/10/02  Initial version
43// 01/25/02  Corrected error tag numbers
44// 02/04/02  Added support of SIGN(GAMMA(x)) calculation
45// 05/20/02  Cleaned up namespace and sf0 syntax
46// 09/15/02  Fixed bug on the branch lgamma_negrecursion
47// 10/21/02  Now it returns SIGN(GAMMA(x))=-1 for negative zero
48// 02/10/03  Reordered header: .section, .global, .proc, .align
49// 07/22/03  Reformatted some data tables
50// 03/31/05  Reformatted delimiters between data tables
51//
52//*********************************************************************
53//
54//*********************************************************************
55//
56// Function: __libm_lgamma(double x, int* signgam, int szsigngam)
57// computes the principal value of the logarithm of the GAMMA function
58// of x. Signum of GAMMA(x) is stored to memory starting at the address
59// specified by the signgam.
60//
61//*********************************************************************
62//
63// Resources Used:
64//
65//    Floating-Point Registers: f6-f15
66//                              f32-f122
67//
68//    General Purpose Registers:
69//      r8-r11
70//      r14-r31
71//      r32-r36
72//      r37-r40 (Used to pass arguments to error handling routine)
73//
74//    Predicate Registers:      p6-p15
75//
76//*********************************************************************
77//
78// IEEE Special Conditions:
79//
80//    __libm_lgamma(+inf) = +inf
81//    __libm_lgamma(-inf) = QNaN
82//    __libm_lgamma(+/-0) = +inf
83//    __libm_lgamma(x<0, x is an integer) = +inf
84//    __libm_lgamma(SNaN) = QNaN
85//    __libm_lgamma(QNaN) = QNaN
86//
87//*********************************************************************
88//
89// Overview
90//
91// The method consists of six cases.
92//
93// If      512 <= x < OVERFLOW_BOUNDARY   use case lgamma_pstirling;
94// else if 1 < x < 512                    use case lgamma_regular;
95// else if -17 < x < 1                    use case lgamma_negrecursion;
96// else if -512 <  x < -17                use case lgamma_negpoly;
97// else if x < -512                       use case lgamma_negstirling;
98// else if x is close to negative
99//         roots of ln(GAMMA(x))          use case lgamma_negroots;
100//
101//
102// Case 512 <= x < OVERFLOW_BOUNDARY
103// ---------------------------------
104//   Here we use algorithm based on the Stirling formula:
105//   ln(GAMMA(x)) = ln(sqrt(2*Pi)) + (x-0.5)ln(x) - x + (W2 + W4/x^2)/x
106//
107// Case 1 < x < 512
108// ----------------
109//   To calculate GAMMA(x) on this interval we use polynomial approximation
110//   on following intervals [0.875; 1.25), [1.25; 1.75), [1.75, 2.25),
111//   [2.25; 4), [2^i; 2^(i+1)), i=2..8
112//
113//   Following variants of approximation and argument reduction are used:
114//    1. [0.875; 1.25)
115//       ln(GAMMA(x)) ~ (x-1.0)*P17(x-1.0)
116//
117//    2. [1.25; 1.75)
118//       ln(GAMMA(x)) ~ (x-LocalMinimum)*P17(x-LocalMinimum)
119//
120//    3. [1.75, 2.25)
121//       ln(GAMMA(x)) ~ (x-2.0)*P17(x-2.0)
122//
123//    4. [2.25; 4)
124//       ln(GAMMA(x)) ~ P22(x)
125//
126//    5. [2^i; 2^(i+1)), i=2..8
127//       ln(GAMMA(x)) ~ P22((x-2^i)/2^i)
128//
129// Case -17 < x < 1
130// ----------------
131//   Here we use the recursive formula:
132//   ln(GAMMA(x)) = ln(GAMMA(x+1)) - ln(x)
133//
134//   Using this formula we reduce argument to base interval [1.0; 2.0]
135//
136// Case -512 <  x < -17
137// --------------------
138//   Here we use the formula:
139//   ln(GAMMA(-x)) = ln(Pi/(x*GAMMA(x)*sin(Pi*x))) =
140//   = -ln(x) - ln(GAMMA(x)) - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
141//   where r = x - rounded_to_nearest(x), i.e |r| <= 0.5 and
142//   ln(sin(Pi*r)/(Pi*r)) is approximated by 14-degree polynomial of r^2
143//
144//
145// Case x < -512
146// -------------
147//   Here we use algorithm based on the Stirling formula:
148//   ln(GAMMA(-x)) = -ln(sqrt(2*Pi)) + (-x-0.5)ln(x) + x - (W2 + W4/x^2)/x -
149//   - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
150//   where r = x - rounded_to_nearest(x).
151//
152// Neighbourhoods of negative roots
153// --------------------------------
154//   Here we use polynomial approximation
155//   ln(GAMMA(x)) = ln(GAMMA(x0)) + (x-x0)*P14(x-x0),
156//   where x0 is a root of ln(GAMMA(x)) rounded to nearest double
157//   precision number.
158//
159
160//*********************************************************************
161// Symbolic names for floating-point registers; names bound to the same fN alias each other
162FR_X                   = f10 // same register as FR_N
163FR_Y                   = f1 // __libm_lgamma is single argument function
164FR_RESULT              = f8
165
166FR_B11                 = f6
167FR_B10                 = f7
168
169FR_int_N               = f9
170FR_N                   = f10 // same register as FR_X
171FR_P5                  = f11
172FR_P4                  = f12
173FR_P3                  = f13
174FR_P2                  = f14
175FR_NormX               = f15
176// FR_Cxx/FR_CN, FR_Px, FR_Qx, FR_W2/FR_W4 share labels with lgamma_data table comments (presumably loaded from there - confirm)
177FR_Ln2                 = f32
178FR_C01                 = f33
179FR_A17                 = f33
180FR_C00                 = f34
181FR_Xp2                 = f34
182FR_A00                 = f34
183FR_A16                 = f34
184FR_C11                 = f35
185FR_A15                 = f35
186FR_C10                 = f36
187FR_Xp3                 = f36
188FR_A14                 = f36
189FR_B1                  = f36
190FR_C21                 = f37
191FR_A13                 = f37
192FR_PR01                = f37
193FR_C20                 = f38
194FR_Xp6                 = f38
195FR_A12                 = f38
196FR_C31                 = f39
197FR_Xp7                 = f39
198FR_B0                  = f39
199FR_A11                 = f39
200FR_C30                 = f40
201FR_Xp8                 = f40
202FR_A10                 = f40
203FR_PR00                = f40
204FR_C41                 = f41
205FR_Xp9                 = f41
206FR_A9                  = f41
207FR_PR11                = f41
208FR_C40                 = f42
209FR_A8                  = f42
210FR_C51                 = f43
211FR_Xp11                = f43
212FR_A7                  = f43
213FR_C50                 = f44
214FR_C                   = f44
215FR_Xp12                = f44
216FR_A6                  = f44
217FR_Xm2                 = f45
218FR_Xp13                = f45
219FR_A5                  = f45
220FR_PR10                = f45
221FR_C61                 = f46
222FR_Xp14                = f46
223FR_A4                  = f46
224FR_PR21                = f46
225FR_C60                 = f47
226FR_Xp15                = f47
227FR_A3                  = f47
228FR_PR20                = f47
229FR_C71                 = f48
230FR_Xp16                = f48
231FR_A2                  = f48
232FR_PR31                = f48
233FR_C70                 = f49
234FR_Xp17                = f49
235FR_A1                  = f49
236FR_PR30                = f49
237FR_C81                 = f50
238FR_B17                 = f50
239FR_A0                  = f50
240FR_C80                 = f51
241FR_B16                 = f51
242FR_C91                 = f52
243FR_B15                 = f52
244FR_C90                 = f53
245FR_B14                 = f53
246FR_CA1                 = f54
247FR_B13                 = f54
248FR_CA0                 = f55
249FR_B12                 = f55
250FR_CN                  = f56
251FR_Qlo                 = f56
252FR_PRN                 = f56
253FR_B7                  = f57
254FR_B6                  = f58
255FR_Qhi                 = f59
256FR_x                   = f60
257FR_x2                  = f61
258FR_TpNxLn2             = f62
259FR_W2                  = f63
260FR_x4                  = f64
261FR_r4                  = f64
262FR_x8                  = f65
263FR_r8                  = f65
264FR_r05                 = f66
265FR_Xm05                = f66
266FR_B5                  = f66
267FR_LnSqrt2Pi           = f67
268FR_B4                  = f67
269FR_InvX                = f68
270FR_B3                  = f68
271FR_InvX2               = f69
272FR_B2                  = f69
273FR_W4                  = f70
274FR_OvfBound            = f71
275FR_05                  = f72
276FR_LocalMin            = f73 // same register as FR_tmp
277FR_tmp                 = f73 // same register as FR_LocalMin
278FR_LnX                 = f74
279FR_Xf                  = f75
280FR_InvXf               = f76
281FR_rf                  = f77
282FR_rf2                 = f78
283FR_P54f                = f79
284FR_P32f                = f80
285FR_rf3                 = f81
286FR_P10f                = f82
287FR_TpNxLn2f            = f83
288FR_Nf                  = f84
289FR_LnXf                = f85
290FR_int_Nf              = f86
291FR_Tf                  = f87
292FR_Xf2                 = f88
293FR_Xp10                = f89
294FR_w3                  = f90
295FR_S28                 = f90
296FR_w2                  = f91
297FR_S26                 = f91
298FR_w6                  = f92
299FR_S24                 = f92
300FR_w4                  = f93
301FR_S22                 = f93
302FR_w                   = f94
303FR_S20                 = f94
304FR_Q8                  = f95
305FR_S18                 = f95
306FR_Q7                  = f96
307FR_S16                 = f96
308FR_Q4                  = f97
309FR_S14                 = f97
310FR_Q3                  = f98
311FR_S12                 = f98
312FR_Q6                  = f99
313FR_S10                 = f99
314FR_Q5                  = f100
315FR_S8                  = f100
316FR_Q2                  = f101
317FR_S6                  = f101
318FR_Root                = f101
319FR_S4                  = f102
320FR_Q1                  = f102
321FR_S2                  = f103
322FR_Xp1                 = f104
323FR_Xf4                 = f105
324FR_Xf8                 = f106
325FR_Xfr                 = f107
326FR_Xf6                 = f108
327FR_Ntrunc              = f109
328FR_B9                  = f110
329FR_2                   = f110
330FR_B8                  = f111
331FR_3                   = f111
332FR_5                   = f112
333FR_Xp4                 = f113
334FR_Xp5                 = f114
335FR_P54                 = f115
336FR_P32                 = f116
337FR_P10                 = f117
338FR_r                   = f118
339FR_r2                  = f119
340FR_r3                  = f120
341FR_T                   = f121
342FR_int_Ntrunc          = f122
343
344//===================================
345// Symbolic names for general registers; several names are bound to the same rN and alias each other
346GR_TAG                 = r8
347GR_ExpMask             = r8
348GR_ExpBias             = r9
349GR_ad_Roots            = r9
350GR_Expf                = r10
351GR_Arg                 = r10
352GR_SignExp             = r11
353GR_ArgXfr              = r11
354
355GR_Exp                 = r14
356GR_Arg125              = r14
357GR_RootInd             = r14
358GR_ArgAsIs             = r15
359GR_Arg175              = r15
360GR_Sig                 = r16
361GR_Ind                 = r17 // NOTE(review): GR_Ind is assigned again (= r24) further down - confirm which binding is intended
362GR_ad_Dx               = r17
363GR_ad_1                = r18
364GR_SignExp_w           = r19
365GR_2_25                = r19
366GR_Arg025              = r19
367GR_Arg15               = r19
368GR_Arg17               = r19
369GR_Exp_w               = r19//21
370GR_ad_2                = r20
371GR_2xDx                = r21
372GR_SignOfGamma         = r21
373GR_fff9                = r22
374GR_Offs                = r22
375GR_ad_Co7              = r23
376GR_Arg075              = r23
377GR_Arg0875             = r23
378GR_ad_T                = r24
379GR_ad_Root             = r24
380GR_Ind                 = r24 // second assignment of GR_Ind (first was r17); also shares r24 with GR_ad_T and GR_ad_Root - NOTE(review): confirm
381GR_ad_Co               = r25
382GR_ad_Ce               = r26
383GR_ad_Ce7              = r27
384GR_Arg05               = r27
385GR_Offs7               = r28
386GR_ArgXfrAsIs          = r28
387GR_ExpOf2              = r29
388GR_ad_LnT              = r29
389GR_Dx                  = r29
390GR_ExpOf256            = r30
391GR_0x30033             = r30
392GR_Root                = r30
393GR_PseudoRoot          = r30
394GR_ad_Data             = r31
395GR_ad_SignGam          = r31
396
397// NOTE(review): r33-r36 presumably preserve b0/ar.pfs/gp/sp, going by the names - confirm against the code
398GR_SAVE_B0             = r33
399GR_SAVE_PFS            = r34
400GR_SAVE_GP             = r35
401GR_SAVE_SP             = r36
402// r37-r40 are used to pass arguments to the error handling routine (per "Resources Used" in the file header)
403GR_Parameter_X         = r37
404GR_Parameter_Y         = r38
405GR_Parameter_RESULT    = r39
406GR_Parameter_TAG       = r40
407
408
409
410// Data tables
411//==============================================================
412
413RODATA
414.align 16
415LOCAL_OBJECT_START(lgamma_data)
416// polynomial approximation of ln(GAMMA(x)), 2.25 <= x < 512
417// [2.25; 4)
418data8 0xF888E8D7892718A2,0xC001 // C01
419data8 0xF62F273BA12A4639,0x3FFD // C11
420data8 0xA93AC50A37EC8D38,0xBFFC // C21
421data8 0xB4CC43D2C161E057,0xBFFF // C31
422data8 0xC6AC672F0C1392C7,0xC000 // C41
423data8 0xA292B9AE3276942E,0xC001 // C51
424data8 0xE554E4CCCA6C7B7B,0xC001 // C61
425data8 0x92F0F55FBC87F860,0xC002 // C71
426data8 0xAF60D0112843F6C1,0xC002 // C81
427data8 0xC5956500FA3D92E7,0xC002 // C91
428data8 0xD3B22CCBD8587750,0xC002 // CA1
429data8 0xD888B6CF34159B54,0x4001 // C00
430data8 0xBCB79C8329FD9F44,0x3FFE // C10
431data8 0xCB8896FAD69C455D,0x4000 // C20
432data8 0xE510A424639EBF5E,0x4001 // C30
433data8 0xC65ED41B097486B3,0x4002 // C40
434// [4; 8)
435data8 0x9F1F3C822D03080E,0xC001 // C01
436data8 0x941CACFA9C0FA8A6,0xC001 // C11
437data8 0xFE34336391D99CB7,0xC000 // C21
438data8 0xC40BAEAA165F81A1,0xC000 // C31
439data8 0xFE3AE166E9B4DE8F,0xBFFF // C41
440data8 0xD744F91AF7DAF873,0xBFFE // C51
441data8 0x87871851E9C32D02,0x3FFD // C61
442data8 0x9C93C03C502E808F,0x3FFF // C71
443data8 0xF78BED07501D6A8E,0x3FFF // C81
444data8 0x92FE41BA8BEADF70,0x4000 // C91
445data8 0xA021878E1903A2C6,0x3FFF // CA1
446data8 0xC85EFAC379FAFEE2,0x4001 // C00
447data8 0xC10D7AAB7CEC7FF2,0x4001 // C10
448data8 0xB3537BDF603E454C,0x4001 // C20
449data8 0xA0D44E3D5BBE44C4,0x4001 // C30
450data8 0x8B9C229B6241E7B3,0x4001 // C40
451// [8; 16)
452data8 0xD16AB33AEC220DF6,0x3FFF // C01
453data8 0x987483646E150BCD,0x4000 // C11
454data8 0x80C10A24C863999B,0x4000 // C21
455data8 0xA39A8EB6F8AACE75,0x3FFF // C31
456data8 0x93E04A1379BEC764,0x3FFD // C41
457data8 0xD9F59C4BD3A69BD1,0xBFFE // C51
458data8 0x82094EC891179B1A,0xC000 // C61
459data8 0xC90CFE3A24F70659,0xC000 // C71
460data8 0x827984EA7C155184,0xC001 // C81
461data8 0x981BFDF79D1E0D80,0xC001 // C91
462data8 0xA37209A8B97D230D,0xC001 // CA1
463data8 0xAA1989737D6BA66D,0x3FFE // C00
464data8 0xDBC013A351630AF8,0x3FFF // C10
465data8 0x8B8D47698299389D,0x4000 // C20
466data8 0xACCDD1315DE06EB0,0x4000 // C30
467data8 0xD3414A5AC81BBB2D,0x4000 // C40
468// [16; 32)
469data8 0xECB2B0BE75C5F995,0x3FFF // C01
470data8 0x9DD28BD6DBC96500,0x4000 // C11
471data8 0x8521431B99C6244F,0x4000 // C21
472data8 0xA95F92612B8413C3,0x3FFF // C31
473data8 0x9C76E643B22D9544,0x3FFD // C41
474data8 0xDD90EA99417C8038,0xBFFE // C51
475data8 0x84EA6B6D32E5F906,0xC000 // C61
476data8 0xCDBFE499E05AA622,0xC000 // C71
477data8 0x8594A7DE35427100,0xC001 // C81
478data8 0x9BC1CB2C10DC702F,0xC001 // C91
479data8 0xA7602268762666B0,0xC001 // CA1
480data8 0xDA082BCC6BDB8F7B,0x3FFE // C00
481data8 0xEEBFE1C99322B85E,0x3FFF // C10
482data8 0x96FED4C785361946,0x4000 // C20
483data8 0xB9E3A7207C16B2FE,0x4000 // C30
484data8 0xE1E8170CED48E2C7,0x4000 // C40
485// [32; 64)
486data8 0xFD481EB9AEDD53E7,0x3FFF // C01
487data8 0xA216FB66AC8C53E1,0x4000 // C11
488data8 0x885FF935787553BA,0x4000 // C21
489data8 0xAD471CD89A313327,0x3FFF // C31
490data8 0x9FF13FBA139D21E0,0x3FFD // C41
491data8 0xE25E1663A6EE0266,0xBFFE // C51
492data8 0x87BE51DD5D262FA2,0xC000 // C61
493data8 0xD211A9D4CCE55696,0xC000 // C71
494data8 0x885BEFC29FDED3C9,0xC001 // C81
495data8 0x9EFA48E6367A67F6,0xC001 // C91
496data8 0xAAD3978FC0791297,0xC001 // CA1
497data8 0xF96D210DF37A0AEA,0x3FFE // C00
498data8 0xFE11DC6783917C82,0x3FFF // C10
499data8 0x9FFCD928291B7DDE,0x4000 // C20
500data8 0xC4518F4A80E09AE1,0x4000 // C30
501data8 0xEDDFE9E0FD297C63,0x4000 // C40
502// [64; 128)
503data8 0x840E2E62609B0AD3,0x4000 // C01
504data8 0xA5275A0DD0D3DDF8,0x4000 // C11
505data8 0x8AADC6ABFC441731,0x4000 // C21
506data8 0xB041C6696BE90E50,0x3FFF // C31
507data8 0xA4A8C9153F4B037E,0x3FFD // C41
508data8 0xE3C6A461A7B86736,0xBFFE // C51
509data8 0x89047681C6DE7673,0xC000 // C61
510data8 0xD42DF77A480092DF,0xC000 // C71
511data8 0x89C25D17F086FB20,0xC001 // C81
512data8 0xA09F907D02E34EC7,0xC001 // C91
513data8 0xAC998A9CB79805B7,0xC001 // CA1
514data8 0x875CC9B69AE964CC,0x3FFF // C00
515data8 0x847836BA85DD4C12,0x4000 // C10
516data8 0xA5F3CB2B32E74936,0x4000 // C20
517data8 0xCAE2197C96CB5A0F,0x4000 // C30
518data8 0xF50F7EB60DE5CD09,0x4000 // C40
519// [128; 256)
520data8 0x87D9065DD1876926,0x4000 // C01
521data8 0xA781C28FDAD7CC25,0x4000 // C11
522data8 0x8C6A4FCE35A7EC8D,0x4000 // C21
523data8 0xB27BA081728354F9,0x3FFF // C31
524data8 0xA82FEA7124B0EB2B,0x3FFD // C41
525data8 0xE4C996E42ECBF77A,0xBFFE // C51
526data8 0x89F1A92C84FA538F,0xC000 // C61
527data8 0xD5B6CFF7DB7F6070,0xC000 // C71
528data8 0x8AC6B561FAE38B66,0xC001 // C81
529data8 0xA1D1505C438D8F46,0xC001 // C91
530data8 0xADE2DC1C924FEC81,0xC001 // CA1
531data8 0x8EF6CC62A7E0EB5A,0x3FFF // C00
532data8 0x88A2FFC0ABCB00C0,0x4000 // C10
533data8 0xAA6EA8FCB75B065B,0x4000 // C20
534data8 0xCFC4B82B3D5C9363,0x4000 // C30
535data8 0xFA60FD85DE861771,0x4000 // C40
536// [256; 512)
537data8 0x8AAA7CE4ED5C1EFD,0x4000 // C01
538data8 0xA9679234FB56F1E1,0x4000 // C11
539data8 0x8DCE02287789D841,0x4000 // C21
540data8 0xB44328EF30A8DE7E,0x3FFF // C31
541data8 0xAB0DC564BFA1AB12,0x3FFD // C41
542data8 0xE5882B16FCF2D3CB,0xBFFE // C51
543data8 0x8AA7F48993006A86,0xC000 // C61
544data8 0xD6E63752D192750D,0xC000 // C71
545data8 0x8B90080B17853295,0xC001 // C81
546data8 0xA2BDD4253128D1AB,0xC001 // C91
547data8 0xAEE1A042F96B8121,0xC001 // CA1
548data8 0x94A9C37A42E43BA7,0x3FFF // C00
549data8 0x8BFA54E703878F5A,0x4000 // C10
550data8 0xADFA426DDF14647B,0x4000 // C20
551data8 0xD39C7F7B3958EAF0,0x4000 // C30
552data8 0xFE8C3987853C01E3,0x4000 // C40
553//
554// [2.25; 4)
555data8 0x943AF77763601441,0x4003 // C50
556data8 0xC8A93F9ECB06E891,0x4003 // C60
557data8 0xFC2E5A4AD33DE19D,0x4003 // C70
558data8 0x9526B75B38670119,0x4004 // C80
559data8 0xA7675879D68B587E,0x4004 // C90
560data8 0xB31DFA672D7FB8C0,0x4004 // CA0
561data8 0x83A27775D86F9A81,0xBFD7 // CN
562// [4; 8)
563data8 0xEB8049BA5E79ADA3,0x4000 // C50
564data8 0xC20C95EA99037228,0x4000 // C60
565data8 0x9D4A8C864053CEB8,0x4000 // C70
566data8 0xFC7716544AB0C5C9,0x3FFF // C80
567data8 0xC7EB985259EABA5F,0x3FFF // C90
568data8 0xC042FB3B4C95096D,0x3FFD // CA0
569data8 0xCC2A7F930856177B,0x3FEE // CN
570// [8; 16)
571data8 0xFE1903679D078C7A,0x4000 // C50
572data8 0x957C221AB90171F1,0x4001 // C60
573data8 0xAB2C53B2A78F4031,0x4001 // C70
574data8 0xBE080AE6063AE387,0x4001 // C80
575data8 0xCC019A0311605CB9,0x4001 // C90
576data8 0xD3739D85A12C8ADF,0x4001 // CA0
577data8 0x81FA4D2B7BD7A82D,0x3FEF // CN
578// [16; 32)
579data8 0x871F69E2DD221F02,0x4001 // C50
580data8 0x9E3EF2D477442A9C,0x4001 // C60
581data8 0xB48733582B3C82C5,0x4001 // C70
582data8 0xC7DB9B3C25854A2A,0x4001 // C80
583data8 0xD628B87975BE898F,0x4001 // C90
584data8 0xDDC569C321FF119C,0x4001 // CA0
585data8 0xB27B65560DF7ADA7,0x3FEF // CN
586// [32; 64)
587data8 0x8DE4127349719B22,0x4001 // C50
588data8 0xA5C30A7760F5FBB2,0x4001 // C60
589data8 0xBCB4096055AA2A4E,0x4001 // C70
590data8 0xD08F5F2FB4E7B899,0x4001 // C80
591data8 0xDF39ED39DC91F9CF,0x4001 // C90
592data8 0xE7063E45322F072E,0x4001 // CA0
593data8 0x85A9E11DDDDE67C8,0x3FF0 // CN
594// [64; 128)
595data8 0x91CA191EB80E8893,0x4001 // C50
596data8 0xA9F1D5A55397334A,0x4001 // C60
597data8 0xC1222710295094E3,0x4001 // C70
598data8 0xD52FFABBA6CBE5C6,0x4001 // C80
599data8 0xE3FD9D5282052E1D,0x4001 // C90
600data8 0xEBDBE47BB662F3EF,0x4001 // CA0
601data8 0xEF889F489D88FD31,0x3FF0 // CN
602// [128; 256)
603data8 0x94AA029C2286F8D2,0x4001 // C50
604data8 0xAD0549E55A72389F,0x4001 // C60
605data8 0xC4628899DAF94BA4,0x4001 // C70
606data8 0xD89432A4161C72CB,0x4001 // C80
607data8 0xE77ABA75E9C38F3A,0x4001 // C90
608data8 0xEF65BFFFF71347FF,0x4001 // CA0
609data8 0xE2627460064D918D,0x3FF1 // CN
610// [256; 512)
611data8 0x96E9890D722C2FC1,0x4001 // C50
612data8 0xAF6C2236F6A1CEC4,0x4001 // C60
613data8 0xC6EBB8C9F987D20D,0x4001 // C70
614data8 0xDB38CEFD5EF328CC,0x4001 // C80
615data8 0xEA3265DC66C9A0B4,0x4001 // C90
616data8 0xF2272D6B368C70B1,0x4001 // CA0
617data8 0xDBFF93ECEBCEF1F3,0x3FF2 // CN
618//
619data8 0x3FDD8B618D5AF8FE // point of local minimum on [1;2]
620data8 0x3FE0000000000000 // 0.5
621data8 0xBFC5555DA7212371 // P5
622data8 0x3FC999A19EEF5826 // P4
623data8 0xb17217f7d1cf79ac,0x3ffe // ln(2)
624data8 0xEB3F8E4325F5A535,0x3FFE // ln(sqrt(4*arcsin(1)))
625//
626data8 0xBFCFFFFFFFFEF009 // P3
627data8 0x3FD555555554ECB2 // P2
628data8 0xBF66C16C16C16C17 // W4=B4/12=-1/360
629data8 0x7F5754D9278B51A8 // overflow boundary (first inf result)
630data8 0xAAAAAAAAAAAAAAAB,0x3FFB // W2=B2/2=1/12
631//
632data8 0x3FBC756AC654273B // Q8
633data8 0xBFC001A42489AB4D // Q7
634data8 0x3FC99999999A169B // Q4
635data8 0xBFD00000000019AC // Q3
636data8 0x3FC2492479AA0DF8 // Q6
637data8 0xBFC5555544986F52 // Q5
638data8 0x3FD5555555555555 // Q2
639data8 0xBFE0000000000000 // Q1, P1 = -0.5
640//
641data8 0x80200aaeac44ef38,0x3ff6 // ln(1/frcpa(1+  0/2^-8))
642data8 0xc09090a2c35aa070,0x3ff7 // ln(1/frcpa(1+  1/2^-8))
643data8 0xa0c94fcb41977c75,0x3ff8 // ln(1/frcpa(1+  2/2^-8))
644data8 0xe18b9c263af83301,0x3ff8 // ln(1/frcpa(1+  3/2^-8))
645data8 0x8d35c8d6399c30ea,0x3ff9 // ln(1/frcpa(1+  4/2^-8))
646data8 0xadd4d2ecd601cbb8,0x3ff9 // ln(1/frcpa(1+  5/2^-8))
647data8 0xce95403a192f9f01,0x3ff9 // ln(1/frcpa(1+  6/2^-8))
648data8 0xeb59392cbcc01096,0x3ff9 // ln(1/frcpa(1+  7/2^-8))
649data8 0x862c7d0cefd54c5d,0x3ffa // ln(1/frcpa(1+  8/2^-8))
650data8 0x94aa63c65e70d499,0x3ffa // ln(1/frcpa(1+  9/2^-8))
651data8 0xa54a696d4b62b382,0x3ffa // ln(1/frcpa(1+ 10/2^-8))
652data8 0xb3e4a796a5dac208,0x3ffa // ln(1/frcpa(1+ 11/2^-8))
653data8 0xc28c45b1878340a9,0x3ffa // ln(1/frcpa(1+ 12/2^-8))
654data8 0xd35c55f39d7a6235,0x3ffa // ln(1/frcpa(1+ 13/2^-8))
655data8 0xe220f037b954f1f5,0x3ffa // ln(1/frcpa(1+ 14/2^-8))
656data8 0xf0f3389b036834f3,0x3ffa // ln(1/frcpa(1+ 15/2^-8))
657data8 0xffd3488d5c980465,0x3ffa // ln(1/frcpa(1+ 16/2^-8))
658data8 0x87609ce2ed300490,0x3ffb // ln(1/frcpa(1+ 17/2^-8))
659data8 0x8ede9321e8c85927,0x3ffb // ln(1/frcpa(1+ 18/2^-8))
660data8 0x96639427f2f8e2f4,0x3ffb // ln(1/frcpa(1+ 19/2^-8))
661data8 0x9defad3e8f73217b,0x3ffb // ln(1/frcpa(1+ 20/2^-8))
662data8 0xa582ebd50097029c,0x3ffb // ln(1/frcpa(1+ 21/2^-8))
663data8 0xac06dbe75ab80fee,0x3ffb // ln(1/frcpa(1+ 22/2^-8))
664data8 0xb3a78449b2d3ccca,0x3ffb // ln(1/frcpa(1+ 23/2^-8))
665data8 0xbb4f79635ab46bb2,0x3ffb // ln(1/frcpa(1+ 24/2^-8))
666data8 0xc2fec93a83523f3f,0x3ffb // ln(1/frcpa(1+ 25/2^-8))
667data8 0xc99af2eaca4c4571,0x3ffb // ln(1/frcpa(1+ 26/2^-8))
668data8 0xd1581106472fa653,0x3ffb // ln(1/frcpa(1+ 27/2^-8))
669data8 0xd8002560d4355f2e,0x3ffb // ln(1/frcpa(1+ 28/2^-8))
670data8 0xdfcb43b4fe508632,0x3ffb // ln(1/frcpa(1+ 29/2^-8))
671data8 0xe67f6dff709d4119,0x3ffb // ln(1/frcpa(1+ 30/2^-8))
672data8 0xed393b1c22351280,0x3ffb // ln(1/frcpa(1+ 31/2^-8))
673data8 0xf5192bff087bcc35,0x3ffb // ln(1/frcpa(1+ 32/2^-8))
674data8 0xfbdf4ff6dfef2fa3,0x3ffb // ln(1/frcpa(1+ 33/2^-8))
675data8 0x81559a97f92f9cc7,0x3ffc // ln(1/frcpa(1+ 34/2^-8))
676data8 0x84be72bce90266e8,0x3ffc // ln(1/frcpa(1+ 35/2^-8))
677data8 0x88bc74113f23def2,0x3ffc // ln(1/frcpa(1+ 36/2^-8))
678data8 0x8c2ba3edf6799d11,0x3ffc // ln(1/frcpa(1+ 37/2^-8))
679data8 0x8f9dc92f92ea08b1,0x3ffc // ln(1/frcpa(1+ 38/2^-8))
680data8 0x9312e8f36efab5a7,0x3ffc // ln(1/frcpa(1+ 39/2^-8))
681data8 0x968b08643409ceb6,0x3ffc // ln(1/frcpa(1+ 40/2^-8))
682data8 0x9a062cba08a1708c,0x3ffc // ln(1/frcpa(1+ 41/2^-8))
683data8 0x9d845b3abf95485c,0x3ffc // ln(1/frcpa(1+ 42/2^-8))
684data8 0xa06fd841bc001bb4,0x3ffc // ln(1/frcpa(1+ 43/2^-8))
685data8 0xa3f3a74652fbe0db,0x3ffc // ln(1/frcpa(1+ 44/2^-8))
686data8 0xa77a8fb2336f20f5,0x3ffc // ln(1/frcpa(1+ 45/2^-8))
687data8 0xab0497015d28b0a0,0x3ffc // ln(1/frcpa(1+ 46/2^-8))
688data8 0xae91c2be6ba6a615,0x3ffc // ln(1/frcpa(1+ 47/2^-8))
689data8 0xb189d1b99aebb20b,0x3ffc // ln(1/frcpa(1+ 48/2^-8))
690data8 0xb51cced5de9c1b2c,0x3ffc // ln(1/frcpa(1+ 49/2^-8))
691data8 0xb819bee9e720d42f,0x3ffc // ln(1/frcpa(1+ 50/2^-8))
692data8 0xbbb2a0947b093a5d,0x3ffc // ln(1/frcpa(1+ 51/2^-8))
693data8 0xbf4ec1505811684a,0x3ffc // ln(1/frcpa(1+ 52/2^-8))
694data8 0xc2535bacfa8975ff,0x3ffc // ln(1/frcpa(1+ 53/2^-8))
695data8 0xc55a3eafad187eb8,0x3ffc // ln(1/frcpa(1+ 54/2^-8))
696data8 0xc8ff2484b2c0da74,0x3ffc // ln(1/frcpa(1+ 55/2^-8))
697data8 0xcc0b1a008d53ab76,0x3ffc // ln(1/frcpa(1+ 56/2^-8))
698data8 0xcfb6203844b3209b,0x3ffc // ln(1/frcpa(1+ 57/2^-8))
699data8 0xd2c73949a47a19f5,0x3ffc // ln(1/frcpa(1+ 58/2^-8))
700data8 0xd5daae18b49d6695,0x3ffc // ln(1/frcpa(1+ 59/2^-8))
701data8 0xd8f08248cf7e8019,0x3ffc // ln(1/frcpa(1+ 60/2^-8))
702data8 0xdca7749f1b3e540e,0x3ffc // ln(1/frcpa(1+ 61/2^-8))
703data8 0xdfc28e033aaaf7c7,0x3ffc // ln(1/frcpa(1+ 62/2^-8))
704data8 0xe2e012a5f91d2f55,0x3ffc // ln(1/frcpa(1+ 63/2^-8))
705data8 0xe600064ed9e292a8,0x3ffc // ln(1/frcpa(1+ 64/2^-8))
706data8 0xe9226cce42b39f60,0x3ffc // ln(1/frcpa(1+ 65/2^-8))
707data8 0xec4749fd97a28360,0x3ffc // ln(1/frcpa(1+ 66/2^-8))
708data8 0xef6ea1bf57780495,0x3ffc // ln(1/frcpa(1+ 67/2^-8))
709data8 0xf29877ff38809091,0x3ffc // ln(1/frcpa(1+ 68/2^-8))
710data8 0xf5c4d0b245cb89be,0x3ffc // ln(1/frcpa(1+ 69/2^-8))
711data8 0xf8f3afd6fcdef3aa,0x3ffc // ln(1/frcpa(1+ 70/2^-8))
712data8 0xfc2519756be1abc7,0x3ffc // ln(1/frcpa(1+ 71/2^-8))
713data8 0xff59119f503e6832,0x3ffc // ln(1/frcpa(1+ 72/2^-8))
714data8 0x8147ce381ae0e146,0x3ffd // ln(1/frcpa(1+ 73/2^-8))
715data8 0x82e45f06cb1ad0f2,0x3ffd // ln(1/frcpa(1+ 74/2^-8))
716data8 0x842f5c7c573cbaa2,0x3ffd // ln(1/frcpa(1+ 75/2^-8))
717data8 0x85ce471968c8893a,0x3ffd // ln(1/frcpa(1+ 76/2^-8))
718data8 0x876e8305bc04066d,0x3ffd // ln(1/frcpa(1+ 77/2^-8))
719data8 0x891012678031fbb3,0x3ffd // ln(1/frcpa(1+ 78/2^-8))
720data8 0x8a5f1493d766a05f,0x3ffd // ln(1/frcpa(1+ 79/2^-8))
721data8 0x8c030c778c56fa00,0x3ffd // ln(1/frcpa(1+ 80/2^-8))
722data8 0x8da85df17e31d9ae,0x3ffd // ln(1/frcpa(1+ 81/2^-8))
723data8 0x8efa663e7921687e,0x3ffd // ln(1/frcpa(1+ 82/2^-8))
724data8 0x90a22b6875c6a1f8,0x3ffd // ln(1/frcpa(1+ 83/2^-8))
725data8 0x91f62cc8f5d24837,0x3ffd // ln(1/frcpa(1+ 84/2^-8))
726data8 0x93a06cfc3857d980,0x3ffd // ln(1/frcpa(1+ 85/2^-8))
727data8 0x94f66d5e6fd01ced,0x3ffd // ln(1/frcpa(1+ 86/2^-8))
728data8 0x96a330156e6772f2,0x3ffd // ln(1/frcpa(1+ 87/2^-8))
729data8 0x97fb3582754ea25b,0x3ffd // ln(1/frcpa(1+ 88/2^-8))
730data8 0x99aa8259aad1bbf2,0x3ffd // ln(1/frcpa(1+ 89/2^-8))
731data8 0x9b0492f6227ae4a8,0x3ffd // ln(1/frcpa(1+ 90/2^-8))
732data8 0x9c5f8e199bf3a7a5,0x3ffd // ln(1/frcpa(1+ 91/2^-8))
733data8 0x9e1293b9998c1daa,0x3ffd // ln(1/frcpa(1+ 92/2^-8))
734data8 0x9f6fa31e0b41f308,0x3ffd // ln(1/frcpa(1+ 93/2^-8))
735data8 0xa0cda11eaf46390e,0x3ffd // ln(1/frcpa(1+ 94/2^-8))
736data8 0xa22c8f029cfa45aa,0x3ffd // ln(1/frcpa(1+ 95/2^-8))
737data8 0xa3e48badb7856b34,0x3ffd // ln(1/frcpa(1+ 96/2^-8))
738data8 0xa5459a0aa95849f9,0x3ffd // ln(1/frcpa(1+ 97/2^-8))
739data8 0xa6a79c84480cfebd,0x3ffd // ln(1/frcpa(1+ 98/2^-8))
740data8 0xa80a946d0fcb3eb2,0x3ffd // ln(1/frcpa(1+ 99/2^-8))
741data8 0xa96e831a3ea7b314,0x3ffd // ln(1/frcpa(1+100/2^-8))
742data8 0xaad369e3dc544e3b,0x3ffd // ln(1/frcpa(1+101/2^-8))
743data8 0xac92e9588952c815,0x3ffd // ln(1/frcpa(1+102/2^-8))
744data8 0xadfa035aa1ed8fdc,0x3ffd // ln(1/frcpa(1+103/2^-8))
745data8 0xaf6219eae1ad6e34,0x3ffd // ln(1/frcpa(1+104/2^-8))
746data8 0xb0cb2e6d8160f753,0x3ffd // ln(1/frcpa(1+105/2^-8))
747data8 0xb2354249ad950f72,0x3ffd // ln(1/frcpa(1+106/2^-8))
748data8 0xb3a056e98ef4a3b4,0x3ffd // ln(1/frcpa(1+107/2^-8))
749data8 0xb50c6dba52c6292a,0x3ffd // ln(1/frcpa(1+108/2^-8))
750data8 0xb679882c33876165,0x3ffd // ln(1/frcpa(1+109/2^-8))
751data8 0xb78c07429785cedc,0x3ffd // ln(1/frcpa(1+110/2^-8))
752data8 0xb8faeb8dc4a77d24,0x3ffd // ln(1/frcpa(1+111/2^-8))
753data8 0xba6ad77eb36ae0d6,0x3ffd // ln(1/frcpa(1+112/2^-8))
754data8 0xbbdbcc915e9bee50,0x3ffd // ln(1/frcpa(1+113/2^-8))
755data8 0xbd4dcc44f8cf12ef,0x3ffd // ln(1/frcpa(1+114/2^-8))
756data8 0xbec0d81bf5b531fa,0x3ffd // ln(1/frcpa(1+115/2^-8))
757data8 0xc034f19c139186f4,0x3ffd // ln(1/frcpa(1+116/2^-8))
758data8 0xc14cb69f7c5e55ab,0x3ffd // ln(1/frcpa(1+117/2^-8))
759data8 0xc2c2abbb6e5fd56f,0x3ffd // ln(1/frcpa(1+118/2^-8))
760data8 0xc439b2c193e6771e,0x3ffd // ln(1/frcpa(1+119/2^-8))
761data8 0xc553acb9d5c67733,0x3ffd // ln(1/frcpa(1+120/2^-8))
762data8 0xc6cc96e441272441,0x3ffd // ln(1/frcpa(1+121/2^-8))
763data8 0xc8469753eca88c30,0x3ffd // ln(1/frcpa(1+122/2^-8))
764data8 0xc962cf3ce072b05c,0x3ffd // ln(1/frcpa(1+123/2^-8))
765data8 0xcadeba8771f694aa,0x3ffd // ln(1/frcpa(1+124/2^-8))
766data8 0xcc5bc08d1f72da94,0x3ffd // ln(1/frcpa(1+125/2^-8))
767data8 0xcd7a3f99ea035c29,0x3ffd // ln(1/frcpa(1+126/2^-8))
768data8 0xcef93860c8a53c35,0x3ffd // ln(1/frcpa(1+127/2^-8))
769data8 0xd0192f68a7ed23df,0x3ffd // ln(1/frcpa(1+128/2^-8))
770data8 0xd19a201127d3c645,0x3ffd // ln(1/frcpa(1+129/2^-8))
771data8 0xd2bb92f4061c172c,0x3ffd // ln(1/frcpa(1+130/2^-8))
772data8 0xd43e80b2ee8cc8fc,0x3ffd // ln(1/frcpa(1+131/2^-8))
773data8 0xd56173601fc4ade4,0x3ffd // ln(1/frcpa(1+132/2^-8))
774data8 0xd6e6637efb54086f,0x3ffd // ln(1/frcpa(1+133/2^-8))
775data8 0xd80ad9f58f3c8193,0x3ffd // ln(1/frcpa(1+134/2^-8))
776data8 0xd991d1d31aca41f8,0x3ffd // ln(1/frcpa(1+135/2^-8))
777data8 0xdab7d02231484a93,0x3ffd // ln(1/frcpa(1+136/2^-8))
778data8 0xdc40d532cde49a54,0x3ffd // ln(1/frcpa(1+137/2^-8))
779data8 0xdd685f79ed8b265e,0x3ffd // ln(1/frcpa(1+138/2^-8))
780data8 0xde9094bbc0e17b1d,0x3ffd // ln(1/frcpa(1+139/2^-8))
781data8 0xe01c91b78440c425,0x3ffd // ln(1/frcpa(1+140/2^-8))
782data8 0xe14658f26997e729,0x3ffd // ln(1/frcpa(1+141/2^-8))
783data8 0xe270cdc2391e0d23,0x3ffd // ln(1/frcpa(1+142/2^-8))
784data8 0xe3ffce3a2aa64922,0x3ffd // ln(1/frcpa(1+143/2^-8))
785data8 0xe52bdb274ed82887,0x3ffd // ln(1/frcpa(1+144/2^-8))
786data8 0xe6589852e75d7df6,0x3ffd // ln(1/frcpa(1+145/2^-8))
787data8 0xe786068c79937a7d,0x3ffd // ln(1/frcpa(1+146/2^-8))
788data8 0xe91903adad100911,0x3ffd // ln(1/frcpa(1+147/2^-8))
789data8 0xea481236f7d35bb0,0x3ffd // ln(1/frcpa(1+148/2^-8))
790data8 0xeb77d48c692e6b14,0x3ffd // ln(1/frcpa(1+149/2^-8))
791data8 0xeca84b83d7297b87,0x3ffd // ln(1/frcpa(1+150/2^-8))
792data8 0xedd977f4962aa158,0x3ffd // ln(1/frcpa(1+151/2^-8))
793data8 0xef7179a22f257754,0x3ffd // ln(1/frcpa(1+152/2^-8))
794data8 0xf0a450d139366ca7,0x3ffd // ln(1/frcpa(1+153/2^-8))
795data8 0xf1d7e0524ff9ffdb,0x3ffd // ln(1/frcpa(1+154/2^-8))
796data8 0xf30c29036a8b6cae,0x3ffd // ln(1/frcpa(1+155/2^-8))
797data8 0xf4412bc411ea8d92,0x3ffd // ln(1/frcpa(1+156/2^-8))
798data8 0xf576e97564c8619d,0x3ffd // ln(1/frcpa(1+157/2^-8))
799data8 0xf6ad62fa1b5f172f,0x3ffd // ln(1/frcpa(1+158/2^-8))
800data8 0xf7e499368b55c542,0x3ffd // ln(1/frcpa(1+159/2^-8))
801data8 0xf91c8d10abaffe22,0x3ffd // ln(1/frcpa(1+160/2^-8))
802data8 0xfa553f7018c966f3,0x3ffd // ln(1/frcpa(1+161/2^-8))
803data8 0xfb8eb13e185d802c,0x3ffd // ln(1/frcpa(1+162/2^-8))
804data8 0xfcc8e3659d9bcbed,0x3ffd // ln(1/frcpa(1+163/2^-8))
805data8 0xfe03d6d34d487fd2,0x3ffd // ln(1/frcpa(1+164/2^-8))
806data8 0xff3f8c7581e9f0ae,0x3ffd // ln(1/frcpa(1+165/2^-8))
807data8 0x803e029e280173ae,0x3ffe // ln(1/frcpa(1+166/2^-8))
808data8 0x80dca10cc52d0757,0x3ffe // ln(1/frcpa(1+167/2^-8))
809data8 0x817ba200632755a1,0x3ffe // ln(1/frcpa(1+168/2^-8))
810data8 0x821b05f3b01d6774,0x3ffe // ln(1/frcpa(1+169/2^-8))
811data8 0x82bacd623ff19d06,0x3ffe // ln(1/frcpa(1+170/2^-8))
812data8 0x835af8c88e7a8f47,0x3ffe // ln(1/frcpa(1+171/2^-8))
813data8 0x83c5f8299e2b4091,0x3ffe // ln(1/frcpa(1+172/2^-8))
814data8 0x8466cb43f3d87300,0x3ffe // ln(1/frcpa(1+173/2^-8))
815data8 0x850803a67c80ca4b,0x3ffe // ln(1/frcpa(1+174/2^-8))
816data8 0x85a9a1d11a23b461,0x3ffe // ln(1/frcpa(1+175/2^-8))
817data8 0x864ba644a18e6e05,0x3ffe // ln(1/frcpa(1+176/2^-8))
818data8 0x86ee1182dcc432f7,0x3ffe // ln(1/frcpa(1+177/2^-8))
819data8 0x875a925d7e48c316,0x3ffe // ln(1/frcpa(1+178/2^-8))
820data8 0x87fdaa109d23aef7,0x3ffe // ln(1/frcpa(1+179/2^-8))
821data8 0x88a129ed4becfaf2,0x3ffe // ln(1/frcpa(1+180/2^-8))
822data8 0x89451278ecd7f9cf,0x3ffe // ln(1/frcpa(1+181/2^-8))
823data8 0x89b29295f8432617,0x3ffe // ln(1/frcpa(1+182/2^-8))
824data8 0x8a572ac5a5496882,0x3ffe // ln(1/frcpa(1+183/2^-8))
825data8 0x8afc2d0ce3b2dadf,0x3ffe // ln(1/frcpa(1+184/2^-8))
826data8 0x8b6a69c608cfd3af,0x3ffe // ln(1/frcpa(1+185/2^-8))
827data8 0x8c101e106e899a83,0x3ffe // ln(1/frcpa(1+186/2^-8))
828data8 0x8cb63de258f9d626,0x3ffe // ln(1/frcpa(1+187/2^-8))
829data8 0x8d2539c5bd19e2b1,0x3ffe // ln(1/frcpa(1+188/2^-8))
830data8 0x8dcc0e064b29e6f1,0x3ffe // ln(1/frcpa(1+189/2^-8))
831data8 0x8e734f45d88357ae,0x3ffe // ln(1/frcpa(1+190/2^-8))
832data8 0x8ee30cef034a20db,0x3ffe // ln(1/frcpa(1+191/2^-8))
833data8 0x8f8b0515686d1d06,0x3ffe // ln(1/frcpa(1+192/2^-8))
834data8 0x90336bba039bf32f,0x3ffe // ln(1/frcpa(1+193/2^-8))
835data8 0x90a3edd23d1c9d58,0x3ffe // ln(1/frcpa(1+194/2^-8))
836data8 0x914d0de2f5d61b32,0x3ffe // ln(1/frcpa(1+195/2^-8))
837data8 0x91be0c20d28173b5,0x3ffe // ln(1/frcpa(1+196/2^-8))
838data8 0x9267e737c06cd34a,0x3ffe // ln(1/frcpa(1+197/2^-8))
839data8 0x92d962ae6abb1237,0x3ffe // ln(1/frcpa(1+198/2^-8))
840data8 0x9383fa6afbe2074c,0x3ffe // ln(1/frcpa(1+199/2^-8))
841data8 0x942f0421651c1c4e,0x3ffe // ln(1/frcpa(1+200/2^-8))
842data8 0x94a14a3845bb985e,0x3ffe // ln(1/frcpa(1+201/2^-8))
843data8 0x954d133857f861e7,0x3ffe // ln(1/frcpa(1+202/2^-8))
844data8 0x95bfd96468e604c4,0x3ffe // ln(1/frcpa(1+203/2^-8))
845data8 0x9632d31cafafa858,0x3ffe // ln(1/frcpa(1+204/2^-8))
846data8 0x96dfaabd86fa1647,0x3ffe // ln(1/frcpa(1+205/2^-8))
847data8 0x9753261fcbb2a594,0x3ffe // ln(1/frcpa(1+206/2^-8))
848data8 0x9800c11b426b996d,0x3ffe // ln(1/frcpa(1+207/2^-8))
849data8 0x9874bf4d45ae663c,0x3ffe // ln(1/frcpa(1+208/2^-8))
850data8 0x99231f5ee9a74f79,0x3ffe // ln(1/frcpa(1+209/2^-8))
851data8 0x9997a18a56bcad28,0x3ffe // ln(1/frcpa(1+210/2^-8))
852data8 0x9a46c873a3267e79,0x3ffe // ln(1/frcpa(1+211/2^-8))
853data8 0x9abbcfc621eb6cb6,0x3ffe // ln(1/frcpa(1+212/2^-8))
854data8 0x9b310cb0d354c990,0x3ffe // ln(1/frcpa(1+213/2^-8))
855data8 0x9be14cf9e1b3515c,0x3ffe // ln(1/frcpa(1+214/2^-8))
856data8 0x9c5710b8cbb73a43,0x3ffe // ln(1/frcpa(1+215/2^-8))
857data8 0x9ccd0abd301f399c,0x3ffe // ln(1/frcpa(1+216/2^-8))
858data8 0x9d7e67f3bdce8888,0x3ffe // ln(1/frcpa(1+217/2^-8))
859data8 0x9df4ea81a99daa01,0x3ffe // ln(1/frcpa(1+218/2^-8))
860data8 0x9e6ba405a54514ba,0x3ffe // ln(1/frcpa(1+219/2^-8))
861data8 0x9f1e21c8c7bb62b3,0x3ffe // ln(1/frcpa(1+220/2^-8))
862data8 0x9f956593f6b6355c,0x3ffe // ln(1/frcpa(1+221/2^-8))
863data8 0xa00ce1092e5498c3,0x3ffe // ln(1/frcpa(1+222/2^-8))
864data8 0xa0c08309c4b912c1,0x3ffe // ln(1/frcpa(1+223/2^-8))
865data8 0xa1388a8c6faa2afa,0x3ffe // ln(1/frcpa(1+224/2^-8))
866data8 0xa1b0ca7095b5f985,0x3ffe // ln(1/frcpa(1+225/2^-8))
867data8 0xa22942eb47534a00,0x3ffe // ln(1/frcpa(1+226/2^-8))
868data8 0xa2de62326449d0a3,0x3ffe // ln(1/frcpa(1+227/2^-8))
869data8 0xa357690f88bfe345,0x3ffe // ln(1/frcpa(1+228/2^-8))
870data8 0xa3d0a93f45169a4b,0x3ffe // ln(1/frcpa(1+229/2^-8))
871data8 0xa44a22f7ffe65f30,0x3ffe // ln(1/frcpa(1+230/2^-8))
872data8 0xa500c5e5b4c1aa36,0x3ffe // ln(1/frcpa(1+231/2^-8))
873data8 0xa57ad064eb2ebbc2,0x3ffe // ln(1/frcpa(1+232/2^-8))
874data8 0xa5f5152dedf4384e,0x3ffe // ln(1/frcpa(1+233/2^-8))
875data8 0xa66f9478856233ec,0x3ffe // ln(1/frcpa(1+234/2^-8))
876data8 0xa6ea4e7cca02c32e,0x3ffe // ln(1/frcpa(1+235/2^-8))
877data8 0xa765437325341ccf,0x3ffe // ln(1/frcpa(1+236/2^-8))
878data8 0xa81e21e6c75b4020,0x3ffe // ln(1/frcpa(1+237/2^-8))
879data8 0xa899ab333fe2b9ca,0x3ffe // ln(1/frcpa(1+238/2^-8))
880data8 0xa9157039c51ebe71,0x3ffe // ln(1/frcpa(1+239/2^-8))
881data8 0xa991713433c2b999,0x3ffe // ln(1/frcpa(1+240/2^-8))
882data8 0xaa0dae5cbcc048b3,0x3ffe // ln(1/frcpa(1+241/2^-8))
883data8 0xaa8a27ede5eb13ad,0x3ffe // ln(1/frcpa(1+242/2^-8))
884data8 0xab06de228a9e3499,0x3ffe // ln(1/frcpa(1+243/2^-8))
885data8 0xab83d135dc633301,0x3ffe // ln(1/frcpa(1+244/2^-8))
886data8 0xac3fb076adc7fe7a,0x3ffe // ln(1/frcpa(1+245/2^-8))
887data8 0xacbd3cbbe47988f1,0x3ffe // ln(1/frcpa(1+246/2^-8))
888data8 0xad3b06b1a5dc57c3,0x3ffe // ln(1/frcpa(1+247/2^-8))
889data8 0xadb90e94af887717,0x3ffe // ln(1/frcpa(1+248/2^-8))
890data8 0xae3754a218f7c816,0x3ffe // ln(1/frcpa(1+249/2^-8))
891data8 0xaeb5d9175437afa2,0x3ffe // ln(1/frcpa(1+250/2^-8))
892data8 0xaf349c322e9c7cee,0x3ffe // ln(1/frcpa(1+251/2^-8))
893data8 0xafb39e30d1768d1c,0x3ffe // ln(1/frcpa(1+252/2^-8))
894data8 0xb032df51c2c93116,0x3ffe // ln(1/frcpa(1+253/2^-8))
895data8 0xb0b25fd3e6035ad9,0x3ffe // ln(1/frcpa(1+254/2^-8))
896data8 0xb1321ff67cba178c,0x3ffe // ln(1/frcpa(1+255/2^-8))
897//
898data8 0xC7DC2985D3B44557,0x3FCA // A00
899//
900// polynomial approximation of ln(GAMMA(x)), 1 <= x < 2.25
901// [0.875,1.25)
902data8 0xBF9A04F7E40C8498,0x3FAB79D8D9380F03 // C17,C16
903data8 0xBFB3B63609CA0CBD,0x3FB5564EA1675539 // C13,C12
904data8 0xBFBC806766F48C41,0x3FC010B36CDA773A // C9,C8
905data8 0xD45CE0BD54BE3D67,0xBFFC // C5
906data8 0xCD26AADF559676D0,0xBFFD // C3
907data8 0x93C467E37DB0C7A7,0xBFFE // C1
908data8 0xBFB10C251723B123,0x3FB2669DAD69A12D // C15,C14
909data8 0xBFB748A3CFCE4717,0x3FB9A01DEE29966A // C11,C10
910data8 0xBFC2703A1D85497E,0x3FC5B40CB0FD353C // C7,C6
911data8 0x8A8991563ECBBA5D,0x3FFD // C4
912data8 0xD28D3312983E9844,0x3FFE // C2
913data8 0,0                       // C0
914// [1.25,1.75)
915data8 0xBF12680486396DE6,0x3F23C51FC332CD9D // C17,C16
916data8 0xBF422633DA3A1496,0x3F4CC70680768857 // C13,C12
917data8 0xBF6E2F1A1F804B5D,0x3F78FCE02A032428 // C9,C8
918data8 0x864D46FA895985C1,0xBFFA // C5
919data8 0x97213C6E35E12043,0xBFFC // C3
920data8 0x8A8A42A401D979B7,0x3FC7 // C1
921data8 0xBF2E098A8A2332A8,0x3F370E61B73B205C // C15,C14
922data8 0xBF56F9849D3BC6CC,0x3F6283126F58D7F4 // C11,C10
923data8 0xBF851F9F9516A98F,0x3F9266E797A1433F // C7,C6
924data8 0x845A14A6A81B0638,0x3FFB // C4
925data8 0xF7B95E4771C55C99,0x3FFD // C2
926data8 0xF8CDCDE61C520E0F,0xBFFB // C0
927// [1.75,2.25)
928data8 0xBEA01D7AFA5D8F52,0x3EB1010986E60253 // C17,C16
929data8 0xBEE3CBEDB4C918AA,0x3EF580F6D9D0F72D // C13,C12
930data8 0xBF2D3FD4C7F68563,0x3F40B36AF884AE9A // C9,C8
931data8 0xF2027E10C7B051EC,0xBFF7 // C5
932data8 0x89F000D2ABB03401,0xBFFB // C3
933data8 0xD8773039049E70B6,0x3FFD // C1
934data8 0xBEC112CD07CFC31A,0x3ED2528A428D30E1 // C15,C14
935data8 0xBF078DE5618D8C9F,0x3F1A127AD811A53D // C11,C10
936data8 0xBF538AC5C2BF540D,0x3F67ADD6EADB5718 // C7,C6
937data8 0xA8991563EC243383,0x3FF9 // C4
938data8 0xA51A6625307D3230,0x3FFD // C2
939data8 0,0                       // C0
940//
941// polynomial approximation of ln(sin(Pi*x)/(Pi*x)), 0 < x <= 0.5
942data8 0xBFDC1BF0931AE591,0x3FD36D6D6CE263D7 //S28,S26
943data8 0xBFBD516F4FD9FB18,0xBFBBE1703F315086 //S20,S18
944data8 0xAAB5A3CCEFCD3628,0xBFFC //S12
945data8 0x80859B5C318E19A5,0xBFFD //S8
946data8 0x8A8991563EC7EB33,0xBFFE //S4
947data8 0xBFD23AB9E6CC88AC,0xBF9957F5146FC7AF //S24,S22
948data8 0xBFC007B324E23040,0xBFC248DEC29CAC4A //S16,S14
949data8 0xCD00EFF2F8F86899,0xBFFC //S10
950data8 0xADA06587FACD668B,0xBFFD //S6
951data8 0xD28D3312983E98A0,0xBFFF //S2
952//
953data8 0x8090F777D7942F73,0x4001 // PR01
954data8 0xE5B521193CF61E63,0x4000 // PR11
955data8 0xC02C000000001939 // (-15;-14)
956data8 0x0000000000000233 // (-15;-14)
957data8 0xC02A000000016124 // (-14;-13)
958data8 0x0000000000002BFB // (-14;-13)
959data8 0xC02800000011EED9 // (-13;-12)
960data8 0x0000000000025CBB // (-13;-12)
961data8 0xC026000000D7322A // (-12;-11)
962data8 0x00000000001E1095 // (-12;-11)
963data8 0xC0240000093F2777 // (-11;-10)
964data8 0x00000000013DD3DC // (-11;-10)
965data8 0xC02200005C7768FB // (-10;-9)
966data8 0x000000000C9539B9 // (-10;-9)
967data8 0xC02000034028B3F9 // (-9;-8)
968data8 0x000000007570C565 // (-9;-8)
969data8 0xC01C0033FDEDFE1F // (-8;-7)
970data8 0x00000007357E670E // (-8;-7)
971data8 0xC018016B25897C8D // (-7;-6)
972data8 0x000000346DC5D639 // (-7;-6)
973data8 0xC014086A57F0B6D9 // (-6;-5)
974data8 0x0000010624DD2F1B // (-6;-5)
975data8 0xC010284E78599581 // (-5;-4)
976data8 0x0000051EB851EB85 // (-5;-4)
977data8 0xC009260DBC9E59AF // (-4;-3)
978data8 0x000028F5C28F5C29 // (-4;-3)
979data8 0xC003A7FC9600F86C // (-3;-2)
980data8 0x0000666666666666 // (-3;-2)
981data8 0xCC15879606130890,0x4000 // PR21
982data8 0xB42FE3281465E1CC,0x4000 // PR31
983//
984data8 0x828185F0B95C9916,0x4001 // PR00
985//
986data8 0xD4D3C819E4E5654B,0x4000 // PR10
987data8 0xA82FBBA4FCC75298,0x4000 // PR20
988data8 0xC02DFFFFFFFFFE52 // (-15;-14)
989data8 0x000000000000001C // (-15;-14)
990data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
991data8 0x00000000000001A6 // (-14;-13)
992data8 0xC029FFFFFFFE9EDC // (-13;-12)
993data8 0x0000000000002BFB // (-13;-12)
994data8 0xC027FFFFFFEE1127 // (-12;-11)
995data8 0x000000000001EEC8 // (-12;-11)
996data8 0xC025FFFFFF28CDD4 // (-11;-10)
997data8 0x00000000001E1095 // (-11;-10)
998data8 0xC023FFFFF6C0D7C0 // (-10;-9)
999data8 0x000000000101B2B3 // (-10;-9)
1000data8 0xC021FFFFA3884BD0 // (-9;-8)
1001data8 0x000000000D6BF94D // (-9;-8)
1002data8 0xC01FFFF97F8159CF // (-8;-7)
1003data8 0x00000000C9539B89 // (-8;-7)
1004data8 0xC01BFFCBF76B86F0 // (-7;-6)
1005data8 0x00000007357E670E // (-7;-6)
1006data8 0xC017FE92F591F40D // (-6;-5)
1007data8 0x000000346DC5D639 // (-6;-5)
1008data8 0xC013F7577A6EEAFD // (-5;-4)
1009data8 0x00000147AE147AE1 // (-5;-4)
1010data8 0xC00FA471547C2FE5 // (-4;-3)
1011data8 0x00000C49BA5E353F // (-4;-3)
1012data8 0xC005FB410A1BD901 // (-3;-2)
1013data8 0x000053F7CED91687 // (-3;-2)
1014data8 0x80151BB918A293AA,0x4000 // PR30
1015data8 0xB3C9F8F47422A314,0x400B // PRN
1016//
1017// right negative roots
1018//(-3;-2)
1019data8 0x40BFCF8B90BE7F6B,0x40B237623345EFC3 // A15,A14
1020data8 0x407A92EFB03B281E,0x40728700C7819759 // A11,A10
1021data8 0x403809F04EF4D0F2,0x4038D32F682D9593 // A7,A6
1022data8 0xB4A5302C53C2F2D8,0x3FFF // A3
1023data8 0xC1FF4B357A9B0383,0x3FFF // A1
1024data8 0x409C46632EB4B2D3,0x4091A72AFA2148F5 // A13,A12
1025data8 0x4059297AC79A88DB,0x40548EAA7BE7FA6B // A9,A8
1026data8 0x4017339FE04B227F,0x4021718D7CA09E02 // A5,A4
1027data8 0x9B775D8017AAE668,0x4001 // A2
1028data8 0x8191DB68FF4366A1,0x3FC9 // A0
1029//(-4;-3)
1030data8 0x425260910D35307B,0x422668F5BE7983BB // A15,A14
1031data8 0x41A4454DBE4BEE43,0x41799CA93F6EA817 // A11,A10
1032data8 0x40FBB97AA1400F31,0x40D293C3F7ADAB15 // A7,A6
1033data8 0xE089B8926AE4517B,0x4005 // A3
1034data8 0xF90532F97D630C69,0x4001 // A1
1035data8 0x41F9F0CF98C5F2EA,0x41D026336C6BF394 // A13,A12
1036data8 0x415057F61156D5B8,0x41251EA3055CB754 // A9,A8
1037data8 0x40A99A6337D9FC2B,0x408267203D776151 // A5,A4
1038data8 0xCEA694BB8A8827A9,0x4003 // A2
1039data8 0xF4B02F1D73D30EED,0x3FCD // A0
1040//(-5;-4)
1041data8 0x4412365489340979,0x43C86441BAFDEE39 // A15,A14
1042data8 0x42ED68FCB19352DD,0x42A45FCE3905CD6F // A11,A10
1043data8 0x41CD14FE49FD4FCA,0x41855E3DBFA89744 // A7,A6
1044data8 0xAACD88D954E0EC16,0x400B // A3
1045data8 0xD652E7A490B0DCDF,0x4003 // A1
1046data8 0x437F52608E0E752A,0x433560E0633E33D5 // A13,A12
1047data8 0x425C83998976DE3D,0x421433DCCD3B473B // A9,A8
1048data8 0x4140261EB5732106,0x40F96D18E21AE6CC // A5,A4
1049data8 0xA220AE6C09FA8A0E,0x4007 // A2
1050data8 0xCC1682D17A2B5A58,0xBFCF // A0
1051//(-6;-5)
1052data8 0x4630E41D6386CF5A,0x45C2E7992C628C8C // A15,A14
1053data8 0x447AABEC714F913A,0x440EDCAB45339F3A // A11,A10
1054data8 0x42C9A8D00C97E3CE,0x425F7D8D5BEAB44D // A7,A6
1055data8 0x929EC2B1FB95BB5B,0x4012 // A3
1056data8 0xF6B970414D717D38,0x4005 // A1
1057data8 0x45545E578976F6A2,0x44E738288DD52686 // A13,A12
1058data8 0x43A20921FEC49492,0x433557FD7C6A41B3 // A9,A8
1059data8 0x41F3E01773761DB4,0x418A225DF2DA6C47 // A5,A4
1060data8 0xE7661976117F9312,0x400B // A2
1061data8 0xC33C13FEE07494DE,0x3FCF // A0
1062//(-7;-6)
1063data8 0x4898F1E6133305AD,0x4802C5306FE4A850 // A15,A14
1064data8 0x463FD37946B44094,0x45A8D489B784C2DD // A11,A10
1065data8 0x43E9500995815F06,0x4354F21E2FEE6DF5 // A7,A6
1066data8 0xEF281D1E1BBE10BD,0x4019 // A3
1067data8 0xB4EF24F1D78C2029,0x4008 // A1
1068data8 0x476AB1D5930011E5,0x46D4867E77BFB622 // A13,A12
1069data8 0x45139151ECDEF7C5,0x447F3A2BC6BF466F // A9,A8
1070data8 0x42C1D3D50713FA40,0x422F9C7B52556A1B // A5,A4
1071data8 0xFE711A4267CEA83A,0x4010 // A2
1072data8 0xD11E91B3FF8F4B94,0xBFD2 // A0
1073//(-8;-7)
1074data8 0x4B39E57569811B6E,0x4A7656073EB1FA21 // A15,A14
1075data8 0x482C9B24A516B0BB,0x47698FF55139C62B // A11,A10
1076data8 0x452393E2BC8E8D04,0x44628E1C710DA478 // A7,A6
1077data8 0x9F2A95AF1B7A773F,0x4022 // A3
1078data8 0x9DA03D51C303C918,0x400B // A1
1079data8 0x49B24C241A3D5BCB,0x48F01CB936ECDA67 // A13,A12
1080data8 0x46A712B3425C6797,0x45E5164114BD6DA1 // A9,A8
1081data8 0x43A216A356069D01,0x42E25E42A45E2108 // A5,A4
1082data8 0xC1F42ED57BBC2529,0x4016 // A2
1083data8 0xB1C7B615A7DCA8A9,0xBFD7 // A0
1084//(-9;-8)
1085data8 0x4E09D478E5EE857D,0x4D1647782106E9AB // A15,A14
1086data8 0x4A3C7F4D51927548,0x49497954796D743A // A11,A10
1087data8 0x467387BD6AF0CBDF,0x4582843E134111D2 // A7,A6
1088data8 0x9F003C6DE9666513,0x402B // A3
1089data8 0x9D8447F6BF99950A,0x400E // A1
1090data8 0x4C22364D238C61A9,0x4B300B18050AB940 // A13,A12
1091data8 0x4857004D64215772,0x4765074E448C3C9A // A9,A8
1092data8 0x44920E9EA07BF624,0x43A257BEC94BBF48 // A5,A4
1093data8 0xC1D1C49AC5B2A4B4,0x401C // A2
1094data8 0x9A749AF9F2D2E688,0x3FDB // A0
1095//(-10;-9)
1096data8 0x5102C7C43EA26C83,0x4FDCD174DEB0426B // A15,A14
1097data8 0x4C6A036195CD5BAD,0x4B44ABB52B65628A // A11,A10
1098data8 0x47D6439374B98FED,0x46B2C3903EF44D7D // A7,A6
1099data8 0xE25BAF73AB8A7DB3,0x4034 // A3
1100data8 0xB130901CA6D81B61,0x4011 // A1
1101data8 0x4EB50BB0726AE206,0x4D907A96E6D2B6E2 // A13,A12
1102data8 0x4A20975D78EAF01A,0x48FAF79C9C3E7908 // A9,A8
1103data8 0x459044144129A247,0x446D6043FA3150A3 // A5,A4
1104data8 0xF547997E083D9BA7,0x4022 // A2
1105data8 0x977AF525A6ECA1BC,0x3FDC // A0
1106//(-11;-10)
1107data8 0x5420A5D5E90C6D73,0x52C4710A503DC67A // A15,A14
1108data8 0x4EB2ED07BA88D2A8,0x4D581001ED9A5ECE // A11,A10
1109data8 0x494A8A28E9E3DFEF,0x47F1E4E1E476793E // A7,A6
1110data8 0xDD0C97E12D4A3378,0x403E // A3
1111data8 0xDD7C12D5182FD543,0x4014 // A1
1112data8 0x5167ED536877A072,0x500DF9AF21DDC0B6 // A13,A12
1113data8 0x4BFEE6F04BC34FF8,0x4AA4175CEF736A5E // A9,A8
1114data8 0x4698D1B4388FEC78,0x4541EDE7607A600D // A5,A4
1115data8 0xBF9F645F282AC552,0x4029 // A2
1116data8 0xAE1BBE4D3CDACCF4,0x3FE1 // A0
1117//(-12;-11)
1118data8 0x575F0EEF5FB7D4C0,0x55CBB7302B211A7C // A15,A14
1119data8 0x5113A4F1825C7CB2,0x4F822A0D46E0605A // A11,A10
1120data8 0x4ACED38FC8BE069A,0x493E3B56D2649F18 // A7,A6
1121data8 0x8FA8FF5DF8B72D5E,0x4049 // A3
1122data8 0x9845417E8598D642,0x4018 // A1
1123data8 0x5437780541C3F2D3,0x52A56279B563C1B2 // A13,A12
1124data8 0x4DF0F71A48C50188,0x4C600B358988DEBF // A9,A8
1125data8 0x47AE7EE95BDA3DE9,0x46200599DC16B18F // A5,A4
1126data8 0xB5249F914932E55D,0x4030 // A2
1127data8 0xEAE760CD2C086094,0x3FE5 // A0
1128//(-13;-12)
1129data8 0x5ABA5848651F6D18,0x58EF60D8A817650B // A15,A14
1130data8 0x538A8CA86E13EFB1,0x51C05DBD4D01076D // A11,A10
1131data8 0x4C607594C339D259,0x4A9585BD5BF932BB // A7,A6
1132data8 0xF26D282C36EC3611,0x4053 // A3
1133data8 0xE467DF4810EE7EEE,0x401B // A1
1134data8 0x5721D9BA485E8CC3,0x5555AF2CCFB2104D // A13,A12
1135data8 0x4FF4619A17B14EA6,0x4E29B2F29EB9F8C4 // A9,A8
1136data8 0x48CCF27629D46E79,0x47044715F991A63D // A5,A4
1137data8 0xCBC92FB9BDAA95A9,0x4037 // A2
1138data8 0xFB743A426163665B,0xBFE6 // A0
1139//(-14;-13)
1140data8 0x5E3295B24B353EAA,0x5C2B447E29796F20 // A15,A14
1141data8 0x5615A35CB5EAFAE5,0x54106AB089C95CAF // A11,A10
1142data8 0x4DFEC7D93501900A,0x4BF8C4C685F01B83 // A7,A6
1143data8 0x820899603D9A74D5,0x405F // A3
1144data8 0xB9949919933821CB,0x401F // A1
1145data8 0x5A23373DB9A995AC,0x581CBA0AF7F53009 // A13,A12
1146data8 0x520929836BB304CD,0x500386409A7076DA // A9,A8
1147data8 0x49F480173FEAF90B,0x47F1ACB14B810793 // A5,A4
1148data8 0x86881B8674DBF205,0x403F // A2
1149data8 0x8CF3CC35AA2C5F90,0x3FED // A0
1150//(-15;-14)
1151data8 0x61C37D53BE0029D6,0x5F80667CD9D68354 // A15,A14
1152data8 0x58B3F01898E6605B,0x567149652116DB6A // A11,A10
1153data8 0x4FA82FA4F5D35B00,0x4D663DB00832DF8F // A7,A6
1154data8 0xAE426731C9B94996,0x406A // A3
1155data8 0xA264C84BE3708F3F,0x4023 // A1
1156data8 0x5D3B254BC1C806A8,0x5AF72E736048B553 // A13,A12
1157data8 0x542E476505104BB0,0x51EAD96CDC4FB48F // A9,A8
1158data8 0x4B25095F498DB134,0x48E4B9FDEBFE24AB // A5,A4
1159data8 0xCE076A5A116C1D34,0x4046 // A2
1160data8 0x940013871A15050B,0x3FF1 // A0
1161//
1162// left negative roots
1163//(-3;-2)
1164data8 0x41AEB7998DBE2B2C,0xC19053D8FAC05DF7 // A16,A15
1165data8 0x4133197BF1ADEAF9,0xC1150728B9B82072 // A12,A11
1166data8 0x40BDBA65E74F4526,0xC0A12239BEEF8F72 // A8,A7
1167data8 0xFA8256664F99E2AA,0x4004 // A4
1168data8 0x9933F9E132D2A5DB,0x4002 // A2
1169data8 0x416FFB167B85F77C,0xC15166AE0ACCF87C // A14,A13
1170data8 0x40F75815106322C0,0xC0DA2D23C59C348D // A10,A9
1171data8 0x4084373F7CC42043,0xC0685884581F8C61 // A6,A5
1172data8 0xA0C2D6186460FF9D,0xC003 // A3
1173data8 0xF5096D48258CA0AD,0xBFFF // A1
1174//(-4;-3)
1175data8 0xC3E5BD233016D4B9,0x43A084DAD2D94AB1 // A15,A14
1176data8 0xC2CCFFF5E5AED722,0x4286D143AC7D29A6 // A11,A10
1177data8 0xC1B7DBBE0680D07B,0x4173E8F3ABB79CED // A7,A6
1178data8 0xE929ACEA59799BAF,0xC00A // A3
1179data8 0xA5CCECB362B21E1C,0xC003 // A1
1180data8 0xC357EED873871B81,0x43128E0B873204FC // A13,A12
1181data8 0xC242225FA76E8450,0x41FD2F76AE7386CE // A9,A8
1182data8 0xC13116F7806D0C7A,0x40EE8F829F141025 // A5,A4
1183data8 0xFBB6F57021B5B397,0x4006 // A2
1184data8 0xEEE019B4C05AC269,0xBFCB // A0
1185//(-5;-4)
1186data8 0xC626A52FE8AAA100,0x45B9FD1F4DDFE31E // A15,A14
1187data8 0xC473812A5675F08B,0x440738530AECC254 // A11,A10
1188data8 0xC2C5068B3F94AC27,0x425A8C5C539A500B // A7,A6
1189data8 0x869FBFF732F20C3A,0xC012 // A3
1190data8 0xE91251F7CF25A655,0xC005 // A1
1191data8 0xC54C18CB48E5DA0F,0x44E07BD36FF561DF // A13,A12
1192data8 0xC39BEC120D2FEBEA,0x4330FFA5388435BE // A9,A8
1193data8 0xC1F13D5D163B7FB5,0x418752A6F5AC0F39 // A5,A4
1194data8 0xDA99E33C51D360F0,0x400B // A2
1195data8 0x9F47A66A2F53D9B9,0x3FD1 // A0
1196//(-6;-5)
1197data8 0xC8970DAC16B6D59E,0x480170728306FD76 // A15,A14
1198data8 0xC63E0E5030604CF3,0x45A7924D74D57C65 // A11,A10
1199data8 0xC3E8684E41730FC6,0x43544D54EA2E5B9A // A7,A6
1200data8 0xEB7404450C47C5F4,0xC019 // A3
1201data8 0xB30FB521D2C19F8B,0xC008 // A1
1202data8 0xC768F34D35DF6320,0x46D348B3BB2E68B8 // A13,A12
1203data8 0xC512AC2FE5EA638E,0x447DF44BC7FC5E17 // A9,A8
1204data8 0xC2C15EA6B0AAFEF9,0x422EF5D308DBC420 // A5,A4
1205data8 0xFBCEE5BCA70FD3A3,0x4010 // A2
1206data8 0x8589A7CFFE0A3E86,0xBFD5 // A0
1207//(-7;-6)
1208data8 0xCB3995A0CC961E5A,0x4A7615C6C7116ADD // A15,A14
1209data8 0xC82C5AFE0BF9C427,0x47695BD2F367668B // A11,A10
1210data8 0xC52377E70BA14CF5,0x4462775E859E4392 // A7,A6
1211data8 0x9EC8ED6E4C3D4DBE,0xC022 // A3
1212data8 0x9D5FBD2E75520E65,0xC00B // A1
1213data8 0xC9B21BB881A4DDF8,0x48EFEAB06FBA0207 // A13,A12
1214data8 0xC6A6E8550CBC188F,0x45E4F3D26238B099 // A9,A8
1215data8 0xC3A20427DF1B110A,0x42E24F3D636F2E4E // A5,A4
1216data8 0xC1A4D12A82280CFB,0x4016 // A2
1217data8 0xEF46D8DCCA9E8197,0x3FD2 // A0
1218//(-8;-7)
1219data8 0xCE0946982B27DE5B,0x4D15DBC6664E2DD2 // A15,A14
1220data8 0xCA3C769F6B3B2B93,0x49497251CD0C4363 // A11,A10
1221data8 0xC67384066C47F489,0x458281393433AB28 // A7,A6
1222data8 0x9EF3459926D0F14F,0xC02B // A3
1223data8 0x9D7BB7F2600DFF0B,0xC00E // A1
1224data8 0xCC22351326C939A7,0x4B3009431C4F1D3F // A13,A12
1225data8 0xC856FAADDD48815D,0x476502BC3ECA040C // A9,A8
1226data8 0xC4920C2A84173810,0x43A255C052525F99 // A5,A4
1227data8 0xC1C73B6554011EFA,0x401C // A2
1228data8 0x954612700ADF8317,0xBFD8 // A0
1229//(-9;-8)
1230data8 0xD102F5CC7B590D3A,0x4FDD0F1C30E4EB22 // A15,A14
1231data8 0xCC6A02912B0DF650,0x4B44AB18E4FCC159 // A11,A10
1232data8 0xC7D64314B4A2FAAB,0x46B2C334AE5E2D34 // A7,A6
1233data8 0xE2598724F7E28E99,0xC034 // A3
1234data8 0xB12F6FE2E195452C,0xC011 // A1
1235data8 0xCEB507747AF9356A,0x4D907802C08BA48F // A13,A12
1236data8 0xCA2096E3DC29516F,0x48FAF6ED046A1DB7 // A9,A8
1237data8 0xC59043D21BA5EE56,0x446D5FE468B30450 // A5,A4
1238data8 0xF5460A8196B59C83,0x4022 // A2
1239data8 0xB108F35A8EDA92D5,0xBFDD // A0
1240//(-10;-9)
1241data8 0xD420430D91F8265B,0x52C406CAAAC9E0EE // A15,A14
1242data8 0xCEB2ECDDDAA3DAD1,0x4D580FDA97F92E3A // A11,A10
1243data8 0xC94A8A192341B5D4,0x47F1E4D8C690D07B // A7,A6
1244data8 0xDD0C5F920C2F0D2B,0xC03E // A3
1245data8 0xDD7BED3631657B48,0xC014 // A1
1246data8 0xD167F410E64E90A4,0x500DFFED20F714A7 // A13,A12
1247data8 0xCBFEE6D9043169E9,0x4AA4174F64B40AA7 // A9,A8
1248data8 0xC698D1A9AF0AB9C2,0x4541EDE14987A887 // A5,A4
1249data8 0xBF9F43D461B3DE6E,0x4029 // A2
1250data8 0xF3891A50642FAF26,0x3FE1 // A0
1251//(-11;-10)
1252data8 0xD75F0EEAF769D42A,0x55CBB72C8869183A // A15,A14
1253data8 0xD113A4EF80394F77,0x4F822A0B96B3ECA9 // A11,A10
1254data8 0xCACED38DC75763CB,0x493E3B5522D2D028 // A7,A6
1255data8 0x8FA8FB5C92533701,0xC049 // A3
1256data8 0x98453EDB9339C24E,0xC018 // A1
1257data8 0xD43778026CCD4B20,0x52A5627753273B9B // A13,A12
1258data8 0xCDF0F718DD7E1214,0x4C600B34582911EB // A9,A8
1259data8 0xC7AE7EE7F112362C,0x46200599439C264F // A5,A4
1260data8 0xB5249C335342B5BC,0x4030 // A2
1261data8 0x881550711D143475,0x3FE4 // A0
1262//(-12;-11)
1263data8 0xDAB9C724EEEE2BBB,0x58EEC971340EDDBA // A15,A14
1264data8 0xD38A8C8AE63BD8BF,0x51C05DB21CEE00D3 // A11,A10
1265data8 0xCC607594C311C12D,0x4A9585BD5BE6AB57 // A7,A6
1266data8 0xF26D282C36EC0E66,0xC053 // A3
1267data8 0xE467DF1FA674BFAE,0xC01B // A1
1268data8 0xD721DE506999AA9C,0x5555B34F71B45132 // A13,A12
1269data8 0xCFF4619A476BF76F,0x4E29B2F2BBE7A67E // A9,A8
1270data8 0xC8CCF27629D48EDC,0x47044715F991AB46 // A5,A4
1271data8 0xCBC92FB9BDAA928D,0x4037 // A2
1272data8 0xCE27C4F01CF53284,0xBFE6 // A0
1273//(-13;-12)
1274data8 0xDE3295B24355C5A1,0x5C2B447E298B562D // A15,A14
1275data8 0xD615A35CB5E92103,0x54106AB089C95E8C // A11,A10
1276data8 0xCDFEC7D935019005,0x4BF8C4C685F01B83 // A7,A6
1277data8 0x820899603D9A74D5,0xC05F // A3
1278data8 0xB9949916F8DF4AC4,0xC01F // A1
1279data8 0xDA23373DBA0B7548,0x581CBA0AF7F45C01 // A13,A12
1280data8 0xD20929836BB30934,0x500386409A7076D6 // A9,A8
1281data8 0xC9F480173FEAF90B,0x47F1ACB14B810793 // A5,A4
1282data8 0x86881B8674DBF205,0x403F // A2
1283data8 0x8CFAFA9A142C1FF0,0x3FED // A0
1284//(-14;-13)
1285data8 0xE1C33F356FA2C630,0x5F8038B8AA919DD7 // A15,A14
1286data8 0xD8B3F0167E14982D,0x5671496400BAE0DB // A11,A10
1287data8 0xCFA82FA4F5D25C3E,0x4D663DB008328C58 // A7,A6
1288data8 0xAE426731C9B94980,0xC06A // A3
1289data8 0xA264C84BB8A66F86,0xC023 // A1
1290data8 0xDD3B26E34762ED1E,0x5AF72F76E3C1B793 // A13,A12
1291data8 0xD42E476507E3D06E,0x51EAD96CDD881DFA // A9,A8
1292data8 0xCB25095F498DB15F,0x48E4B9FDEBFE24B5 // A5,A4
1293data8 0xCE076A5A116C1D32,0x4046 // A2
1294data8 0x94001BF5A24966F5,0x3FF1 // A0
1295//(-15;-14)
1296data8 0xE56DB8B72D7156FF,0x62EAB0CDB22539BE // A15,A14
1297data8 0xDB63D76B0D3457E7,0x58E254823D0AE4FF // A11,A10
1298data8 0xD15F060BF548404A,0x4EDE65C20CD4E961 // A7,A6
1299data8 0x900DA565ED76C19D,0xC076 // A3
1300data8 0x9868C809852DA712,0xC027 // A1
1301data8 0xE067CCDA0408AAF0,0x5DE5A79C5C5C54AF // A13,A12
1302data8 0xD6611ADBF5958ED0,0x53E0294092BE9677 // A9,A8
1303data8 0xCC5EA28D90EE8C5D,0x49E014930EF336EE // A5,A4
1304data8 0xB57930DCE7A61AE8,0x404E // A2
1305data8 0x976BEC1F30DF151C,0x3FF5 // A0
1306LOCAL_OBJECT_END(lgamma_data)
1307
1308
1309.section .text
1310GLOBAL_LIBM_ENTRY(__libm_lgamma)
1311
1312{ .mfi
1313      getf.exp      GR_SignExp = f8
1314      frcpa.s1      FR_C,p9 = f1,f8
1315      mov           GR_ExpMask = 0x1ffff
1316}
1317{ .mfi
1318      addl          GR_ad_Data = @ltoff(lgamma_data),gp
1319      fcvt.fx.s1    FR_int_N = f8
1320      mov           GR_2_25 = 0x4002 // 2.25
1321};;
1322{ .mfi
1323      getf.d        GR_ArgAsIs = f8
1324      fclass.m      p13,p0 = f8,0x1EF // is x NaTVal, NaN,
1325                                      // +/-0, +/-INF or +/-denormal?
1326      mov           GR_ExpBias = 0xFFFF
1327}
1328{ .mfi
1329      ld8           GR_ad_Data = [GR_ad_Data]
1330      fcvt.fx.trunc.s1 FR_int_Ntrunc = f8
1331      mov           GR_ExpOf256 = 0x10007
1332};;
1333{ .mfi
1334      mov           GR_ExpOf2 = 0x10000
1335      fcmp.lt.s1    p14,p15 = f8,f0 // p14 if x<0
1336      dep.z         GR_Ind = GR_SignExp,8,4
1337}
1338{ .mfi
1339      and           GR_Exp = GR_SignExp,GR_ExpMask
1340      fma.s1        FR_2 = f1,f1,f1
1341      cmp.lt        p10,p0 = GR_SignExp,GR_ExpBias
1342};;
1343{ .mfi
1344      add           GR_ad_1 = 0xB80,GR_ad_Data
1345      fnorm.s1      FR_NormX = f8
1346      shr.u         GR_Arg = GR_ArgAsIs,48
1347}
1348{ .mib
1349      add           GR_ad_Co = GR_Ind,GR_ad_Data
1350      add           GR_ad_Ce = 0x10,GR_ad_Data
1351      // jump if the input argument is NaTVal, NaN, +/-0, +/-INF or +/-denormal
1352(p13) br.cond.spnt  lgamma_spec
1353};;
1354lgamma_common:
1355{ .mfi
1356      ldfpd         FR_LocalMin,FR_05 = [GR_ad_1],16
1357      fmerge.se     FR_x = f1,f8
1358      add           GR_ad_2 = 0xBC0,GR_ad_Data
1359}
1360{ .mfb
1361      add           GR_ad_Ce = GR_Ind,GR_ad_Ce
1362      fms.s1        FR_w = f8,f1,f1 // x-1
1363      // jump if the input argument is positive and less than 1.0
1364(p10) br.cond.spnt  lgamma_0_1
1365};;
1366{ .mfi
1367      ldfe          FR_C01 = [GR_ad_Co],32
1368      fnma.s1       FR_InvX = FR_C,f8,f1 // NR iteration #1
1369(p15) cmp.lt.unc    p8,p0 = GR_ExpOf256,GR_SignExp
1370}
1371{ .mib
1372      ldfe          FR_C11 = [GR_ad_Ce],32
1373(p15) cmp.lt.unc    p11,p0 = GR_Arg,GR_2_25
1374      // jump if the input argument isn't less than 512.0
1375(p8)  br.cond.spnt  lgamma_pstirling
1376};;
1377{ .mfi
1378      ldfe          FR_C21 = [GR_ad_Co],32
1379(p14) fms.s1        FR_r = FR_C,f8,f1 // reduced arg for log(x)
1380(p14) cmp.lt.unc    p0,p9 = GR_Exp,GR_ExpOf256
1381}
1382{ .mib
1383      ldfe          FR_C31 = [GR_ad_Ce],32
1384      add           GR_ad_Co7 = 0x12C0,GR_ad_2
1385      // jump if the input argument is from range [1.0; 2.25)
1386(p11) br.cond.spnt  lgamma_1_2
1387};;
1388{ .mfi
1389      ldfe          FR_C41 = [GR_ad_Co],32
1390      fcvt.xf       FR_N = FR_int_N
1391      add           GR_ad_Ce7 = 0x1310,GR_ad_2
1392}
1393{ .mfb
1394      ldfe          FR_C51 = [GR_ad_Ce],32
1395(p14) fma.s1        FR_5 = FR_2,FR_2,f1
1396      // jump if the input argument is negative and |x| >= 256.0 (GR_Exp >= GR_ExpOf256)
1397(p9)  br.cond.spnt  lgamma_negstirling
1398};;
1399{ .mfi
1400      ldfe          FR_C61 = [GR_ad_Co],32
1401(p14) fcvt.xf       FR_Ntrunc = FR_int_Ntrunc
1402      shr           GR_Ind = GR_Ind,4
1403}
1404{ .mfi
1405      ldfe          FR_C71 = [GR_ad_Ce],32
1406(p14) fma.s1        FR_Xp1 = f1,f1,FR_NormX // x+1
1407      cmp.eq        p6,p7 = GR_ExpOf2,GR_SignExp
1408};;
1409.pred.rel "mutex",p6,p7
1410{ .mfi
1411      ldfe          FR_C81 = [GR_ad_Co],32
1412(p6)  fma.s1        FR_x = f0,f0,FR_NormX
1413      shladd        GR_Offs7 = GR_Ind,2,GR_Ind // (ind*16)*5
1414}
1415{ .mfi
1416      ldfe          FR_C91 = [GR_ad_Ce],32
1417(p7)  fms.s1        FR_x = FR_x,f1,f1
1418      add           GR_ad_Co7 = 0x800,GR_ad_Data
1419};;
1420{ .mfi
1421      ldfe          FR_CA1 = [GR_ad_Co],32
1422(p14) fma.s1        FR_3 = f1,f1,FR_2
1423      shladd        GR_Offs7 = GR_Ind,1,GR_Offs7 // (ind*16)*7
1424}
1425{ .mfi
1426      ldfe          FR_C00 = [GR_ad_Ce],32
1427(p14) fma.s1        FR_Xp4 = FR_2,FR_2,FR_NormX
1428      add           GR_ad_Ce7 = 0x810,GR_ad_Data
1429};;
1430{ .mfi
1431      ldfe          FR_C10 = [GR_ad_Co],32
1432(p6)  fms.s1        FR_Xm2 = FR_w,f1,f1
1433      add           GR_ad_Co7 = GR_ad_Co7,GR_Offs7
1434}
1435{ .mfi
1436      ldfe          FR_C20 = [GR_ad_Ce],32
1437(p14) fma.s1        FR_r2 = FR_r,FR_r,f0 // log(x)
1438      add           GR_ad_Ce7 = GR_ad_Ce7,GR_Offs7
1439};;
1440{ .mfi
1441      ldfe          FR_C30 = [GR_ad_Co],32
1442(p14) fms.s1        FR_Xf = FR_NormX,f1,FR_N  // xf = x - [x]
1443(p14) mov           GR_Arg17 = 0xC031 // -17
1444}
1445{ .mfi
1446      ldfe          FR_C40 = [GR_ad_Ce],32
1447(p14) fma.s1        FR_Xp5 = FR_5,f1,FR_NormX
1448(p14) sub           GR_Exp = GR_Exp,GR_ExpBias
1449};;
1450{ .mfi
1451      ldfe          FR_C50 = [GR_ad_Co7],32
1452(p14) fms.s1        FR_Xfr = FR_Xp1,f1,FR_Ntrunc // xfr = (x+1) - [x]
1453(p14) cmp.lt.unc    p13,p0 = GR_Arg,GR_Arg17
1454}
1455{ .mfb
1456      ldfe          FR_C60 = [GR_ad_Ce7],32
1457(p14) fma.s1        FR_Xp10 = FR_5,FR_2,FR_NormX
1458      // jump if the input argument is negative and greater than -17.0
1459(p13) br.cond.spnt  lgamma_negrecursion
1460};;
1461{ .mfi
1462      ldfe          FR_C70 = [GR_ad_Co7],32
1463      fma.s1        FR_C01 = FR_x,f1,FR_C01
1464(p14) add           GR_ad_Ce = 0x1310,GR_ad_2
1465}
1466{ .mfi
1467      ldfe          FR_C80 = [GR_ad_Ce7],32
1468      fma.s1        FR_C11 = FR_x,f1,FR_C11
1469(p14) add           GR_ad_Co = 0x12C0,GR_ad_2
1470};;
1471{ .mfi
1472      ldfe          FR_C90 = [GR_ad_Co7],32
1473      fma.s1        FR_C21 = FR_x,f1,FR_C21
1474      nop.i         0
1475}
1476{ .mfi
1477      ldfe          FR_CA0 = [GR_ad_Ce7],32
1478      fma.s1        FR_C31 = FR_x,f1,FR_C31
1479      nop.i         0
1480};;
1481{ .mfi
1482      ldfe          FR_CN = [GR_ad_Co7],32
1483      fma.s1        FR_C41 = FR_x,f1,FR_C41
1484      nop.i         0
1485}
1486{ .mfi
1487(p14) ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
1488      fma.s1        FR_C51 = FR_x,f1,FR_C51
1489      nop.i         0
1490};;
1491{ .mfi
1492(p14) ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
1493      fma.s1        FR_C61 = FR_x,f1,FR_C61
1494      nop.i         0
1495}
1496{ .mfi
1497(p14) ldfe          FR_Ln2 = [GR_ad_1]
1498      fma.s1        FR_C71 = FR_x,f1,FR_C71
1499      nop.i         0
1500};;
1501{ .mfi
1502(p14) ldfpd         FR_S28,FR_S26 = [GR_ad_Co],16
1503      fma.s1        FR_C81 = FR_x,f1,FR_C81
1504      add           GR_ad_2 = 0x60,GR_ad_2
1505}
1506{ .mfi
1507(p14) ldfpd         FR_S24,FR_S22 = [GR_ad_Ce],16
1508      fma.s1        FR_C91 = FR_x,f1,FR_C91
1509      nop.i         0
1510};;
1511{ .mfi
1512(p14) ldfpd         FR_S20,FR_S18 = [GR_ad_Co],16
1513      fma.s1        FR_CA1 = FR_x,f1,FR_CA1
1514      nop.i         0
1515}
1516{ .mfi
1517(p14) ldfpd         FR_S16,FR_S14 = [GR_ad_Ce],16
1518      fma.s1        FR_C01 = FR_C01,FR_x,FR_C00
1519      nop.i         0
1520};;
1521{ .mfi
1522(p14) getf.exp      GR_SignExp = FR_Xf
1523      fma.s1        FR_C11 = FR_C11,FR_x,FR_C10
1524      nop.i         0
1525}
1526{ .mfi
1527(p14) ldfe          FR_S12 = [GR_ad_Co],16
1528      fma.s1        FR_C21 = FR_C21,FR_x,FR_C20
1529      nop.i         0
1530};;
1531{ .mfi
1532(p14) getf.sig      GR_Sig = FR_Xf
1533(p14) frcpa.s1      FR_InvXf,p0 = f1,FR_Xf
1534      nop.i         0
1535}
1536{ .mfi
1537(p14) ldfe          FR_S10 = [GR_ad_Ce],16
1538      fma.s1        FR_C41 = FR_C41,FR_x,FR_C40
1539      nop.i         0
1540};;
1541{ .mfi
1542(p14) ldfe          FR_S8 = [GR_ad_Co],16
1543      fma.s1        FR_C51 = FR_C51,FR_x,FR_C50
1544      nop.i         0
1545}
1546{ .mfi
1547(p14) ldfe          FR_S6 = [GR_ad_Ce],16
1548      fma.s1        FR_C61 = FR_C61,FR_x,FR_C60
1549(p14) and           GR_Expf = GR_SignExp,GR_ExpMask
1550};;
1551{ .mfi
1552(p14) sub           GR_Expf = GR_Expf,GR_ExpBias
1553      fma.s1        FR_C71 = FR_C71,FR_x,FR_C70
1554(p14) shl           GR_Ind = GR_Sig,1
1555}
1556{ .mfi
1557(p14) ldfe          FR_S4 = [GR_ad_Co],16
1558      fma.s1        FR_C81 = FR_C81,FR_x,FR_C80
1559(p14) cmp.eq.unc    p8,p0 = 0,GR_Sig
1560};;
1561{ .mfi
1562(p14) setf.sig      FR_int_Nf = GR_Expf
1563      fma.s1        FR_C91 = FR_C91,FR_x,FR_C90
1564(p14) shr.u         GR_Ind = GR_Ind,56
1565}
1566{ .mfb
1567(p14) ldfe          FR_S2 = [GR_ad_Ce],16
1568      fma.s1        FR_CA1 = FR_CA1,FR_x,FR_CA0
1569      // jump if the input argument is integer number from range (-512.0;-17.0]
1570(p8)  br.cond.spnt  lgamma_singularity
1571};;
1572{ .mfi
1573(p14) getf.sig      GR_Sig = FR_int_Ntrunc
1574      fma.s1        FR_C01 = FR_C01,FR_C11,f0
1575      nop.i         0
1576}
1577{ .mfi
1578(p14) shladd        GR_ad_T = GR_Ind,4,GR_ad_2
1579      fma.s1        FR_C31 = FR_C31,FR_x,FR_C30
1580      nop.i         0
1581};;
1582{ .mfi
1583(p14) ldfe          FR_Tf = [GR_ad_T]
1584(p14) fms.s1        FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg for log({x})
1585(p14) extr.u        GR_Ind = GR_ArgAsIs,44,8
1586}
1587{ .mfi
1588      // set p9  if signgam is 32-bit int
1589      // set p10 if signgam is 64-bit int
1590      cmp.eq        p10,p9 = 8,r34
1591      fma.s1        FR_C21 = FR_C21,FR_C41,f0
1592      mov           GR_SignOfGamma = 1
1593};;
1594{ .mfi
1595      nop.m         0
1596      fma.s1        FR_C51 = FR_C51,FR_C61,f0
1597(p14) tbit.z.unc    p8,p0 = GR_Sig,0
1598}
1599{ .mfi
1600(p14) shladd        GR_ad_T = GR_Ind,4,GR_ad_2
1601(p6)  fma.s1        FR_CN = FR_CN,FR_Xm2,f0
1602      nop.i         0
1603};;
1604{ .mfi
1605(p14) setf.sig      FR_int_N = GR_Exp
1606      fma.s1        FR_C71 = FR_C71,FR_C81,f0
1607(p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
1608}
1609{ .mfi
1610      nop.m         0
1611(p14) fma.s1        FR_Xf2 = FR_Xf,FR_Xf,f0
1612      nop.i         0
1613};;
1614{ .mfi
1615(p14) ldfe          FR_T = [GR_ad_T]
1616      fma.s1        FR_C91 = FR_C91,FR_CA1,f0
1617      nop.i         0
1618}
1619{ .mfi
1620      nop.m         0
1621(p14) fma.s1        FR_r2 = FR_r,FR_r,f0
1622      nop.i         0
1623};;
1624.pred.rel "mutex",p9,p10
1625{ .mfi
1626      // store sign of gamma(x) as 32-bit int
1627(p9)  st4           [r33] = GR_SignOfGamma
1628      fma.s1        FR_C01 = FR_C01,FR_C31,f0
1629      nop.i         0
1630}
1631{ .mfi
1632      // store sign of gamma(x) as 64-bit int
1633(p10) st8           [r33] = GR_SignOfGamma
1634(p14) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
1635      nop.i         0
1636};;
1637{ .mfi
1638      nop.m         0
1639(p14) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
1640      nop.i         0
1641}
1642{ .mfb
1643      nop.m         0
1644(p14) fma.s1        FR_P54f = FR_P5,FR_rf,FR_P4
1645      // jump if the input argument is non-integer from range (-512.0;-17.0]
1646(p14) br.cond.spnt  lgamma_negpoly
1647};;
1648{ .mfi
1649      nop.m         0
1650      fma.s1        FR_C21 = FR_C21,FR_C51,f0
1651      nop.i         0
1652};;
1653{ .mfi
1654      nop.m         0
1655      fma.s1        FR_C71 = FR_C71,FR_C91,f0
1656      nop.i         0
1657};;
1658{ .mfi
1659      nop.m         0
1660      fma.s1        FR_CN  = FR_C01,FR_CN,f0
1661      nop.i         0
1662};;
1663{ .mfi
1664      nop.m         0
1665      fma.s1        FR_C21 = FR_C21,FR_C71,f0
1666      nop.i         0
1667};;
1668{ .mfb
1669      nop.m         0
1670      fma.d.s0      f8 = FR_C21,FR_CN,f0
1671      br.ret.sptk   b0 // exit for arguments from range [2.25; 512.0)
1672};;
1673// Branch for calculating ln(GAMMA(x)) for non-integer x, -512 < x < -17
1674//---------------------------------------------------------------------
// Entered by the (p14) branch located just before this label.  The code
// preceding that branch has already produced everything consumed here:
//   FR_Xf, FR_Xf2        xf = x - [x] and xf^2
//   FR_rf                reduced argument for log({x}) (InvXf*xf - 1)
//   FR_r2                r^2 for log(x); FR_r itself is set further up
//   FR_P54, FR_P54f      P5*r + P4 and P5*rf + P4
//   FR_S28 ... FR_S2     coefficients of the polynomial in xf^2
//   FR_T, FR_Tf, FR_Ln2  table values used by both logarithms
//   FR_int_N, FR_int_Nf  integer exponents waiting for fcvt.xf
//   FR_C01 ... FR_CA1, FR_CN  partially multiplied C-factors
// The C-factor products below continue the same product chain that the
// fall-through path above returns as its result (FR_C21*FR_CN).
// Result:
//   f8 = -( xf^2 * S(xf^2) ) - C21*CN - ( LnX + LnXf )
// where LnX / LnXf are the values labelled "log(x)" / "log({x})" by the
// step comments.  FR_05 is defined outside this section (presumably 0.5).
// Operand order reminder: fma d=a,b,c -> a*b+c; fms -> a*b-c;
// fnma -> -(a*b)+c.
1675.align 32
1676lgamma_negpoly:
1677{ .mfi
1678      nop.m         0
1679      fma.s1        FR_Xf4 = FR_Xf2,FR_Xf2,f0 // xf^4
1680      nop.i         0
1681}
1682{ .mfi
1683      nop.m         0
1684      fma.s1        FR_S28 = FR_S28,FR_Xf2,FR_S26 // S28*xf^2 + S26
1685      nop.i         0
1686};;
1687{ .mfi
1688      nop.m         0
1689      fma.s1        FR_S24 = FR_S24,FR_Xf2,FR_S22 // S24*xf^2 + S22
1690      nop.i         0
1691}
1692{ .mfi
1693      nop.m         0
1694      fma.s1        FR_S20 = FR_S20,FR_Xf2,FR_S18 // S20*xf^2 + S18
1695      nop.i         0
1696};;
1697{ .mfi
1698      nop.m         0
1699      fma.s1        FR_S16 = FR_S16,FR_Xf2,FR_S14 // S16*xf^2 + S14
1700      nop.i         0
1701}
1702{ .mfi
1703      nop.m         0
1704      fma.s1        FR_S12 = FR_S12,FR_Xf2,FR_S10 // S12*xf^2 + S10
1705      nop.i         0
1706};;
1707{ .mfi
1708      nop.m         0
1709      fma.s1        FR_S8 = FR_S8,FR_Xf2,FR_S6 // S8*xf^2 + S6
1710      nop.i         0
1711}
1712{ .mfi
1713      nop.m         0
1714      fma.s1        FR_S4 = FR_S4,FR_Xf2,FR_S2 // S4*xf^2 + S2
1715      nop.i         0
1716};;
1717{ .mfi
1718      nop.m         0
1719      fma.s1        FR_rf2 = FR_rf,FR_rf,f0 // rf^2, log({x})
1720      nop.i         0
1721}
1722{ .mfi
1723      nop.m         0
1724      fma.s1        FR_P32f = FR_P3,FR_rf,FR_P2 // log(x)
1725      nop.i         0
1726};;
1727{ .mfi
1728      nop.m         0
1729      fma.s1        FR_r3 = FR_r2,FR_r,f0 // log(x)
1730      nop.i         0
1731}
1732{ .mfi
1733      nop.m         0
1734      fcvt.xf       FR_Nf = FR_int_Nf // log({x})
1735      nop.i         0
1736};;
      // second level: pairs are joined with xf^4
1737{ .mfi
1738      nop.m         0
1739      fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S24
1740      nop.i         0
1741}
1742{ .mfi
1743      nop.m         0
1744      fma.s1        FR_Xf8 = FR_Xf4,FR_Xf4,f0 // xf^8
1745      nop.i         0
1746};;
1747{ .mfi
1748      nop.m         0
1749      fma.s1        FR_S20 = FR_S20,FR_Xf4,FR_S16
1750      nop.i         0
1751}
1752{ .mfi
1753      nop.m         0
1754      fma.s1        FR_C21 = FR_C21,FR_C51,f0 // C-factor product
1755      nop.i         0
1756};;
1757{ .mfi
1758      nop.m         0
1759      fma.s1        FR_S12 = FR_S12,FR_Xf4,FR_S8
1760      nop.i         0
1761}
1762{ .mfi
1763      nop.m         0
1764      fma.s1        FR_C71 = FR_C71,FR_C91,f0 // C-factor product
1765      nop.i         0
1766};;
1767{ .mfi
1768      nop.m         0
1769      fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // log(x)
1770      nop.i         0
1771}
1772{ .mfi
1773      nop.m         0
1774      fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32 // log(x)
1775      nop.i         0
1776};;
1777{ .mfi
1778      nop.m         0
1779      fnma.s1       FR_P10f = FR_rf2,FR_05,FR_rf // log({x})
1780      nop.i         0
1781}
1782{ .mfi
1783      nop.m         0
1784      fcvt.xf       FR_N = FR_int_N // log(x)
1785      nop.i         0
1786};;
1787{ .mfi
1788      nop.m         0
1789      fma.s1        FR_rf3 = FR_rf2,FR_rf,f0 // log({x})
1790      nop.i         0
1791}
1792{ .mfi
1793      nop.m         0
1794      fma.s1        FR_P54f = FR_P54f,FR_rf2,FR_P32f // log({x})
1795      nop.i         0
1796};;
1797{ .mfi
1798      nop.m         0
1799      fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S20 // third level, joined with xf^8
1800      nop.i         0
1801}
1802{ .mfi
1803      nop.m         0
1804      fma.s1        FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf // log({x})
1805      nop.i         0
1806};;
1807{ .mfi
1808      nop.m         0
1809      fma.s1        FR_CN  = FR_C01,FR_CN,f0 // C-factor product
1810      nop.i         0
1811}
1812{ .mfi
1813      nop.m         0
1814      fma.s1        FR_C21 = FR_C21,FR_C71,f0 // C-factor product
1815      nop.i         0
1816};;
1817{ .mfi
1818      nop.m         0
1819      fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // log(x)
1820      nop.i         0
1821};;
1822{ .mfi
1823      nop.m         0
1824      fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // log(x)
1825      nop.i         0
1826};;
1827{ .mfi
1828      nop.m         0
1829      fma.s1        FR_P54f = FR_P54f,FR_rf3,FR_P10f // log({x})
1830      nop.i         0
1831};;
1832{ .mfi
1833      nop.m         0
1834      fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S12
1835      nop.i         0
1836};;
1837{ .mfi
1838      nop.m         0
1839      fnma.s1       FR_C21 = FR_C21,FR_CN,f0 // -(C21*CN)
1840      nop.i         0
1841};;
1842{ .mfi
1843      nop.m         0
1844      fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // log(x)
1845      nop.i         0
1846};;
1847{ .mfi
1848      nop.m         0
1849      fma.s1        FR_LnXf = FR_TpNxLn2f,f1,FR_P54f // log({x})
1850      nop.i         0
1851};;
1852{ .mfi
1853      nop.m         0
1854      fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S4 // S(xf^2) complete
1855      nop.i         0
1856};;
1857{ .mfi
1858      nop.m         0
1859      fma.s1        FR_LnX = FR_LnX,f1,FR_LnXf // LnX + LnXf
1860      nop.i         0
1861};;
1862{ .mfi
1863      nop.m         0
1864      fnma.s1       FR_S28 = FR_S28,FR_Xf2,FR_C21 // -(S*xf^2) - C21*CN
1865      nop.i         0
1866};;
1867{ .mfb
1868      nop.m         0
1869      fms.d.s0      f8 = FR_S28,f1,FR_LnX // final result, rounded to double
1870      br.ret.sptk   b0
1871};;
1872// Branch for calculating ln(GAMMA(x)) for x >= 512
1873//---------------------------------------------------------------------
// Computes, following the comments on the individual steps below,
//   f8 = (x-1/2)*ln(x) + ln(sqrt(2*Pi)) - x + (1/x)*(W2 + W4/x^2)
// ln(x) is assembled as N*Ln2 + T + P(r):
//   N  - GR_SignExp masked with GR_ExpMask, minus the bias 0xffff
//   T  - table entry selected by significand bits 62..55 of FR_NormX
//   r  = FR_C*f8 - 1
//   P(r) = (r - FR_05*r^2) + r^3*((P5*r + P4)*r^2 + (P3*r + P2))
// 1/x is refined by the steps marked "NR iteration"; FR_C, the initial
// FR_InvX, FR_05, GR_SignExp and GR_ExpMask are produced before this
// branch is entered (not shown here).
// Arguments with FR_OvfBound <= x leave through lgamma_overflow before the
// sign of gamma is stored.  Otherwise GR_SignOfGamma = 1 is stored through
// r33, as 8 bytes when r34 == 8 and as 4 bytes otherwise.
// Operand order reminder: fma d=a,b,c -> a*b+c; fms -> a*b-c;
// fnma -> -(a*b)+c.
1874.align 32
1875lgamma_pstirling:
1876{ .mfi
1877      ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
1878      nop.f         0
1879      and           GR_Exp = GR_SignExp,GR_ExpMask
1880}
1881{ .mfi
1882      ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
1883      fma.s1        FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1
1884      mov           GR_ExpBias = 0xffff
1885};;
1886{ .mfi
1887      ldfe          FR_Ln2 = [GR_ad_1],16
1888      nop.f         0
1889      sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent N
1890};;
1891{ .mfi
1892      ldfpd         FR_W4,FR_OvfBound = [GR_ad_2],16
1893      nop.f         0
1894      nop.i         0
1895};;
1896{ .mfi
1897      setf.sig      FR_int_N = GR_Exp
1898      fms.s1        FR_r = FR_C,f8,f1 // r = FR_C*f8 - 1
1899      nop.i         0
1900};;
1901{ .mmf
1902      getf.sig      GR_Sig = FR_NormX
1903      ldfe          FR_LnSqrt2Pi = [GR_ad_1],16
1904      nop.f         0
1905};;
1906{ .mmf
1907      ldfe          FR_W2 = [GR_ad_2],16
1908      nop.m         0
1909      fnma.s1       FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2
1910};;
1911{ .mfi
1912      add           GR_ad_2 = 0x40,GR_ad_2
1913      nop.f         0
1914      shl           GR_Ind = GR_Sig,1 // drop significand bit 63
1915};;
1916{ .mfi
1917      mov           GR_SignOfGamma = 1
1918      nop.f         0
1919      shr.u         GR_Ind = GR_Ind,56 // keep the next 8 bits as table index
1920};;
1921{ .mfi
1922      shladd        GR_ad_2 = GR_Ind,4,GR_ad_2 // 16 bytes per table entry
1923      fma.s1        FR_r2 = FR_r,FR_r,f0
1924      // set p9  if signgam is 32-bit int
1925      // set p10 if signgam is 64-bit int (r34 == 8)
1926      cmp.eq        p10,p9 = 8,r34
1927};;
1928{ .mfi
1929      ldfe          FR_T = [GR_ad_2]
1930      fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
1931      nop.i         0
1932}
1933{ .mfi
1934      nop.m         0
1935      fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
1936      nop.i         0
1937};;
1938{ .mfi
1939      nop.m         0
1940      fcmp.le.s1    p6,p0 = FR_OvfBound,FR_NormX // p6 if OvfBound <= x
1941      nop.i         0
1942}
1943{ .mfi
1944      nop.m         0
1945      fma.s1        FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2
1946      nop.i         0
1947};;
1948{ .mfi
1949      nop.m         0
1950      fcvt.xf       FR_N = FR_int_N
1951      nop.i         0
1952}
1953{ .mfb
1954      nop.m         0
1955      nop.f         0
1956      // jump if x is greater than or equal to OVERFLOW_BOUNDARY
1957(p6)  br.cond.spnt  lgamma_overflow
1958};;
1959.pred.rel "mutex",p9,p10
1960{ .mfi
1961      // store sign of gamma(x) as 32-bit int
1962(p9)  st4           [r33] = GR_SignOfGamma
1963      fma.s1        FR_r3 = FR_r2,FR_r,f0
1964      nop.i         0
1965}
1966{ .mfi
1967      // store sign of gamma(x) as 64-bit int
1968(p10) st8           [r33] = GR_SignOfGamma
1969      fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - FR_05*r^2
1970      nop.i         0
1971};;
1972{ .mfi
1973      nop.m         0
1974      fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
1975      nop.i         0
1976};;
1977{ .mfi
1978      nop.m         0
1979      fnma.s1       FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3
1980      nop.i         0
1981};;
1982{ .mfi
1983      nop.m         0
1984      fms.s1        FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2)
1985      nop.i         0
1986};;
1987{ .mfi
1988      nop.m         0
1989      fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*Ln2
1990      nop.i         0
1991};;
1992{ .mfi
1993      nop.m         0
1994      fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
1995      nop.i         0
1996};;
1997{ .mfi
1998      nop.m         0
1999      fma.s1        FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3
2000      nop.i         0
2001}
2002{ .mfi
2003      nop.m         0
2004      fms.s1        FR_LnSqrt2Pi = FR_LnSqrt2Pi,f1,FR_NormX // ln(sqrt(2*Pi))-x
2005      nop.i         0
2006};;
2007{ .mfi
2008      nop.m         0
2009      fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // ln(x)
2010      nop.i         0
2011};;
2012{ .mfi
2013      nop.m         0
2014      fma.s1        FR_InvX2 = FR_InvX,FR_InvX,f0 // 1/x^2
2015      nop.i         0
2016};;
2017{ .mfi
2018      nop.m         0
2019      // (x-1/2)*ln(x)+ln(sqrt(2*Pi))-x
2020      fma.s1        FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi
2021      nop.i         0
2022};;
2023{ .mfi
2024      nop.m         0
2025      fma.s1        FR_W2 = FR_W4,FR_InvX2,FR_W2 // W2 + W4/x^2
2026      nop.i         0
2027};;
2028{ .mfb
2029      nop.m         0
2030      fma.d.s0      f8 = FR_InvX,FR_W2,FR_LnX // add (1/x)*(W2 + W4/x^2), round to double
2031      br.ret.sptk   b0
2032};;
2033// Branch for calculating ln(GAMMA(x)) for x < -512
2034//---------------------------------------------------------------------
// Outline of the computation (names as in the code):
//   xf   = x - [x]                           (FR_Xf)
//   LnX  = N*Ln2  + T  + P(r)                log term built from r
//   LnXf = Nf*Ln2 + Tf + P(rf)               log term built from rf
//   S    = polynomial in xf^2 with coefficients FR_S28 ... FR_S2
//   f8   = LnX*(x-1/2) - (x + ln(sqrt(2*Pi))) + (1/x)*(W2 + W4/x^2)
//          - xf^2*S - LnXf
// Exits to lgamma_singularity when GR_SignExp >= 0x30033 (unsigned compare)
// or when the significand of xf is zero.
// The stored sign of gamma is -1 when bit 0 of the significand of
// FR_int_Ntrunc is clear and +1 otherwise; it is written through r33 as
// 8 bytes when r34 == 8 and as 4 bytes otherwise.
// FR_N holds [x] on entry and is reused later for the converted exponent.
// FR_N, FR_C, FR_InvX, FR_05, FR_int_Ntrunc, GR_SignExp, GR_ExpMask and
// GR_ArgAsIs are set before this branch is entered (not shown here).
// Operand order reminder: fma d=a,b,c -> a*b+c; fms -> a*b-c;
// fnma -> -(a*b)+c.
2035.align 32
2036lgamma_negstirling:
2037{ .mfi
2038      ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2039      fms.s1        FR_Xf = FR_NormX,f1,FR_N  // xf = x - [x]
2040      and           GR_Exp = GR_SignExp,GR_ExpMask
2041}
2042{ .mfi
2043      ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
2044      fma.s1        FR_InvX = FR_C,FR_InvX,FR_C // NR iteration #1
      // 0x30033 = sign bit 0x20000 | biased exponent 0xffff+52
2045      mov           GR_0x30033 = 0x30033
2046};;
2047{ .mfi
2048      ldfe          FR_Ln2 = [GR_ad_1],16
2049      nop.f         0
2050      extr.u        GR_Ind = GR_ArgAsIs,44,8 // bits 51..44: table index for T
2051}
2052{ .mib
2053      ldfd          FR_W4 = [GR_ad_2],16
2054      // jump if x is less or equal to -2^52, i.e. x is big negative integer
2055      cmp.leu.unc   p7,p0 = GR_0x30033,GR_SignExp
2056(p7)  br.cond.spnt  lgamma_singularity
2057};;
2058{ .mfi
2059      ldfpd         FR_S28,FR_S26 = [GR_ad_Co7],16
2060      nop.f         0
2061      add           GR_ad_LnT = 0x50,GR_ad_2 // base of the table holding T and Tf
2062}
2063{ .mfi
2064      ldfpd         FR_S24,FR_S22 = [GR_ad_Ce7],16
2065      nop.f         0
2066      mov           GR_ExpBias = 0xffff
2067};;
2068{ .mfi
2069      ldfpd         FR_S20,FR_S18 = [GR_ad_Co7],16
2070      nop.f         0
2071      shladd        GR_ad_T = GR_Ind,4,GR_ad_LnT // 16 bytes per table entry
2072}
2073{ .mfi
2074      ldfpd         FR_S16,FR_S14 = [GR_ad_Ce7],16
2075      nop.f         0
2076      sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent of x
2077};;
2078{ .mfi
2079      ldfe          FR_S12 = [GR_ad_Co7],16
2080      nop.f         0
2081      nop.i         0
2082}
2083{ .mfi
2084      ldfe          FR_S10 = [GR_ad_Ce7],16
2085      fms.s1        FR_r = FR_C,f8,f1 // r = FR_C*f8 - 1
2086      nop.i         0
2087};;
2088{ .mmf
2089      ldfe          FR_S8 = [GR_ad_Co7],16
2090      ldfe          FR_S6 = [GR_ad_Ce7],16
2091      nop.f         0
2092};;
2093{ .mfi
2094      ldfe          FR_S4 = [GR_ad_Co7],16
2095      fma.s1        FR_Xf2 = FR_Xf,FR_Xf,f0 // xf^2
2096      nop.i         0
2097}
2098{ .mfi
2099      ldfe          FR_S2 = [GR_ad_Ce7],16
2100      fnma.s1       FR_InvX2 = FR_InvX,FR_NormX,f1 // NR iteration #2
2101      nop.i         0
2102};;
2103{ .mfi
2104      setf.sig      FR_int_N = GR_Exp
2105      frcpa.s1      FR_InvXf,p9 = f1,FR_Xf // 1/xf
2106      nop.i         0
2107}
2108{ .mfi
2109      ldfe          FR_LnSqrt2Pi = [GR_ad_1],16
2110      nop.f         0
2111      nop.i         0
2112};;
2113{ .mfi
      // from here on GR_SignExp describes xf, not x
2114      getf.exp      GR_SignExp = FR_Xf
2115      nop.f         0
2116      nop.i         0
2117}
2118{ .mfi
2119      ldfe          FR_W2 = [GR_ad_2],16
2120      nop.f         0
2121      nop.i         0
2122};;
2123{ .mfi
2124      getf.sig      GR_Sig = FR_Xf
2125      fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
2126      nop.i         0
2127}
2128{ .mfi
2129      ldfe          FR_T = [GR_ad_T]
2130      fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
2131      nop.i         0
2132};;
2133{ .mfi
2134      and           GR_Exp = GR_SignExp,GR_ExpMask
2135      fma.s1        FR_r2 = FR_r,FR_r,f0
2136      nop.i         0
2137}
2138{ .mfi
2139      nop.m         0
2140      fms.s1        FR_Xm05 = FR_NormX,f1,FR_05 // (x-1/2)
2141      nop.i         0
2142};;
2143{ .mfi
2144      nop.m         0
2145      fma.s1        FR_InvX2 = FR_InvX,FR_InvX2,FR_InvX // NR iteration #2
2146      extr.u        GR_Ind = GR_Sig,55,8 // bits 62..55 of xf: table index for Tf
2147}
2148{ .mfi
2149      sub           GR_Exp = GR_Exp,GR_ExpBias // unbiased exponent of xf
2150      fma.s1        FR_Xf4 = FR_Xf2,FR_Xf2,f0 // xf^4
2151      cmp.eq        p6,p0 = 0,GR_Sig // p6 if significand of xf is zero
2152};;
2153{ .mfi
2154      setf.sig      FR_int_Nf = GR_Exp
2155      fma.s1        FR_S28 = FR_S28,FR_Xf2,FR_S26
2156      shladd        GR_ad_T = GR_Ind,4,GR_ad_LnT
2157}
2158{ .mfb
2159      nop.m         0
2160      fma.s1        FR_S24 = FR_S24,FR_Xf2,FR_S22
2161      // jump if xf has a zero significand, i.e. the input argument is an integer
2162(p6)  br.cond.spnt  lgamma_singularity
2163};;
2164{ .mfi
2165      getf.sig      GR_Sig = FR_int_Ntrunc
2166      fma.s1        FR_S20 = FR_S20,FR_Xf2,FR_S18
2167      nop.i         0
2168}
2169{ .mfi
2170      nop.m         0
2171      fma.s1        FR_S16 = FR_S16,FR_Xf2,FR_S14
2172      nop.i         0
2173};;
2174{ .mfi
2175      ldfe          FR_Tf = [GR_ad_T]
2176      fma.s1        FR_S12 = FR_S12,FR_Xf2,FR_S10
2177      nop.i         0
2178}
2179{ .mfi
2180      nop.m         0
2181      fma.s1        FR_S8 = FR_S8,FR_Xf2,FR_S6
2182      mov           GR_SignOfGamma = 1
2183};;
2184{ .mfi
2185      nop.m         0
2186      fms.s1        FR_rf = FR_InvXf,FR_Xf,f1 // reduced arg rf
2187      tbit.z        p8,p0 = GR_Sig,0 // p8 if bit 0 of the FR_int_Ntrunc significand is clear
2188}
2189{ .mfi
2190      nop.m         0
2191      fma.s1        FR_r3 = FR_r2,FR_r,f0
2192      // set p9  if signgam is 32-bit int
2193      // set p10 if signgam is 64-bit int (r34 == 8)
2194      cmp.eq        p10,p9 = 8,r34
2195};;
2196{ .mfi
2197      nop.m         0
2198      fcvt.xf       FR_N = FR_int_N // FR_N reused: now the exponent of x
2199(p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma // sign = -1
2200}
2201{ .mfi
2202      nop.m         0
2203      fnma.s1       FR_InvX = FR_InvX2,FR_NormX,f1 // NR iteration #3
2204      nop.i         0
2205};;
2206.pred.rel "mutex",p9,p10
2207{ .mfi
2208      // store sign of gamma(x) as 32-bit int
2209(p9)  st4           [r33] = GR_SignOfGamma
2210      fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
2211      nop.i         0
2212}
2213{ .mfi
2214      // store sign of gamma(x) as 64-bit int
2215(p10) st8           [r33] = GR_SignOfGamma
2216      fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - FR_05*r^2
2217      nop.i         0
2218};;
2219{ .mfi
2220      nop.m         0
2221      fma.s1        FR_Xf8 = FR_Xf4,FR_Xf4,f0 // xf^8
2222      nop.i         0
2223}
2224{ .mfi
2225      nop.m         0
2226      fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S24
2227      nop.i         0
2228};;
2229{ .mfi
2230      nop.m         0
2231      fma.s1        FR_S20 = FR_S20,FR_Xf4,FR_S16
2232      nop.i         0
2233}
2234{ .mfi
2235      nop.m         0
2236      fma.s1        FR_S12 = FR_S12,FR_Xf4,FR_S8
2237      nop.i         0
2238};;
2239{ .mfi
2240      nop.m         0
2241      fma.s1        FR_rf2 = FR_rf,FR_rf,f0
2242      nop.i         0
2243}
2244{ .mfi
2245      nop.m         0
2246      fma.s1        FR_P54f = FR_P5,FR_rf,FR_P4
2247      nop.i         0
2248};;
2249{ .mfi
2250      nop.m         0
2251      fma.s1        FR_P32f = FR_P3,FR_rf,FR_P2
2252      nop.i         0
2253}
2254{ .mfi
2255      nop.m         0
2256      fma.s1        FR_InvX = FR_InvX2,FR_InvX,FR_InvX2 // NR iteration #3
2257      nop.i         0
2258};;
2259{ .mfi
2260      nop.m         0
2261      fcvt.xf       FR_Nf = FR_int_Nf
2262      nop.i         0
2263}
2264{ .mfi
2265      nop.m         0
2266      fma.s1        FR_LnSqrt2Pi = FR_NormX,f1,FR_LnSqrt2Pi // x+ln(sqrt(2*Pi))
2267      nop.i         0
2268};;
2269{ .mfi
2270      nop.m         0
2271      fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
2272      nop.i         0
2273};;
2274{ .mfi
2275      nop.m         0
2276      fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S20
2277      nop.i         0
2278};;
2279{ .mfi
2280      nop.m         0
2281      fma.s1        FR_rf3 = FR_rf2,FR_rf,f0
2282      nop.i         0
2283}
2284{ .mfi
2285      nop.m         0
2286      fnma.s1       FR_P10f = FR_rf2,FR_05,FR_rf // rf - FR_05*rf^2
2287      nop.i         0
2288};;
2289{ .mfi
2290      nop.m         0
2291      fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*Ln2
2292      nop.i         0
2293}
2294{ .mfi
2295      nop.m         0
2296      fma.s1        FR_P54f = FR_P54f,FR_rf2,FR_P32f
2297      nop.i         0
2298};;
2299{ .mfi
2300      nop.m         0
2301      fma.s1        FR_InvX2 = FR_InvX,FR_InvX,f0 // 1/x^2
2302      nop.i         0
2303};;
2304{ .mfi
2305      nop.m         0
2306      fma.s1        FR_S28 = FR_S28,FR_Xf8,FR_S12
2307      nop.i         0
2308}
2309{ .mfi
2310      nop.m         0
2311      fma.s1        FR_S4 = FR_S4,FR_Xf2,FR_S2
2312      nop.i         0
2313};;
2314{ .mfi
2315      nop.m         0
2316      fma.s1        FR_P54f = FR_P54f,FR_rf3,FR_P10f // P(rf)
2317      nop.i         0
2318}
2319{ .mfi
2320      nop.m         0
2321      fma.s1        FR_TpNxLn2f = FR_Nf,FR_Ln2,FR_Tf // Tf + Nf*Ln2
2322      nop.i         0
2323};;
2324{ .mfi
2325      nop.m         0
2326      fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // LnX
2327      nop.i         0
2328}
2329{ .mfi
2330      nop.m         0
2331      fma.s1        FR_W2 = FR_W4,FR_InvX2,FR_W2 // W2 + W4/x^2
2332      nop.i         0
2333};;
2334{ .mfi
2335      nop.m         0
2336      fma.s1        FR_S28 = FR_S28,FR_Xf4,FR_S4 // S(xf^2) complete
2337      nop.i         0
2338};;
2339{ .mfi
2340      nop.m         0
2341      fma.s1        FR_LnXf = FR_TpNxLn2f,f1,FR_P54f // LnXf
2342      nop.i         0
2343};;
2344{ .mfi
2345      nop.m         0
2346      fms.s1        FR_LnX = FR_LnX,FR_Xm05,FR_LnSqrt2Pi // LnX*(x-1/2) - (x+ln(sqrt(2*Pi)))
2347      nop.i         0
2348};;
2349{ .mfi
2350      nop.m         0
2351      fma.s1        FR_LnX = FR_InvX,FR_W2,FR_LnX // + (1/x)*(W2 + W4/x^2)
2352      nop.i         0
2353};;
2354{ .mfi
2355      nop.m         0
2356      fnma.s1       FR_LnX = FR_S28,FR_Xf2,FR_LnX // - xf^2*S
2357      nop.i         0
2358};;
2359{ .mfb
2360      nop.m         0
2361      fms.d.s0      f8 = FR_LnX,f1,FR_LnXf // - LnXf, rounded to double
2362      br.ret.sptk   b0
2363};;
2364// Branch for calculating ln(GAMMA(x)) for 0 <= x < 1
2365//---------------------------------------------------------------------
// Chooses the polynomial argument FR_x and the coefficient table, starts
// the loads and products needed for the logarithm term, then falls
// through into lgamma_common_0_2.
//   arg <  0.25           FR_x = x             table at GR_ad_Data+0x1C40
//   0.25  <= arg < 0.75   FR_x = x - LocalMin  table offset +0xC0
//   0.75  <= arg < 0.875  FR_x = x - 1         table offset +0x180
//   0.875 <= arg          FR_x = x - 1         no offset added (p9 clear)
// GR_ad_Ce starts at GR_ad_Co + 0x60 and receives the same offset.
// p10 (arg < 0.875) is left set for lgamma_common_0_2.
// The range tests compare GR_Arg with 16-bit constants that match the
// upper 16 bits of the IEEE doubles 0.25, 0.75 and 0.875; GR_Arg is
// presumably the upper 16 bits of the argument - confirm where it is set.
// FR_w, FR_C, FR_LocalMin, GR_Exp, GR_ExpBias and GR_ad_Data are set
// before this branch is entered (not shown here).
2366.align 32
2367lgamma_0_1:
2368{ .mfi
2369      ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2370      fms.s1        FR_x = FR_NormX,f1,f0 // x
2371      mov           GR_Arg025 = 0x3FD0
2372}
2373{ .mfi
2374      ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
2375      nop.f         0
2376      add           GR_ad_Co = 0x1C40,GR_ad_Data
2377};;
2378{ .mfi
2379      ldfe          FR_Ln2 = [GR_ad_1],0x50
2380      nop.f         0
2381      // p6 if arg < 0.25
2382      cmp.lt        p6,p9 = GR_Arg,GR_Arg025
2383}
2384{ .mfi
2385      add           GR_ad_2 = 0x40,GR_ad_2
2386      nop.f         0
2387      mov           GR_Arg075 = 0x3FE8
2388};;
2389{ .mfi
2390      ldfpd         FR_Q8,FR_Q7 = [GR_ad_1],16
2391      fma.s1        FR_w2 = FR_w,FR_w,f0 // w^2
2392      // p7 if 0.25 <= arg < 0.75
2393      // p8 if 0.75 <= arg < 1.0
      // (.unc clears both p7 and p8 when p9 is clear, i.e. arg < 0.25)
2394(p9)  cmp.lt.unc    p7,p8 = GR_Arg,GR_Arg075
2395}
2396{ .mfi
2397      mov           GR_Arg0875 = 0x3FEC
2398      nop.f         0
2399      sub           GR_Exp = GR_Exp,GR_ExpBias
2400};;
2401{ .mfi
2402      ldfpd         FR_Q6,FR_Q5 = [GR_ad_2],16
2403      nop.f         0
      // under p8 only: p9 is rewritten to (arg < 0.875); under p7 it keeps
      // its value 1 from the first compare, under p6 it stays 0
2404(p8)  cmp.lt        p9,p0 = GR_Arg,GR_Arg0875
2405}
2406{ .mfi
2407      ldfpd         FR_Q4,FR_Q3 = [GR_ad_1],16
2408      nop.f         0
2409      add           GR_ad_Ce = 0x60,GR_ad_Co
2410};;
2411.pred.rel "mutex",p7,p8
2412{ .mfi
2413      ldfd          FR_Q2 = [GR_ad_2],16
2414      fms.s1        FR_r = FR_C,f8,f1 // r = FR_C*f8 - 1
2415(p7)  mov           GR_Offs = 0xC0
2416}
2417{ .mfi
2418      setf.sig      FR_int_N = GR_Exp
2419      nop.f         0
2420(p8)  mov           GR_Offs = 0x180
2421};;
2422.pred.rel "mutex",p6,p7
2423{ .mfi
      // GR_Offs is only read when p9 is set, which implies p7 or p8 wrote it
2424(p9)  add           GR_ad_Co = GR_Offs,GR_ad_Co
2425(p8)  fms.s1        FR_x = FR_NormX,f1,f1 // x-1
2426      nop.i         0
2427}
2428{ .mfi
2429(p9)  add           GR_ad_Ce = GR_Offs,GR_ad_Ce
2430(p7)  fms.s1        FR_x = FR_NormX,f1,FR_LocalMin // x-LocalMin
      // p10 if arg < 0.875: lgamma_common_0_2 then subtracts a log term
2431      cmp.lt        p10,p0 = GR_Arg,GR_Arg0875
2432};;
// Common tail, entered by fall-through from lgamma_0_1 (other entry
// points may exist outside this section).
// Inputs:
//   GR_ad_Co, GR_ad_Ce  coefficient table pointers, read alternately
//   FR_x                polynomial argument
//   p10                 when set, a logarithm term LnX is computed and
//                       subtracted from the result
//   FR_w, FR_w2, FR_r, FR_Q8..FR_Q2, FR_P5..FR_P2, FR_Ln2, FR_int_N,
//   GR_ad_2, GR_ArgAsIs, GR_ExpMask, FR_05   used only under p10
// Result:
//   f8 = A(x) - LnX  (p10 set)   or   f8 = A(x)  (p10 clear), with
//   A(x) = A0 + A1*x + x^2*(A2 + A3*x + ... + A17*x^15)
// LnX under p10:
//   p14 (exponent of w <  0xfff9): Qlo*w^6 + Qhi, a polynomial in w
//   p13 (exponent of w >= 0xfff9): N*Ln2 + T + P(r)
// 0xfff9 is 0xffff - 6; 0xffff is the exponent bias used elsewhere in this
// file.  The sign of gamma stored through r33 is always +1 here.
// Operand order reminder: fma d=a,b,c -> a*b+c; fms -> a*b-c;
// fnma -> -(a*b)+c.
2433lgamma_common_0_2:
2434{ .mfi
2435      ldfpd         FR_A17,FR_A16 = [GR_ad_Co],16
2436      nop.f         0
2437      nop.i         0
2438}
2439{ .mfi
2440      ldfpd         FR_A15,FR_A14 = [GR_ad_Ce],16
2441      nop.f         0
2442      nop.i         0
2443};;
2444{ .mfi
2445      ldfpd         FR_A13,FR_A12 = [GR_ad_Co],16
2446      nop.f         0
2447(p10) extr.u        GR_Ind = GR_ArgAsIs,44,8 // bits 51..44: table index for T
2448}
2449{ .mfi
2450      ldfpd         FR_A11,FR_A10 = [GR_ad_Ce],16
2451      nop.f         0
2452      nop.i         0
2453};;
2454{ .mfi
2455      ldfpd         FR_A9,FR_A8 = [GR_ad_Co],16
2456(p10) fnma.s1       FR_Q1 = FR_05,FR_w2,FR_w // w - FR_05*w^2
2457      nop.i         0
2458}
2459{ .mfi
2460      ldfpd         FR_A7,FR_A6 = [GR_ad_Ce],16
2461(p10) fma.s1        FR_w3 = FR_w2,FR_w,f0 // w^3
2462      nop.i         0
2463};;
2464{ .mfi
2465(p10) getf.exp      GR_SignExp_w = FR_w
2466(p10) fma.s1        FR_w4 = FR_w2,FR_w2,f0 // w^4
2467      nop.i         0
2468}
2469{ .mfi
2470(p10) shladd        GR_ad_2 = GR_Ind,4,GR_ad_2 // 16 bytes per table entry
2471(p10) fma.s1        FR_r2 = FR_r,FR_r,f0
2472      nop.i         0
2473};;
2474{ .mfi
2475(p10) ldfe          FR_T = [GR_ad_2]
2476(p10) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
2477      nop.i         0
2478}
2479{ .mfi
2480      ldfe          FR_A5 = [GR_ad_Co],16
2481(p10) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
2482      nop.i         0
2483};;
2484{ .mfi
2485      ldfe          FR_A4 = [GR_ad_Ce],16
2486      fma.s1        FR_x2 = FR_x,FR_x,f0 // x^2
2487(p10) and           GR_Exp_w = GR_ExpMask, GR_SignExp_w
2488}
2489{ .mfi
2490      ldfe          FR_A3 = [GR_ad_Co],16
2491      nop.f         0
2492(p10) mov           GR_fff9 = 0xfff9
2493};;
2494//    p13 <== large w (biased exponent of w >= 0xfff9)
2495//    p14 <== small w (biased exponent of w <  0xfff9)
//    .unc clears both p13 and p14 when p10 is clear
2496{ .mfi
2497      ldfe          FR_A2 = [GR_ad_Ce],16
2498(p10) fma.s1        FR_Q8 = FR_Q8,FR_w,FR_Q7
2499(p10) cmp.ge.unc    p13,p14 = GR_Exp_w,GR_fff9
2500}
2501{ .mfi
2502      ldfe          FR_A1 = [GR_ad_Co],16
2503(p10) fma.s1        FR_Q6 = FR_Q6,FR_w,FR_Q5
2504      nop.i         0
2505};;
2506{ .mfi
2507      ldfe          FR_A0 = [GR_ad_Ce],16
2508(p10) fma.s1        FR_Q4 = FR_Q4,FR_w,FR_Q3
2509      nop.i         0
2510}
2511{ .mfi
2512      nop.m         0
2513(p10) fma.s1        FR_Q2 = FR_Q2,FR_w3,FR_Q1
2514      nop.i         0
2515};;
2516{ .mfi
2517      // set p11 if signgam is 32-bit int
2518      // set p12 if signgam is 64-bit int (r34 == 8)
2519      cmp.eq        p12,p11 = 8,r34
2520(p10) fma.s1        FR_r3 = FR_r2,FR_r,f0
2521      nop.i         0
2522}
2523{ .mfi
2524      nop.m         0
2525(p10) fnma.s1       FR_P10 = FR_r2,FR_05,FR_r // r - FR_05*r^2
2526      mov           GR_SignOfGamma = 1
2527};;
2528.pred.rel "mutex",p11,p12
2529{ .mfi
2530      // store sign of gamma(x) as 32-bit int
2531(p11) st4           [r33] = GR_SignOfGamma
2532      fma.s1        FR_A17 = FR_A17,FR_x,FR_A16
2533      nop.i         0
2534}
2535{ .mfi
2536      // store sign of gamma(x) as 64-bit int
2537(p12) st8           [r33] = GR_SignOfGamma
2538      fma.s1        FR_A15 = FR_A15,FR_x,FR_A14
2539      nop.i         0
2540};;
2541{ .mfi
2542      nop.m         0
2543(p10) fcvt.xf       FR_N = FR_int_N
2544      nop.i         0
2545}
2546{ .mfi
2547      nop.m         0
2548(p10) fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
2549      nop.i         0
2550};;
2551{ .mfi
2552      nop.m         0
2553      fma.s1        FR_A13 = FR_A13,FR_x,FR_A12
2554      nop.i         0
2555}
2556{ .mfi
2557      nop.m         0
2558      fma.s1        FR_A11 = FR_A11,FR_x,FR_A10
2559      nop.i         0
2560};;
2561{ .mfi
2562      nop.m         0
2563      fma.s1        FR_A9 = FR_A9,FR_x,FR_A8
2564      nop.i         0
2565}
2566{ .mfi
2567      nop.m         0
2568      fma.s1        FR_A7 = FR_A7,FR_x,FR_A6
2569      nop.i         0
2570};;
2571{ .mfi
2572      nop.m         0
2573(p10) fma.s1        FR_Qlo = FR_Q8,FR_w2,FR_Q6
2574      nop.i         0
2575}
2576{ .mfi
2577      nop.m         0
2578(p10) fma.s1        FR_w6 = FR_w3,FR_w3,f0 // w^6
2579      nop.i         0
2580};;
2581{ .mfi
2582      nop.m         0
2583(p10) fma.s1        FR_Qhi = FR_Q4,FR_w4,FR_Q2
2584      nop.i         0
2585}
2586{ .mfi
2587      nop.m         0
2588      fma.s1        FR_A5 = FR_A5,FR_x,FR_A4
2589      nop.i         0
2590};;
2591{ .mfi
2592      nop.m         0
2593(p10) fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T // T + N*Ln2
2594      nop.i         0
2595}
2596{ .mfi
2597      nop.m         0
2598      fma.s1        FR_A3 = FR_A3,FR_x,FR_A2
2599      nop.i         0
2600};;
2601{ .mfi
2602      nop.m         0
2603(p10) fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10 // P(r)
2604      nop.i         0
2605}
2606{ .mfi
2607      nop.m         0
2608      fma.s1        FR_A1 = FR_A1,FR_x,FR_A0 // A1*x + A0
2609      nop.i         0
2610};;
      // second level: pairs are joined with x^2
2611{ .mfi
2612      nop.m         0
2613      fma.s1        FR_A17 = FR_A17,FR_x2,FR_A15
2614      nop.i         0
2615}
2616{ .mfi
2617      nop.m         0
2618      fma.s1        FR_A13 = FR_A13,FR_x2,FR_A11
2619      nop.i         0
2620};;
2621{ .mfi
2622      nop.m         0
2623      fma.s1        FR_A9 = FR_A9,FR_x2,FR_A7
2624      nop.i         0
2625}
2626{ .mfi
2627      nop.m         0
2628      fma.s1        FR_x4 = FR_x2,FR_x2,f0 // x^4
2629      nop.i         0
2630};;
2631{ .mfi
2632      nop.m         0
2633(p14) fma.s1        FR_LnX = FR_Qlo,FR_w6,FR_Qhi // small w: polynomial in w
2634      nop.i         0
2635};;
2636{ .mfi
2637      nop.m         0
2638      fma.s1        FR_A5 = FR_A5,FR_x2,FR_A3
2639      nop.i         0
2640};;
2641{ .mfi
2642      nop.m         0
2643(p13) fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54 // large w: table + P(r)
2644      nop.i         0
2645};;
2646{ .mfi
2647      nop.m         0
2648      fma.s1        FR_A17 = FR_A17,FR_x4,FR_A13
2649      nop.i         0
2650}
2651{ .mfi
2652      nop.m         0
2653      fma.s1        FR_x8 = FR_x4,FR_x4,f0 // x^8
2654      nop.i         0
2655};;
2656{ .mfi
2657      nop.m         0
2658      fma.s1        FR_A9 = FR_A9,FR_x4,FR_A5
2659      nop.i         0
2660};;
2661{ .mfi
2662      nop.m         0
2663      fma.s1        FR_A17 = FR_A17,FR_x8,FR_A9 // A2 + A3*x + ... + A17*x^15
2664      nop.i         0
2665};;
2666{ .mfi
2667      nop.m         0
2668(p10) fms.s1        FR_A1 = FR_A1,f1,FR_LnX // (A1*x + A0) - LnX
2669      nop.i         0
2670};;
2671{ .mfb
2672      nop.m         0
2673      fma.d.s0      f8 = FR_A17,FR_x2,FR_A1 // final result, rounded to double
2674      br.ret.sptk   b0
2675};;
2676// branch for calculating of ln(GAMMA(x)) for 1.0 <= x < 2.25
// Summary of the code below:
//  * GR_ad_Co/GR_ad_Ce point 0x10B0/0x1110 bytes past GR_ad_1 and are then
//    advanced by GR_Offs = 0 (x < 1.25), 0xC0 (1.25 <= x < 1.75) or
//    0x180 (x >= 1.75).
//  * FR_x is set to FR_w, FR_w - FR_LocalMin or FR_w - 1 for the same
//    three sub-ranges.
//  * +1 is stored through r33 as the sign of gamma(x), as a 4- or 8-byte
//    integer depending on whether r34 == 8.
//  * FR_w == 1 (x = 2.0) and FR_NormX == 1 (x = 1.0) return +0 immediately.
//  * Otherwise control continues at lgamma_common_0_2 with p10 cleared.
2677//---------------------------------------------------------------------
2678.align 32
2679lgamma_1_2:
2680{ .mfi
2681      add           GR_ad_Co = 0x10B0,GR_ad_1
2682      fcmp.eq.s1    p12,p0 = f1,FR_w  // p12: FR_w == 1.0 (the x == 2.0 fast exit)
2683      mov           GR_Arg125 = 0x3FF4  // top 16 bits of the double 1.25
2684}
2685{ .mfi
2686      add           GR_ad_Ce = 0x1110,GR_ad_1
2687      nop.f         0
2688      mov           GR_Arg175 = 0x3FFC  // top 16 bits of the double 1.75
2689};;
2690{ .mfi
2691      mov           GR_SignOfGamma = 1
2692      fcmp.eq.s1    p13,p0 = f1,FR_NormX  // p13: FR_NormX == 1.0 (the x == 1.0 fast exit)
2693      cmp.lt        p6,p9 = GR_Arg,GR_Arg125 // 1.0 <= x < 1.25
2694}
2695{ .mfi
2696      // set p10 if signgam is 32-bit int
2697      // set p11 if signgam is 64-bit int
2698      cmp.eq        p11,p10 = 8,r34
2699      nop.f         0
2700      cmp.ge        p8,p0 = GR_Arg,GR_Arg175 // x >= 1.75
2701};;
2702.pred.rel "mutex",p10,p11
2703{ .mfi
2704      // store sign of gamma(x) as 32-bit int
2705(p10) st4           [r33] = GR_SignOfGamma
2706(p12) fma.d.s0      f8 = f0,f0,f0  // result +0 for x == 2.0
2707(p9)  cmp.lt.unc    p7,p0 = GR_Arg,GR_Arg175 // 1.25 <= x < 1.75
2708}
2709{ .mib
2710      // store sign of gamma(x) as 64-bit int
2711(p11) st8           [r33] = GR_SignOfGamma
2712      mov           GR_Offs = 0  // default table offset (used when p6: x < 1.25)
2713(p12) br.ret.spnt   b0 // fast exit for 2.0
2714};;
2715.pred.rel "mutex",p7,p8
2716{ .mfi
2717(p7)  mov           GR_Offs = 0xC0
2718(p7)  fms.s1        FR_x = FR_w,f1,FR_LocalMin  // FR_x = FR_w - FR_LocalMin
2719      nop.i         0
2720}
2721{ .mfb
2722(p8)  mov           GR_Offs = 0x180
2723(p13) fma.d.s0      f8 = f0,f0,f0  // result +0 for x == 1.0
2724(p13) br.ret.spnt   b0 // fast exit for 1.0
2725};;
2726.pred.rel "mutex",p6,p8
2727{ .mfi
2728      add           GR_ad_Co = GR_ad_Co,GR_Offs
2729(p8)  fms.s1        FR_x = FR_w,f1,f1  // FR_x = FR_w - 1
2730      cmp.eq        p0,p10 = r0,r0  // r0 == r0 always holds, so the complement p10 is cleared
2731}
2732{ .mfb
2733      add           GR_ad_Ce = GR_ad_Ce,GR_Offs
2734(p6)  fma.s1        FR_x = f0,f0,FR_w  // FR_x = FR_w
2735      br.cond.sptk  lgamma_common_0_2  // NOTE(review): p10 == 0 presumably disables the (p10) log steps there - confirm
2736};;
2737// branch for calculating of ln(GAMMA(x)) for -17 < x < 0
// Outline of the visible code:
//  * FR_NormX is multiplied by FR_Xfr and then by a chain of FR_Xp<k>
//    factors enabled by comparisons against the low 4 bits of GR_Sig
//    (GR_Sig is the significand of FR_int_Ntrunc).
//  * Coefficients FR_B17..FR_B0 are loaded from GR_ad_Data + 0x1C40, plus
//    0xC0 for 0.25 <= arg < 0.75 or 0x180 for 0.75 <= arg < 1.0, and the
//    polynomial is evaluated in FR_x.
//  * Early exits: FR_Xf == 0 -> lgamma_singularity; argument bit pattern
//    within the tabulated distance of a root -> lgamma_negroots; within
//    the fixed distance of GR_PseudoRoot -> lgamma_pseudoroot.
//  * Otherwise the sign of gamma(x) is stored through r33 (-1 when bit 0 of
//    GR_Sig is clear) and the result is
//      p13 ("large w"): f8 = FR_B17 - FR_LnX
//      p14 ("small w"): f8 = FR_B17 - FR_Qlo*FR_w6 - FR_Qhi
2738//---------------------------------------------------------------------
2739.align 32
2740lgamma_negrecursion:
2741{ .mfi
2742      getf.d        GR_ArgXfrAsIs = FR_Xfr  // raw double bits of FR_Xfr
2743      fma.s1        FR_Xp2 = FR_2,f1,FR_NormX  // FR_Xp2 = FR_2 + FR_NormX
2744      mov           GR_Arg05 = 0x3FE  // shifted left by 52 below -> bits of the double 0.5
2745}
2746{ .mfi
2747      add           GR_ad_Roots = 0x1390,GR_ad_1
2748      fma.s1        FR_NormX = FR_NormX,FR_Xfr,f0  // start of the running product
2749      mov           GR_Arg075 = 0x3FE8  // top 16 bits of the double 0.75
2750};;
2751{ .mfi
2752      getf.sig      GR_Sig = FR_int_Ntrunc
2753      fma.s1        FR_Xp3 = FR_2,f1,FR_Xp1  // FR_Xp3 = FR_2 + FR_Xp1
2754      shl           GR_Arg05 = GR_Arg05,52
2755}
2756{ .mfi
2757      mov           GR_Arg025 = 0x3FD0  // top 16 bits of the double 0.25
2758      fma.s1        FR_Xp6 = FR_5,f1,FR_Xp1  // FR_Xp6 = FR_5 + FR_Xp1
2759      add           GR_ad_Co = 0x1C40,GR_ad_Data
2760};;
2761{ .mfi
2762      add           GR_ad_Dx = 8,GR_ad_Roots  // second 8-byte field of each 16-byte root entry
2763      fma.s1        FR_Xp7 = FR_2,f1,FR_Xp5
2764      shr.u         GR_ArgXfr = GR_ArgXfrAsIs,48  // keep the top 16 bits for range tests
2765}
2766{ .mfi
2767      add           GR_ad_Ce = 0x60,GR_ad_Co
2768      fma.s1        FR_Xp8 = FR_3,f1,FR_Xp5
2769      cmp.lt        p6,p0 = GR_ArgXfrAsIs,GR_Arg05  // p6: bits of FR_Xfr below those of 0.5
2770};;
2771{ .mfi
2772      and           GR_RootInd = 0xF,GR_Sig
2773      fma.s1        FR_Xp9 = FR_2,FR_2,FR_Xp5  // FR_Xp9 = FR_2*FR_2 + FR_Xp5
2774      // p10 if arg < 0.25
2775      cmp.lt        p10,p14 = GR_ArgXfr,GR_Arg025
2776}
2777{ .mfi
2778(p6)  add           GR_ad_Roots = 0x120,GR_ad_Roots  // p6: use the root table 0x120 bytes further on
2779      fma.s1        FR_Xp11 = f1,f1,FR_Xp10  // FR_Xp11 = 1 + FR_Xp10
2780(p6)  add           GR_ad_Dx = 0x120,GR_ad_Dx
2781};;
2782{ .mfi
2783      shladd        GR_ad_Root = GR_RootInd,4,GR_ad_Roots  // &entry[GR_RootInd], 16 bytes per entry
2784      fma.s1        FR_Xp12 = FR_2,f1,FR_Xp10
2785      // p11 if 0.25 <= arg < 0.75
2786      // p12 if 0.75 <= arg < 1.0
2787(p14) cmp.lt.unc    p11,p12 = GR_ArgXfr,GR_Arg075
2788}
2789{ .mfi
2790      shladd        GR_ad_Dx = GR_RootInd,4,GR_ad_Dx
2791      fma.s1        FR_Xp13 = FR_3,f1,FR_Xp10
2792      cmp.eq        p0,p13 = 0,GR_Sig  // p13: GR_Sig != 0
2793};;
2794{ .mfi
2795      ld8           GR_Root = [GR_ad_Root]  // root as a 64-bit pattern ...
2796      fma.s1        FR_Xp14 = FR_2,FR_2,FR_Xp10
2797(p12) mov           GR_Offs = 0x180
2798}
2799{ .mfi
2800      ldfd          FR_Root = [GR_ad_Root]  // ... and the same 8 bytes as a double
2801      fma.s1        FR_Xp15 = FR_5,f1,FR_Xp10
2802      and           GR_Sig = 0xF,GR_Sig  // only the low 4 bits are used from here on
2803};;
2804{ .mfi
2805      ld8           GR_Dx = [GR_ad_Dx]
2806      fma.s1        FR_Xp16 = FR_3,FR_2,FR_Xp10  // FR_Xp16 = FR_3*FR_2 + FR_Xp10
2807(p13) cmp.ge.unc    p6,p0 = 0xD,GR_Sig  // p6: p13 and GR_Sig <= 0xD
2808}
2809{ .mfi
2810(p11) mov           GR_Offs = 0xC0
2811(p13) fma.s1        FR_NormX = FR_NormX,FR_Xp1,f0
2812(p13) cmp.ge.unc    p7,p0 = 0xB,GR_Sig  // p7: p13 and GR_Sig <= 0xB
2813};;
2814{ .mfi
2815(p14) add           GR_ad_Co = GR_Offs,GR_ad_Co  // p14 (arg >= 0.25): switch coefficient set
2816(p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp3,f0
2817(p13) cmp.ge.unc    p8,p0 = 0x9,GR_Sig
2818}
2819{ .mfi
2820(p14) add           GR_ad_Ce = GR_Offs,GR_ad_Ce
2821(p7)  fma.s1        FR_Xp4 = FR_Xp4,FR_Xp5,f0
2822(p13) cmp.ge.unc    p9,p0 = 0x7,GR_Sig
2823};;
2824{ .mfi
2825      ldfpd         FR_B17,FR_B16 = [GR_ad_Co],16
2826(p8)  fma.s1        FR_Xp6 = FR_Xp6,FR_Xp7,f0
2827(p13) cmp.ge.unc    p6,p0 = 0x5,GR_Sig
2828}
2829{ .mfi
2830      ldfpd         FR_B15,FR_B14 = [GR_ad_Ce],16
2831(p9)  fma.s1        FR_Xp8 = FR_Xp8,FR_Xp9,f0
2832(p13) cmp.ge.unc    p7,p0 = 0x3,GR_Sig
2833};;
2834{ .mfi
2835      ldfpd         FR_B13,FR_B12 = [GR_ad_Co],16
2836(p6)  fma.s1        FR_Xp10 = FR_Xp10,FR_Xp11,f0
2837(p13) cmp.ge.unc    p8,p0 = 0x1,GR_Sig
2838}
2839{ .mfi
2840      ldfpd         FR_B11,FR_B10 = [GR_ad_Ce],16
2841(p7)  fma.s1        FR_Xp12 = FR_Xp12,FR_Xp13,f0
2842(p13) cmp.eq.unc    p9,p0 = 0,GR_Sig  // p9: p13 and low 4 bits of GR_Sig are all zero
2843};;
2844{ .mfi
2845      ldfpd         FR_B9,FR_B8 = [GR_ad_Co],16
2846(p8)  fma.s1        FR_Xp14 = FR_Xp14,FR_Xp15,f0
2847      mov           GR_Arg15 = 0xC02E // -15
2848}
2849{ .mfi
2850      ldfpd         FR_B7,FR_B6 = [GR_ad_Ce],16
2851      fcmp.eq.s1    p15,p0 = f0,FR_Xf  // p15: FR_Xf == 0, handled by lgamma_singularity
2852(p13) cmp.ge.unc    p6,p0 = 0xC,GR_Sig
2853};;
2854{ .mfi
2855      ldfe          FR_B5 = [GR_ad_Co],16
2856(p9)  fma.s1        FR_NormX = FR_NormX,FR_Xp16,f0
2857      sub           GR_Root = GR_ArgAsIs,GR_Root  // integer distance between argument and root patterns
2858}
2859{ .mfi
2860      sub           GR_RootInd = 0xE,GR_RootInd  // index re-based as 0xE - index
2861(p11) fms.s1        FR_x = FR_Xfr,f1,FR_LocalMin // x-LocalMin
2862(p13) cmp.ge.unc    p7,p0 = 0x8,GR_Sig
2863};;
2864.pred.rel "mutex",p10,p12
2865{ .mfi
2866      ldfe          FR_B4 = [GR_ad_Ce],16
2867(p10) fms.s1        FR_x = FR_Xfr,f1,f0 // x
2868      add           GR_Root = GR_Root,GR_Dx  // bias by Dx so one unsigned compare covers both sides
2869}
2870{ .mfb
2871      cmp.gtu       p14,p0 = 0xE,GR_RootInd  // p14: re-based index below 0xE (unsigned)
2872(p12) fms.s1        FR_x = FR_Xfr,f1,f1 // x-1
2873(p15) br.cond.spnt  lgamma_singularity
2874};;
2875{ .mfi
2876      ldfe          FR_B3 = [GR_ad_Co],16
2877(p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp4,f0
2878(p14) cmp.lt.unc    p11,p0 = GR_Arg,GR_Arg15  // p11: p14 and GR_Arg < GR_Arg15
2879}
2880{ .mfi
2881      ldfe          FR_B2 = [GR_ad_Ce],16
2882(p7)  fma.s1        FR_Xp6 = FR_Xp6,FR_Xp8,f0
2883      add           GR_2xDx = GR_Dx,GR_Dx
2884};;
2885{ .mfi
2886      ldfe          FR_B1 = [GR_ad_Co],16
2887      fms.s1        FR_r = f8,f1,FR_Root  // FR_r = x - root, consumed by lgamma_negroots
2888(p13) cmp.ge.unc    p6,p0 = 0x4,GR_Sig
2889}
2890{ .mib
2891      ldfe          FR_B0 = [GR_ad_Ce],16
2892(p11) cmp.leu.unc   p10,p0 = GR_Root,GR_2xDx  // p10: p11 and (arg - root + Dx) <= 2*Dx, unsigned
2893(p10) br.cond.spnt  lgamma_negroots
2894};;
2895{ .mfi
2896      ldfpd         FR_P5,FR_P4 = [GR_ad_1],16
2897(p6)  fma.s1        FR_Xp10 = FR_Xp10,FR_Xp12,f0
2898      tbit.z        p14,p15 = GR_Sig,0  // p14: bit 0 of GR_Sig clear; p15: set
2899}
2900{ .mfi
2901      ldfpd         FR_P3,FR_P2 = [GR_ad_2],16
2902      fnma.d.s0     FR_T = f1,f1,f8 //      nop.f         0
2903      // NOTE(review): FR_T is reloaded by the ldfe below before any read in this branch, so this s0 operation (f8 - 1) appears to matter only for its status-flag side effects - confirm
2904(p13) cmp.ge.unc    p7,p0 = 0x2,GR_Sig
2905};;
2906{ .mfi
2907      ldfe          FR_Ln2 = [GR_ad_1],0x50
2908(p7)  fma.s1        FR_NormX = FR_NormX,FR_Xp14,f0
2909      mov           GR_PseudoRoot = 0xBFFBC  // shifted left by 44 below -> 0xBFFBC00000000000
2910}
2911{ .mlx
2912      add           GR_ad_2 = 0x40,GR_ad_2
2913      movl          GR_2xDx = 0x00002346DC5D6389  // width of the pseudo-root window, in bit-pattern units
2914};;
2915{ .mfi
2916      ldfpd         FR_Q8,FR_Q7 = [GR_ad_1],16
2917      fma.s1        FR_x2 = FR_x,FR_x,f0
2918      shl           GR_PseudoRoot = GR_PseudoRoot,44
2919}
2920{ .mfi
2921      ldfpd         FR_Q6,FR_Q5 = [GR_ad_2],16
2922      fma.s1        FR_B17 = FR_B17,FR_x,FR_B16  // first level: pair adjacent coefficients
2923(p13) cmp.ge.unc    p6,p0 = 0xA,GR_Sig
2924};;
2925{ .mfi
2926      ldfpd         FR_Q4,FR_Q3 = [GR_ad_1],16
2927(p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp6,f0
2928      sub           GR_PseudoRoot = GR_ArgAsIs,GR_PseudoRoot  // integer distance from the pseudo-root pattern
2929}
2930{ .mfi
2931      ldfpd         FR_Q2,FR_Q1 = [GR_ad_2],16
2932      fma.s1        FR_B15 = FR_B15,FR_x,FR_B14
2933(p13) cmp.ge.unc    p7,p0 = 0x6,GR_Sig
2934};;
2935{ .mfi
2936      add           GR_ad_Co = 0x12F0,GR_ad_2
2937      fma.s1        FR_B13 = FR_B13,FR_x,FR_B12
2938      cmp.leu.unc   p10,p0 = GR_PseudoRoot,GR_2xDx  // p10: argument lies inside the pseudo-root window
2939}
2940{ .mfi
2941      add           GR_ad_Ce = 0x1300,GR_ad_2
2942      fma.s1        FR_B11 = FR_B11,FR_x,FR_B10
2943      mov           GR_ExpMask = 0x1ffff
2944};;
2945{ .mfi
2946(p10) ldfe          FR_PR01 = [GR_ad_Co],0xF0  // first two pseudo-root coefficients are fetched here
2947      fma.s1        FR_B9 = FR_B9,FR_x,FR_B8
2948      mov           GR_ExpBias = 0xFFFF
2949}
2950{ .mfb
2951(p10) ldfe          FR_PR11 = [GR_ad_Ce],0xF0
2952      fma.s1        FR_B7 = FR_B7,FR_x,FR_B6
2953(p10) br.cond.spnt  lgamma_pseudoroot
2954};;
2955{ .mfi
2956(p13) cmp.ge.unc    p6,p0 = 0xE,GR_Sig
2957(p7)  fma.s1        FR_NormX = FR_NormX,FR_Xp10,f0
2958      tbit.z.unc    p8,p0 = GR_Sig,0  // p8: bit 0 of GR_Sig clear -> sign of gamma(x) is -1
2959}
2960{ .mfi
2961      mov           GR_SignOfGamma = 1
2962      fma.s1        FR_B5 = FR_B5,FR_x,FR_B4
2963      // set p9  if signgam is 32-bit int
2964      // set p10 if signgam is 64-bit int
2965      cmp.eq        p10,p9 = 8,r34
2966};;
2967{ .mfi
2968      nop.m         0
2969      fma.s1        FR_B3 = FR_B3,FR_x,FR_B2
2970(p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
2971}
2972{ .mfi
2973      nop.m         0
2974(p14) fms.s1        FR_w = f0,f0,f1  // FR_w = -1, combined with FR_NormX below
2975      nop.i         0
2976};;
2977.pred.rel "mutex",p9,p10
2978{ .mfi
2979      // store sign of gamma(x) as 32-bit int
2980(p9)  st4           [r33] = GR_SignOfGamma
2981      fma.s1        FR_B1 = FR_B1,FR_x,FR_B0
2982      nop.i         0
2983}
2984{ .mfi
2985      // store sign of gamma(x) as 64-bit int
2986(p10) st8           [r33] = GR_SignOfGamma
2987      fma.s1        FR_B17 = FR_B17,FR_x2,FR_B15  // second level: combine pairs with x^2
2988      nop.i         0
2989};;
2990{ .mfi
2991      nop.m         0
2992      fma.s1        FR_B13 = FR_B13,FR_x2,FR_B11
2993      nop.i         0
2994};;
2995{ .mfi
2996      nop.m         0
2997      fma.s1        FR_B9 = FR_B9,FR_x2,FR_B7
2998      nop.i         0
2999}
3000{ .mfi
3001      nop.m         0
3002      fma.s1        FR_x4 = FR_x2,FR_x2,f0
3003      nop.i         0
3004};;
3005{ .mfi
3006      nop.m         0
3007(p6)  fma.s1        FR_NormX = FR_NormX,FR_Xp2,f0
3008      nop.i         0
3009};;
3010{ .mfi
3011      nop.m         0
3012      fma.s1        FR_B5 = FR_B5,FR_x2,FR_B3
3013      nop.i         0
3014};;
3015{ .mfi
3016      nop.m         0
3017      fma.s1        FR_B17 = FR_B17,FR_x4,FR_B13  // third level: combine with x^4
3018      nop.i         0
3019}
3020{ .mfi
3021      nop.m         0
3022      fma.s1        FR_x8 = FR_x4,FR_x4,f0
3023      nop.i         0
3024};;
3025.pred.rel "mutex",p14,p15
3026{ .mfi
3027      nop.m         0
3028(p15) fms.s1        FR_w = FR_NormX,f1,f1  // FR_w = FR_NormX - 1
3029      nop.i         0
3030}
3031{ .mfi
3032      nop.m         0
3033(p14) fnma.s1       FR_w = FR_NormX,f1,FR_w  // FR_w = -FR_NormX - 1 (FR_w was -1)
3034      nop.i         0
3035};;
3036{ .mfi
3037      nop.m         0
3038      fma.s1        FR_B9 = FR_B9,FR_x4,FR_B5
3039      nop.i         0
3040};;
3041{ .mfi
3042      nop.m         0
3043      frcpa.s1      FR_C,p0 = f1,FR_NormX  // reciprocal approximation of the product
3044      nop.i         0
3045};;
3046{ .mfi
3047      getf.exp      GR_Exp = FR_NormX
3048      nop.f         0
3049      nop.i         0
3050};;
3051{ .mfi
3052      getf.d        GR_ArgAsIs = FR_NormX  // double bit pattern of the product
3053      nop.f         0
3054      nop.i         0
3055};;
3056{ .mfi
3057      nop.m         0
3058      fma.s1        FR_w2 = FR_w,FR_w,f0
3059      nop.i         0
3060}
3061{ .mfi
3062      and           GR_Exp = GR_Exp,GR_ExpMask  // drop the sign bit from the sign/exponent field
3063      fma.s1        FR_Q8 = FR_Q8,FR_w,FR_Q7
3064      nop.i         0
3065};;
3066{ .mfi
3067      sub           GR_Exp = GR_Exp,GR_ExpBias  // unbiased exponent
3068      fma.s1        FR_B17 = FR_B17,FR_x8,FR_B9  // fourth level: combine with x^8
3069      extr.u        GR_Ind = GR_ArgAsIs,44,8  // 8 bits starting at bit 44: table index
3070}
3071{ .mfi
3072      nop.m         0
3073      fma.s1        FR_Q6 = FR_Q6,FR_w,FR_Q5
3074      nop.i         0
3075};;
3076{ .mfi
3077      setf.sig      FR_int_N = GR_Exp
3078      fms.s1        FR_r = FR_C,FR_NormX,f1  // FR_r = FR_C*FR_NormX - 1
3079      nop.i         0
3080}
3081{ .mfi
3082      shladd        GR_ad_2 = GR_Ind,4,GR_ad_2  // 16-byte table entries
3083      nop.f         0
3084      nop.i         0
3085};;
3086{ .mfi
3087      getf.exp      GR_SignExp_w = FR_w
3088      fma.s1        FR_Q4 = FR_Q4,FR_w,FR_Q3
3089      nop.i         0
3090}
3091{ .mfi
3092      ldfe          FR_T = [GR_ad_2]
3093      nop.f         0
3094      nop.i         0
3095};;
3096{ .mfi
3097      and           GR_Exp_w = GR_ExpMask, GR_SignExp_w
3098      fnma.s1       FR_Q1 = FR_05,FR_w2,FR_w  // FR_Q1 = FR_w - FR_05*FR_w2
3099      mov           GR_fff9 = 0xfff9  // biased-exponent threshold separating small and large w
3100}
3101{ .mfi
3102      nop.m         0
3103      fma.s1        FR_w3 = FR_w2,FR_w,f0
3104      nop.i         0
3105};;
3106{ .mfi
3107      nop.m         0
3108      fma.s1        FR_w4 = FR_w2,FR_w2,f0
3109//    p13 <== large w __libm_lgamma
3110//    p14 <== small w __libm_lgamma
3111      cmp.ge        p13,p14 = GR_Exp_w,GR_fff9
3112}
3113{ .mfi
3114      nop.m         0
3115      fma.s1        FR_Qlo = FR_Q8,FR_w2,FR_Q6
3116      nop.i         0
3117};;
3118{ .mfi
3119      nop.m         0
3120(p13) fma.s1        FR_r2 = FR_r,FR_r,f0
3121      nop.i         0
3122}
3123{ .mfi
3124      nop.m         0
3125      fma.s1        FR_B17 = FR_B17,FR_x2,FR_B1  // polynomial value, held in FR_B17
3126      nop.i         0
3127};;
3128{ .mfi
3129      nop.m         0
3130(p13) fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
3131      nop.i         0
3132}
3133{ .mfi
3134      nop.m         0
3135(p13) fma.s1        FR_P54 = FR_P5,FR_r,FR_P4
3136      nop.i         0
3137};;
3138{ .mfi
3139      nop.m         0
3140(p14) fma.s1        FR_Q2 = FR_Q2,FR_w3,FR_Q1
3141      nop.i         0
3142}
3143{ .mfi
3144      nop.m         0
3145(p14) fma.s1        FR_w6 = FR_w3,FR_w3,f0
3146      nop.i         0
3147};;
3148{ .mfi
3149      nop.m         0
3150(p13) fcvt.xf       FR_N = FR_int_N  // unbiased exponent converted to floating point
3151      nop.i         0
3152};;
3153{ .mfi
3154      nop.m         0
3155(p13) fma.s1        FR_r3 = FR_r2,FR_r,f0
3156      nop.i         0
3157}
3158{ .mfi
3159      nop.m         0
3160(p13) fnma.s1       FR_P10 = FR_r2,FR_05,FR_r  // FR_P10 = FR_r - FR_05*FR_r2
3161      nop.i         0
3162};;
3163{ .mfi
3164      nop.m         0
3165(p13) fma.s1        FR_P54 = FR_P54,FR_r2,FR_P32
3166      nop.i         0
3167};;
3168{ .mfi
3169      nop.m         0
3170(p14) fma.s1        FR_Qhi = FR_Q4,FR_w4,FR_Q2
3171      nop.i         0
3172}
3173{ .mfi
3174      nop.m         0
3175(p14) fnma.s1       FR_Qlo = FR_Qlo,FR_w6,FR_B17  // FR_Qlo = FR_B17 - FR_Qlo*FR_w6
3176      nop.i         0
3177};;
3178{ .mfi
3179      nop.m         0
3180(p13) fma.s1        FR_TpNxLn2 = FR_N,FR_Ln2,FR_T  // FR_T + FR_N*FR_Ln2
3181      nop.i         0
3182};;
3183{ .mfi
3184      nop.m         0
3185(p13) fma.s1        FR_P54 = FR_P54,FR_r3,FR_P10
3186      nop.i         0
3187};;
3188.pred.rel "mutex",p13,p14
3189{ .mfi
3190      nop.m         0
3191(p14) fms.d.s0      f8 = FR_Qlo,f1,FR_Qhi  // small w: result = FR_Qlo - FR_Qhi
3192      nop.i         0
3193}
3194{ .mfi
3195      nop.m         0
3196(p13) fma.s1        FR_LnX = FR_TpNxLn2,f1,FR_P54
3197      nop.i         0
3198};;
3199{ .mfb
3200      nop.m         0
3201(p13) fms.d.s0      f8 = FR_B17,f1,FR_LnX  // large w: result = FR_B17 - FR_LnX
3202      br.ret.sptk   b0
3203};;
3204// branch for calculating of ln(GAMMA(x)) near negative roots
// Summary of the code below:
//  * A 160-byte coefficient record is selected by GR_RootInd, starting at
//    GR_ad_1 + 0x15C0 (plus 0x820 when p6: GR_ArgXfrAsIs < GR_Arg05).
//  * The polynomial FR_A15..FR_A0 is evaluated in FR_r; FR_r is not set in
//    this block (lgamma_negrecursion computes f8 - FR_Root before jumping here).
//  * The sign of gamma(x) is stored through r33: -1 when bit 0 of GR_Sig is
//    clear, +1 otherwise.
//  * p8: f8 = poly(FR_r).  p7 (p6 and GR_RootInd == 0): f8 = poly(FR_r)*FR_r + FR_A00.
3205//---------------------------------------------------------------------
3206.align 32
3207lgamma_negroots:
3208{ .mfi
3209      shladd        GR_Offs = GR_RootInd,3,r0 //GR_RootInd*8
3210      fma.s1        FR_r2 = FR_r,FR_r,f0
3211      add           GR_ad_Co = 0x15C0,GR_ad_1//0x1590,GR_ad_1
3212}
3213{ .mfi
3214      add           GR_ad_Ce = 0x1610,GR_ad_1//0x15E0,GR_ad_1
3215      nop.f         0
3216      cmp.lt        p6,p0 = GR_ArgXfrAsIs,GR_Arg05  // p6 selects the second coefficient table
3217};;
3218{ .mfi
3219      add           GR_ad_Roots = 0x10A0,GR_ad_1  // address of FR_A00, loaded below
3220      nop.f         0
3221(p6)  add           GR_ad_Co = 0x820,GR_ad_Co
3222}
3223{ .mfi
3224(p6)  add           GR_ad_Ce = 0x820,GR_ad_Ce
3225      nop.f         0
3226      shladd        GR_Offs = GR_RootInd,1,GR_Offs //GR_RootInd*10
3227};;
3228{ .mmi
3229      shladd        GR_ad_Co = GR_Offs,4,GR_ad_Co  // += GR_RootInd*160 bytes
3230      shladd        GR_ad_Ce = GR_Offs,4,GR_ad_Ce
3231      cmp.eq        p8,p7 = r0,r0  // default: p8 = 1, p7 = 0
3232};;
3233{ .mmi
3234      ldfpd         FR_A15,FR_A14 = [GR_ad_Co],16
3235      ldfpd         FR_A13,FR_A12 = [GR_ad_Ce],16
3236      mov           GR_SignOfGamma = 1
3237};;
3238{ .mmi
3239      ldfpd         FR_A11,FR_A10 = [GR_ad_Co],16
3240      ldfpd         FR_A9,FR_A8   = [GR_ad_Ce],16
3241(p6)  cmp.eq        p7,p8 = r0,GR_RootInd  // under p6: p7 if GR_RootInd == 0, else p8
3242};;
3243{ .mmi
3244      ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
3245      ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
3246      tbit.z        p11,p0 = GR_Sig,0  // p11: bit 0 of GR_Sig clear -> sign is -1
3247};;
3248{ .mmi
3249      ldfe          FR_A3 = [GR_ad_Co],16
3250      ldfe          FR_A2 = [GR_ad_Ce],16
3251      // set p9  if signgam is 32-bit int
3252      // set p10 if signgam is 64-bit int
3253      cmp.eq        p10,p9 = 8,r34
3254};;
3255{ .mmi
3256      ldfe          FR_A1 = [GR_ad_Co],16
3257      ldfe          FR_A0 = [GR_ad_Ce],16
3258(p11) sub           GR_SignOfGamma = r0,GR_SignOfGamma
3259};;
3260{ .mfi
3261      ldfe          FR_A00 = [GR_ad_Roots]
3262      fma.s1        FR_r4 = FR_r2,FR_r2,f0
3263      nop.i         0
3264};;
3265{ .mfi
3266      nop.m         0
3267      fma.s1        FR_A15 = FR_A15,FR_r,FR_A14  // first level: pair adjacent coefficients
3268      nop.i         0
3269}
3270{ .mfi
3271      nop.m         0
3272      fma.s1        FR_A13 = FR_A13,FR_r,FR_A12
3273      nop.i         0
3274};;
3275.pred.rel "mutex",p9,p10
3276{ .mfi
3277      // store sign of gamma(x) as 32-bit int
3278(p9)  st4           [r33] = GR_SignOfGamma
3279      fma.s1        FR_A11 = FR_A11,FR_r,FR_A10
3280      nop.i         0
3281}
3282{ .mfi
3283      // store sign of gamma(x) as 64-bit int
3284(p10) st8           [r33] = GR_SignOfGamma
3285      fma.s1        FR_A9 = FR_A9,FR_r,FR_A8
3286      nop.i         0
3287};;
3288{ .mfi
3289      nop.m         0
3290      fma.s1        FR_A7 = FR_A7,FR_r,FR_A6
3291      nop.i         0
3292}
3293{ .mfi
3294      nop.m         0
3295      fma.s1        FR_A5 = FR_A5,FR_r,FR_A4
3296      nop.i         0
3297};;
3298{ .mfi
3299      nop.m         0
3300      fma.s1        FR_A3 = FR_A3,FR_r,FR_A2
3301      nop.i         0
3302}
3303{ .mfi
3304      nop.m         0
3305      fma.s1        FR_r8 = FR_r4,FR_r4,f0
3306      nop.i         0
3307};;
3308{ .mfi
3309      nop.m         0
3310      fma.s1        FR_A1 = FR_A1,FR_r,FR_A0
3311      nop.i         0
3312};;
3313{ .mfi
3314      nop.m         0
3315      fma.s1        FR_A15 = FR_A15,FR_r2,FR_A13  // second level: combine pairs with r^2
3316      nop.i         0
3317};;
3318{ .mfi
3319      nop.m         0
3320      fma.s1        FR_A11 = FR_A11,FR_r2,FR_A9
3321      nop.i         0
3322};;
3323{ .mfi
3324      nop.m         0
3325      fma.s1        FR_A7 = FR_A7,FR_r2,FR_A5
3326      nop.i         0
3327};;
3328{ .mfi
3329      nop.m         0
3330      fma.s1        FR_A3 = FR_A3,FR_r2,FR_A1
3331      nop.i         0
3332};;
3333{ .mfi
3334      nop.m         0
3335      fma.s1        FR_A15 = FR_A15,FR_r4,FR_A11  // third level: combine with r^4
3336      nop.i         0
3337};;
3338{ .mfi
3339      nop.m         0
3340      fma.s1        FR_A7 = FR_A7,FR_r4,FR_A3
3341      nop.i         0
3342};;
3343.pred.rel "mutex",p7,p8
3344{ .mfi
3345      nop.m         0
3346(p7)  fma.s1        FR_A1 = FR_A15,FR_r8,FR_A7  // p7: keep the polynomial value for one more step
3347      nop.i         0
3348}
3349{ .mfi
3350      nop.m         0
3351(p8)  fma.d.s0      f8 = FR_A15,FR_r8,FR_A7  // p8: polynomial value is the final result
3352      nop.i         0
3353};;
3354{ .mfb
3355      nop.m         0
3356(p7)  fma.d.s0      f8 = FR_A1,FR_r,FR_A00  // p7: result = poly*FR_r + FR_A00
3357      br.ret.sptk   b0
3358};;
3359// branch for handling pseudo root on (-2;-1)
// Summary of the code below:
//  * Loads FR_PR21, FR_PR31, FR_PR00..FR_PR30 and FR_PRN through
//    GR_ad_Co/GR_ad_Ce. FR_PR01 and FR_PR11 are not loaded here
//    (lgamma_negrecursion loads them just before branching).
//  * Forms four quadratics q_k = f8*(f8 + FR_PRk1) + FR_PRk0, k = 0..3, and
//    returns f8 = q_0*q_1*q_2*q_3*FR_PRN.
//  * Stores the sign of gamma(x) through r33: -1 when bit 0 of GR_Sig is
//    clear, +1 otherwise.
3360//---------------------------------------------------------------------
3361.align 32
3362lgamma_pseudoroot:
3363{ .mmi
3364      ldfe          FR_PR21 = [GR_ad_Co],32
3365      ldfe          FR_PR31 = [GR_ad_Ce],32
3366      // set p9  if signgam is 32-bit int
3367      // set p10 if signgam is 64-bit int
3368      cmp.eq        p10,p9 = 8,r34
3369};;
3370{ .mmi
3371      ldfe          FR_PR00 = [GR_ad_Co],32
3372      ldfe          FR_PR10 = [GR_ad_Ce],0xF0
3373      mov           GR_SignOfGamma = 1
3374};;
3375{ .mmi
3376      ldfe          FR_PR20 = [GR_ad_Co],0xF0
3377      ldfe          FR_PR30 = [GR_ad_Ce]
3378      tbit.z        p8,p0 = GR_Sig,0  // p8: bit 0 of GR_Sig clear -> sign is -1
3379};;
3380{ .mfi
3381      ldfe          FR_PRN = [GR_ad_Co]
3382      fma.s1        FR_PR01 = f8,f1,FR_PR01  // x + PR01
3383      nop.i         0
3384}
3385{ .mfi
3386      nop.m         0
3387      fma.s1        FR_PR11 = f8,f1,FR_PR11  // x + PR11
3388(p8)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
3389};;
3390.pred.rel "mutex",p9,p10
3391{ .mfi
3392      // store sign of gamma(x) as 32-bit int
3393(p9)  st4           [r33] = GR_SignOfGamma
3394      fma.s1        FR_PR21 = f8,f1,FR_PR21  // x + PR21
3395      nop.i         0
3396}
3397{ .mfi
3398      // store sign of gamma(x) as 64-bit int
3399(p10) st8           [r33] = GR_SignOfGamma
3400      fma.s1        FR_PR31 = f8,f1,FR_PR31  // x + PR31
3401      nop.i         0
3402};;
3403{ .mfi
3404      nop.m         0
3405      fma.s1        FR_PR01 = f8,FR_PR01,FR_PR00  // q_0 = x*(x + PR01) + PR00
3406      nop.i         0
3407}
3408{ .mfi
3409      nop.m         0
3410      fma.s1        FR_PR11 = f8,FR_PR11,FR_PR10  // q_1
3411      nop.i         0
3412};;
3413{ .mfi
3414      nop.m         0
3415      fma.s1        FR_PR21 = f8,FR_PR21,FR_PR20  // q_2
3416      nop.i         0
3417}
3418{ .mfi
3419      nop.m         0
3420      fma.s1        FR_PR31 = f8,FR_PR31,FR_PR30  // q_3
3421      nop.i         0
3422};;
3423{ .mfi
3424      nop.m         0
3425      fma.s1        FR_PR01 = FR_PR11,FR_PR01,f0  // q_0*q_1
3426      nop.i         0
3427};;
3428{ .mfi
3429      nop.m         0
3430      fma.s1        FR_PR21 = FR_PR31,FR_PR21,f0  // q_2*q_3
3431      nop.i         0
3432};;
3433{ .mfi
3434      nop.m         0
3435      fma.s1        FR_PR01 = FR_PR21,FR_PR01,f0  // product of all four quadratics
3436      nop.i         0
3437};;
3438{ .mfb
3439      nop.m         0
3440      fma.d.s0      f8 = FR_PR01,FR_PRN,f0  // scale by FR_PRN and round to double
3441      br.ret.sptk   b0
3442};;
3443// branch for handling +/-0, NaT, QNaN, +/-INF and denormalised numbers
// Summary of the code below:
//  * +1 is stored through r33 as the sign of gamma(x) before any exit.
//  * +INF: returns with f8 unchanged.
//  * NaT/NaN: returns f8*1 - f8 computed in s0.
//  * denormal: recomputes GR_ArgAsIs/GR_Arg from FR_NormX and continues at
//    lgamma_common.
//  * -INF: returns f8 with the sign taken from f1 (i.e. +INF).
//  * anything else falls through into lgamma_singularity, which follows.
3444//---------------------------------------------------------------------
3445.align 32
3446lgamma_spec:
3447{ .mfi
3448      getf.exp      GR_SignExp = FR_NormX
3449      fclass.m      p6,p0 = f8,0x21 // is arg +INF?
3450      mov           GR_SignOfGamma = 1
3451};;
3452{ .mfi
3453      getf.sig      GR_ArgAsIs = FR_NormX
3454      fclass.m      p7,p0 = f8,0xB // is x denormal?
3455      // set p11 if signgam is 32-bit int
3456      // set p12 if signgam is 64-bit int
3457      cmp.eq        p12,p11 = 8,r34
3458};;
3459.pred.rel "mutex",p11,p12
3460{ .mfi
3461      // store sign of gamma(x) as 32-bit int
3462(p11) st4           [r33] = GR_SignOfGamma
3463      fclass.m      p8,p0 = f8,0x1C0 // is arg NaT or NaN?
3464      dep.z         GR_Ind = GR_SignExp,8,4  // low 4 bits of GR_SignExp placed at bit 8
3465}
3466{ .mib
3467      // store sign of gamma(x) as 64-bit int
3468(p12) st8           [r33] = GR_SignOfGamma
3469      cmp.lt        p10,p0 = GR_SignExp,GR_ExpBias
3470(p6)  br.ret.spnt   b0 // exit for +INF
3471};;
3472{ .mfi
3473      and           GR_Exp = GR_SignExp,GR_ExpMask
3474      fclass.m      p9,p0 = f8,0x22 // is arg -INF?
3475      nop.i         0
3476};;
3477{ .mfi
3478      add           GR_ad_Co = GR_Ind,GR_ad_Data
3479(p7)  fma.s0        FR_tmp = f8,f8,f8  // NOTE(review): result goes to a temp; presumably executed only to raise s0 flags for a denormal input - confirm
3480      extr.u        GR_ArgAsIs = GR_ArgAsIs,11,52  // 52 bits of the significand starting at bit 11
3481}
3482{ .mfb
3483      nop.m         0
3484(p8)  fms.d.s0      f8 = f8,f1,f8
3485(p8)  br.ret.spnt   b0 // exit for NaT and NaN
3486};;
3487{ .mib
3488      nop.m         0
3489      shr.u         GR_Arg = GR_ArgAsIs,48
3490(p7)  br.cond.sptk  lgamma_common  // denormal input rejoins the main path
3491};;
3492{ .mfb
3493      nop.m         0
3494(p9)  fmerge.s      f8 = f1,f8  // sign from f1, exponent and significand from f8
3495(p9)  br.ret.spnt   b0 // exit -INF
3496};;
3497// branch for handling negative integers and +/-0
// Saves the original argument in FR_X, replaces f8 with the s0 result of
// frcpa(1, 0), selects error tag 106, and sets the sign of gamma(x) to -1
// for -0 and +1 otherwise. The store of the sign through GR_ad_SignGam
// (a copy of r33) is done in lgamma_libm_err, using p9/p10 set here.
3498//---------------------------------------------------------------------
3499.align 32
3500lgamma_singularity:
3501{ .mfi
3502      mov           GR_ad_SignGam = r33  // keep the signgam address for lgamma_libm_err
3503      fclass.m      p6,p0 = f8, 0x6 // is x -0?
3504      mov           GR_SignOfGamma = 1
3505}
3506{ .mfi
3507      // set p9  if signgam is 32-bit int
3508      // set p10 if signgam is 64-bit int
3509      cmp.eq        p10,p9 = 8,r34
3510      fma.s1        FR_X = f0,f0,f8  // FR_X = original argument
3511      nop.i         0
3512};;
3513{ .mfi
3514      nop.m         0
3515      frcpa.s0      f8,p0 = f1,f0  // 1/0 in s0 supplies the returned value
3516      mov           GR_TAG = 106 // negative
3517}
3518{ .mib
3519      nop.m         0
3520(p6)  sub           GR_SignOfGamma = r0,GR_SignOfGamma
3521      br.cond.sptk  lgamma_libm_err
3522};;
3523// overflow (x > OVERFLOV_BOUNDARY)
// Saves the original argument in FR_X, selects error tag 105, sets the sign
// of gamma(x) to +1, and produces the result by squaring a value whose
// exponent field is 0x1FFFE. Control then falls through into
// lgamma_libm_err, which follows.
3524//---------------------------------------------------------------------
3525.align 32
3526lgamma_overflow:
3527{ .mfi
3528      mov           GR_SignOfGamma = 1
3529      nop.f         0
3530      mov           r8 = 0x1FFFE  // exponent field used to build f9 below
3531};;
3532{ .mfi
3533      setf.exp      f9 = r8
3534      fmerge.s      FR_X = f8,f8  // FR_X = original argument
3535      mov           GR_TAG = 105 // overflow
3536};;
3537{ .mfi
3538      mov           GR_ad_SignGam = r33  // keep the signgam address for lgamma_libm_err
3539      nop.f         0
3540      // set p9  if signgam is 32-bit int
3541      // set p10 if signgam is 64-bit int
3542      cmp.eq        p10,p9 = 8,r34
3543}
3544{ .mfi
3545      nop.m         0
3546      fma.d.s0      f8 = f9,f9,f0 // Set I,O and +INF result
3547      nop.i         0
3548};;
3549// common tail of the error paths (lgamma_singularity, lgamma_overflow)
// Allocates a new register frame (1 input, 4 locals, 4 outputs), copies
// GR_TAG into GR_Parameter_TAG, and stores GR_SignOfGamma through
// GR_ad_SignGam as a 4-byte (p9) or 8-byte (p10) integer.
// NOTE(review): no branch or return follows before GLOBAL_LIBM_END, so
// execution presumably continues into __libm_error_region - confirm.
3550//---------------------------------------------------------------------
3551.align 32
3552lgamma_libm_err:
3553{ .mmi
3554      alloc         r32 = ar.pfs,1,4,4,0
3555      mov           GR_Parameter_TAG = GR_TAG
3556      nop.i         0
3557};;
3558.pred.rel "mutex",p9,p10
3559{ .mmi
3560      // store sign of gamma(x) as 32-bit int
3561(p9)  st4           [GR_ad_SignGam] = GR_SignOfGamma
3562      // store sign of gamma(x) as 64-bit int
3563(p10) st8           [GR_ad_SignGam] = GR_SignOfGamma
3564      nop.i         0
3565};;
3566GLOBAL_LIBM_END(__libm_lgamma)
3567
3568
// Error-reporting stub: builds a 64-byte stack frame, spills FR_Y, FR_X and
// FR_RESULT into it, calls __libm_error_support with the three slot
// addresses in GR_Parameter_X/Y/RESULT (GR_Parameter_TAG is set by the
// caller path), then reloads f8 from the result slot and returns.
// Frame layout relative to the new sp, as set up below:
//   sp+16: FR_X (parameter 1)   sp+32: FR_Y (parameter 2)   sp+48: FR_RESULT (parameter 3)
3569LOCAL_LIBM_ENTRY(__libm_error_region)
3570.prologue
3571{ .mfi
3572        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
3573        nop.f 0
3574.save   ar.pfs,GR_SAVE_PFS
3575        mov   GR_SAVE_PFS=ar.pfs                // Save ar.pfs
3576}
3577{ .mfi
3578.fframe 64
3579        add   sp=-64,sp                         // Create new stack
3580        nop.f 0
3581        mov   GR_SAVE_GP=gp                     // Save gp
3582};;
3583{ .mmi
3584        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack; pointer then advances to the result slot
3585        add GR_Parameter_X = 16,sp              // Parameter 1 address
3586.save   b0, GR_SAVE_B0
3587        mov GR_SAVE_B0=b0                       // Save b0
3588};;
3589.body
3590{ .mib
3591        stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1
3592                                                      // on stack
3593        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
3594        nop.b 0
3595}
3596{ .mib
3597        stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3
3598                                                      // on stack
3599        add   GR_Parameter_Y = -16,GR_Parameter_Y     // back to the Parameter 2 slot
3600        br.call.sptk b0=__libm_error_support#         // Call error handling
3601                                                      // function
3602};;
3603{ .mmi
3604        nop.m 0
3605        nop.m 0
3606        add   GR_Parameter_RESULT = 48,sp      // recompute the result slot address after the call
3607};;
3608{ .mmi
3609        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
3610.restore sp
3611        add   sp = 64,sp                       // Restore stack pointer
3612        mov   b0 = GR_SAVE_B0                  // Restore return address
3613};;
3614{ .mib
3615        mov   gp = GR_SAVE_GP                  // Restore gp
3616        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
3617        br.ret.sptk     b0                     // Return
3618};;
3619
3620LOCAL_LIBM_END(__libm_error_region)
// External error handler called above; declared here as a global function symbol.
3621.type   __libm_error_support#,@function
3622.global __libm_error_support#
3623