/*
 * Public domain.
 *
 */

/*
 * The 8087 method for the exponential function is to calculate
 *   exp(x) = 2^(x log2(e))
 * after separating integer and fractional parts
 *   x log2(e) = i + f,     |f| <= .5
 * 2^i is immediate but f needs to be precise for long double accuracy.
 * Suppress range reduction error in computing f by the following.
 * Separate x into integer and fractional parts
 *   x = xi + xf,     |xf| <= .5
 * Separate log2(e) into the sum of an exact number c0 and small part c1.
 *   c0 + c1 = log2(e) to extra precision
 * Then
 *   f = (c0 xi - i) + c0 xf + c1 x
 * where c0 xi is exact and so also is (c0 xi - i).
 * -- moshier@na-net.ornl.gov
 */

#include <libm-alias-ldouble.h>
#include <machine/asm.h>
#include <i386-math-asm.h>
#include <libm-alias-finite.h>

/*
 * Syntax: GNU as, AT&T operand order, passed through the C preprocessor.
 *
 * Select the entry-point name and the x87 instruction that loads
 * log2(base).  Exactly one variant is built per translation unit:
 *   USE_AS_EXP10L  -> base 10, fldl2t
 *   USE_AS_EXPM1L  -> base e,  fldl2e (result is exp(x) - 1)
 *   neither        -> base e,  fldl2e
 */
#ifdef USE_AS_EXP10L
# define IEEE754_EXPL __ieee754_exp10l
# define FLDLOG fldl2t
#elif defined USE_AS_EXPM1L
# define IEEE754_EXPL __expm1l
# define FLDLOG fldl2e
#else
# define IEEE754_EXPL __ieee754_expl
# define FLDLOG fldl2e
#endif

	.section .rodata.cst16,"aM",@progbits,16

	.p2align 4
/*
 * Each constant below is a 10-byte x87 extended-precision value stored
 * little-endian (8 mantissa bytes, then the 2-byte sign/exponent word),
 * followed by 6 zero bytes so that every entry occupies 16 bytes, the
 * entity size of this mergeable section.
 *
 * c0 is the leading part of log2(base); its six low mantissa bytes are
 * zero, which is what lets "c0 xi" be exact as described in the header
 * comment.  c1 is the small correction, so that c0 + c1 = log2(base).
 */
#ifdef USE_AS_EXP10L
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#else
	.type c0,@object
c0:	.byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c0)
	.type c1,@object
c1:	.byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(c1)
#endif
#ifndef USE_AS_EXPM1L
/*
 * csat: mantissa 0x8000000000000000 with exponent word 0x400e, i.e. 2^15.
 * Loaded (and negated for negative arguments) by the overflow/underflow
 * path of the function in place of a huge finite argument.
 */
	.type csat,@object
csat:	.byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40
	.byte 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(csat)
/* NOTE(review): DEFINE_LDBL_MIN is not defined in this file; presumably it
   comes from <i386-math-asm.h> and emits the constant consumed by
   LDBL_CHECK_FORCE_UFLOW_NONNEG -- confirm against that header.  */
DEFINE_LDBL_MIN
#endif

/*
 * MO(sym): memory operand for a constant defined above.  Under PIC the
 * symbol is addressed as a @GOTOFF offset from %ecx, so %ecx must hold
 * the PIC base before any MO() operand is used.
 */
#ifdef PIC
# define MO(op) op##@GOTOFF(%ecx)
#else
# define MO(op) op
#endif

	.text
/*
 * IEEE754_EXPL -- x87 long double exponential.
 *
 * Assembled under the name selected by the IEEE754_EXPL macro
 * (__ieee754_expl, __ieee754_exp10l or __expm1l); FLDLOG loads the
 * matching log2(base).
 *
 * In:   long double x at 4(%esp); the 16-bit sign/exponent word of x is
 *       read separately from 4+8(%esp).
 * Out:  result left in %st(0).
 * Uses: %eax, %edx; under PIC also %ecx (base register for MO()).
 *       8 bytes of stack are borrowed while the x87 control word is
 *       switched to round-to-nearest, then released again.
 *
 * Local labels:
 *   1: infinity (and, for expm1l, large negative) argument
 *   2: common return
 *   3: main computation
 *   4: (expm1l only) argument not in the large-negative class
 *   5: (exp/exp10 only) huge, infinite or NaN argument
 *   6: NaN argument
 *
 * The numbers at the start of the trailing comments in the main
 * computation are the original author's running x87 stack annotations.
 */
ENTRY(IEEE754_EXPL)
#ifdef USE_AS_EXPM1L
	movzwl	4+8(%esp), %eax
	xorb	$0x80, %ah	/* invert sign bit (now 1 is "positive") */
	cmpl	$0xc006, %eax	/* is num positive and exp >= 6 (number is >= 128.0)? */
	/* NOTE(review): 0xc006 is the flipped sign bit plus exponent word
	   0x4006; the "exp >= 6" / "64.0" wording in the comments of this
	   path does not obviously match that constant -- confirm.  */
	jae	HIDDEN_JUMPTARGET (__expl) /* (if num is denormal, it is at least >= 64.0) */
#endif
	fldt	4(%esp)
/* I added the following ugly construct because expl(+-Inf) resulted
   in NaN.  The ugliness results from the bright minds at Intel.
   For the i686 the code can be written better.
   -- drepper@cygnus.com.  */
	fxam			/* Is NaN or +-Inf?  */
#ifdef PIC
	LOAD_PIC_REG (cx)
#endif
#ifdef USE_AS_EXPM1L
	xorb	$0x80, %ah	/* Undo the sign flip done at entry.  */
	cmpl	$0xc006, %eax	/* Negative with exponent word >= 0x4006?  */
	fstsw	%ax		/* FXAM class -> %ah; the jb below still
				   tests the cmpl above.  */
	movb	$0x45, %dh	/* Mask for the C3, C2 and C0 bits in %ah.  */
	jb	4f

	/* Below -64.0 (may be -NaN or -Inf). */
	andb	%ah, %dh
	cmpb	$0x01, %dh
	je	6f		/* Is +-NaN, jump.  */
	jmp	1f		/* -large, possibly -Inf.  */

4:	/* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf).  */
	/* Test for +-0 as argument.  */
	andb	%ah, %dh
	cmpb	$0x40, %dh
	je	2f		/* Return the zero argument unchanged.  */

	/* Test for arguments that are small but not subnormal.  */
	movzwl	4+8(%esp), %eax
	andl	$0x7fff, %eax	/* Drop the sign, keep the exponent field.  */
	cmpl	$0x3fbf, %eax
	jge	3f
	/* Argument's exponent below -64; avoid spurious underflow if
	   normal.  */
	cmpl	$0x0001, %eax
	jge	2f		/* Return the argument itself.  */
	/* Force underflow and return the argument, to avoid wrong signs
	   of zero results from the code below in some rounding modes.  */
	fld	%st
	fmul	%st
	fstp	%st		/* Discard the product; x stays in %st.  */
	jmp	2f
#else
	movzwl	4+8(%esp), %eax
	andl	$0x7fff, %eax	/* Drop the sign, keep the exponent field.  */
	cmpl	$0x400d, %eax
	jg	5f
	cmpl	$0x3fbc, %eax
	jge	3f
	/* Argument's exponent below -67, result rounds to 1.  */
	fld1
	faddp
	jmp	2f
5:	/* Overflow, underflow or infinity or NaN as argument.  */
	fstsw	%ax		/* FXAM class -> %ah.  */
	movb	$0x45, %dh	/* Mask for the C3, C2 and C0 bits in %ah.  */
	andb	%ah, %dh
	cmpb	$0x05, %dh
	je	1f		/* Is +-Inf, jump.    */
	cmpb	$0x01, %dh
	je	6f		/* Is +-NaN, jump.    */
	/* Overflow or underflow; saturate.  */
	fstp	%st
	fldt	MO(csat)	/* Replace x by the saturation constant.  */
	andb	$2, %ah		/* Sign bit from the FXAM result.  */
	jz	3f
	fchs			/* Negative argument: use -csat.  */
#endif
3:	FLDLOG			/* 1  log2(base)      */
	fmul	%st(1), %st	/* 1  x log2(base)    */
	/* Set round-to-nearest temporarily.  */
	subl	$8, %esp
	cfi_adjust_cfa_offset (8)
	fstcw	4(%esp)		/* Save the caller's control word.  */
	movl	$0xf3ff, %edx
	andl	4(%esp), %edx	/* Clear control-word bits 10-11.  */
	movl	%edx, (%esp)
	fldcw	(%esp)
	frndint			/* 1  i               */
	fld	%st(1)		/* 2  x               */
	frndint			/* 2  xi              */
	fldcw	4(%esp)		/* Restore the caller's control word.  */
	addl	$8, %esp
	cfi_adjust_cfa_offset (-8)
	fld	%st(1)		/* 3  i               */
	fldt	MO(c0)		/* 4  c0              */
	fld	%st(2)		/* 5  xi              */
	fmul	%st(1), %st	/* 5  c0 xi           */
	fsubp	%st, %st(2)	/* 4  f = c0 xi  - i  */
	fld	%st(4)		/* 5  x               */
	fsub	%st(3), %st	/* 5  xf = x - xi     */
	fmulp	%st, %st(1)	/* 4  c0 xf           */
	faddp	%st, %st(1)	/* 3  f = f + c0 xf   */
	fldt	MO(c1)		/* 4                  */
	fmul	%st(4), %st	/* 4  c1 * x          */
	faddp	%st, %st(1)	/* 3  f = f + c1 * x  */
	f2xm1			/* 3 2^(fract(x * log2(base))) - 1 */
#ifdef USE_AS_EXPM1L
	fstp	%st(1)		/* 2                  */
	fscale			/* 2 scale factor is st(1); base^x - 2^i */
	fxch			/* 2 i                */
	fld1			/* 3 1.0              */
	fscale			/* 3 2^i              */
	fld1			/* 4 1.0              */
	fsubrp	%st, %st(1)	/* 3 2^i - 1.0        */
	fstp	%st(1)		/* 2                  */
	faddp	%st, %st(1)	/* 1 base^x - 1.0     */
#else
	fld1			/* 4 1.0              */
	faddp			/* 3 2^(fract(x * log2(base))) */
	fstp	%st(1)		/* 2  */
	fscale			/* 2 scale factor is st(1); base^x */
	fstp	%st(1)		/* 1  */
	LDBL_CHECK_FORCE_UFLOW_NONNEG
#endif
	fstp	%st(1)		/* 0  */
	jmp	2f
1:
#ifdef USE_AS_EXPM1L
	/* For expm1l, only negative sign gets here.  */
	fstp	%st
	fld1
	fchs			/* Result is -1.  */
#else
	testl	$0x200, %eax	/* Test sign.  */
	jz	2f		/* If positive, jump.  */
	fstp	%st
	fldz			/* Set result to 0.  */
#endif
2:	ret
6:	/* NaN argument.  */
	fadd	%st
	ret
END(IEEE754_EXPL)

#ifdef USE_AS_EXPM1L
libm_hidden_def (__expm1l)
libm_alias_ldouble (__expm1, expm1)
#elif defined USE_AS_EXP10L
libm_alias_finite (__ieee754_exp10l, __exp10l)
#else
libm_alias_finite (__ieee754_expl, __expl)
#endif
