1/* ix87 specific implementation of pow function. 2 Copyright (C) 1996-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <machine/asm.h> 20#include <x86_64-math-asm.h> 21#include <libm-alias-finite.h> 22 23 .section .rodata.cst8,"aM",@progbits,8 24 25 .p2align 3 26 .type one,@object 27one: .double 1.0 28 ASM_SIZE_DIRECTIVE(one) 29 .type p2,@object 30p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 31 ASM_SIZE_DIRECTIVE(p2) 32 .type p63,@object 33p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 34 ASM_SIZE_DIRECTIVE(p63) 35 .type p64,@object 36p64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 37 ASM_SIZE_DIRECTIVE(p64) 38 .type p78,@object 39p78: .byte 0, 0, 0, 0, 0, 0, 0xd0, 0x44 40 ASM_SIZE_DIRECTIVE(p78) 41 .type pm79,@object 42pm79: .byte 0, 0, 0, 0, 0, 0, 0, 0x3b 43 ASM_SIZE_DIRECTIVE(pm79) 44 45 .section .rodata.cst16,"aM",@progbits,16 46 47 .p2align 3 48 .type infinity,@object 49inf_zero: 50infinity: 51 .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f 52 ASM_SIZE_DIRECTIVE(infinity) 53 .type zero,@object 54zero: .double 0.0 55 ASM_SIZE_DIRECTIVE(zero) 56 .type minf_mzero,@object 57minf_mzero: 58minfinity: 59 .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff 60mzero: 61 .byte 0, 0, 0, 0, 0, 0, 0, 0x80 62 ASM_SIZE_DIRECTIVE(minf_mzero) 63DEFINE_LDBL_MIN 64 65#ifdef PIC 66# define MO(op) op##(%rip) 67#else 68# define MO(op) op 69#endif 70 71 .text 72ENTRY(__ieee754_powl) 73 fldt 24(%rsp) // y 74 fxam 75 76 77 fnstsw 78 movb %ah, %dl 79 andb $0x45, %ah 80 cmpb $0x40, %ah // is y == 0 ? 81 je 11f 82 83 cmpb $0x05, %ah // is y == ±inf ? 84 je 12f 85 86 cmpb $0x01, %ah // is y == NaN ? 87 je 30f 88 89 fldt 8(%rsp) // x : y 90 91 fxam 92 fnstsw 93 movb %ah, %dh 94 andb $0x45, %ah 95 cmpb $0x40, %ah 96 je 20f // x is ±0 97 98 cmpb $0x05, %ah 99 je 15f // x is ±inf 100 101 cmpb $0x01, %ah 102 je 31f // x is NaN 103 104 fxch // y : x 105 106 /* fistpll raises invalid exception for |y| >= 1L<<63. */ 107 fldl MO(p63) // 1L<<63 : y : x 108 fld %st(1) // y : 1L<<63 : y : x 109 fabs // |y| : 1L<<63 : y : x 110 fcomip %st(1), %st // 1L<<63 : y : x 111 fstp %st(0) // y : x 112 jnc 2f 113 114 /* First see whether `y' is a natural number. In this case we 115 can use a more precise algorithm. */ 116 fld %st // y : y : x 117 fistpll -8(%rsp) // y : x 118 fildll -8(%rsp) // int(y) : y : x 119 fucomip %st(1),%st // y : x 120 je 9f 121 122 // If y has absolute value at most 0x1p-79, then any finite 123 // nonzero x will result in 1. Saturate y to those bounds to 124 // avoid underflow in the calculation of y*log2(x). 125 fldl MO(pm79) // 0x1p-79 : y : x 126 fld %st(1) // y : 0x1p-79 : y : x 127 fabs // |y| : 0x1p-79 : y : x 128 fcomip %st(1), %st // 0x1p-79 : y : x 129 fstp %st(0) // y : x 130 jnc 3f 131 fstp %st(0) // pop y 132 fldl MO(pm79) // 0x1p-79 : x 133 testb $2, %dl 134 jnz 3f // y > 0 135 fchs // -0x1p-79 : x 136 jmp 3f 137 1389: /* OK, we have an integer value for y. Unless very small 139 (we use < 4), use the algorithm for real exponent to avoid 140 accumulation of errors. */ 141 fldl MO(p2) // 4 : y : x 142 fld %st(1) // y : 4 : y : x 143 fabs // |y| : 4 : y : x 144 fcomip %st(1), %st // 4 : y : x 145 fstp %st(0) // y : x 146 jnc 3f 147 mov -8(%rsp),%eax 148 mov -4(%rsp),%edx 149 orl $0, %edx 150 fstp %st(0) // x 151 jns 4f // y >= 0, jump 152 fdivrl MO(one) // 1/x (now referred to as x) 153 negl %eax 154 adcl $0, %edx 155 negl %edx 1564: fldl MO(one) // 1 : x 157 fxch 158 159 /* If y is even, take the absolute value of x. Otherwise, 160 ensure all intermediate values that might overflow have the 161 sign of x. */ 162 testb $1, %al 163 jnz 6f 164 fabs 165 1666: shrdl $1, %edx, %eax 167 jnc 5f 168 fxch 169 fabs 170 fmul %st(1) // x : ST*x 171 fxch 1725: fld %st // x : x : ST*x 173 fabs // |x| : x : ST*x 174 fmulp // |x|*x : ST*x 175 shrl $1, %edx 176 movl %eax, %ecx 177 orl %edx, %ecx 178 jnz 6b 179 fstp %st(0) // ST*x 180 LDBL_CHECK_FORCE_UFLOW_NONNAN 181 ret 182 183 /* y is ±NAN */ 18430: fldt 8(%rsp) // x : y 185 fldl MO(one) // 1.0 : x : y 186 fucomip %st(1),%st // x : y 187 je 32f 18831: /* At least one argument NaN, and result should be NaN. */ 189 faddp 190 ret 19132: jc 31b 192 /* pow (1, NaN); check if the NaN signaling. */ 193 testb $0x40, 31(%rsp) 194 jz 31b 195 fstp %st(1) 196 ret 197 198 .align ALIGNARG(4) 1992: // y is a large integer (absolute value at least 1L<<63). 200 // If y has absolute value at least 1L<<78, then any finite 201 // nonzero x will result in 0 (underflow), 1 or infinity (overflow). 202 // Saturate y to those bounds to avoid overflow in the calculation 203 // of y*log2(x). 204 fldl MO(p78) // 1L<<78 : y : x 205 fld %st(1) // y : 1L<<78 : y : x 206 fabs // |y| : 1L<<78 : y : x 207 fcomip %st(1), %st // 1L<<78 : y : x 208 fstp %st(0) // y : x 209 jc 3f 210 fstp %st(0) // pop y 211 fldl MO(p78) // 1L<<78 : x 212 testb $2, %dl 213 jz 3f // y > 0 214 fchs // -(1L<<78) : x 215 .align ALIGNARG(4) 2163: /* y is a real number. */ 217 subq $40, %rsp 218 cfi_adjust_cfa_offset (40) 219 fstpt 16(%rsp) // x 220 fstpt (%rsp) // <empty> 221 call HIDDEN_JUMPTARGET (__powl_helper) // <result> 222 addq $40, %rsp 223 cfi_adjust_cfa_offset (-40) 224 ret 225 226 // pow(x,±0) = 1, unless x is sNaN 227 .align ALIGNARG(4) 22811: fstp %st(0) // pop y 229 fldt 8(%rsp) // x 230 fxam 231 fnstsw 232 andb $0x45, %ah 233 cmpb $0x01, %ah 234 je 112f // x is NaN 235111: fstp %st(0) 236 fldl MO(one) 237 ret 238 239112: testb $0x40, 15(%rsp) 240 jnz 111b 241 fadd %st(0) 242 ret 243 244 // y == ±inf 245 .align ALIGNARG(4) 24612: fstp %st(0) // pop y 247 fldl MO(one) // 1 248 fldt 8(%rsp) // x : 1 249 fabs // abs(x) : 1 250 fucompp // < 1, == 1, or > 1 251 fnstsw 252 andb $0x45, %ah 253 cmpb $0x45, %ah 254 je 13f // jump if x is NaN 255 256 cmpb $0x40, %ah 257 je 14f // jump if |x| == 1 258 259 shlb $1, %ah 260 xorb %ah, %dl 261 andl $2, %edx 262#ifdef PIC 263 lea inf_zero(%rip),%rcx 264 fldl (%rcx, %rdx, 4) 265#else 266 fldl inf_zero(,%rdx, 4) 267#endif 268 ret 269 270 .align ALIGNARG(4) 27114: fldl MO(one) 272 ret 273 274 .align ALIGNARG(4) 27513: fldt 8(%rsp) // load x == NaN 276 fadd %st(0) 277 ret 278 279 .align ALIGNARG(4) 280 // x is ±inf 28115: fstp %st(0) // y 282 testb $2, %dh 283 jz 16f // jump if x == +inf 284 285 // fistpll raises invalid exception for |y| >= 1L<<63, but y 286 // may be odd unless we know |y| >= 1L<<64. 287 fldl MO(p64) // 1L<<64 : y 288 fld %st(1) // y : 1L<<64 : y 289 fabs // |y| : 1L<<64 : y 290 fcomip %st(1), %st // 1L<<64 : y 291 fstp %st(0) // y 292 jnc 16f 293 fldl MO(p63) // p63 : y 294 fxch // y : p63 295 fprem // y%p63 : p63 296 fstp %st(1) // y%p63 297 298 // We must find out whether y is an odd integer. 299 fld %st // y : y 300 fistpll -8(%rsp) // y 301 fildll -8(%rsp) // int(y) : y 302 fucomip %st(1),%st 303 ffreep %st // <empty> 304 jne 17f 305 306 // OK, the value is an integer, but is it odd? 307 mov -8(%rsp), %eax 308 mov -4(%rsp), %edx 309 andb $1, %al 310 jz 18f // jump if not odd 311 // It's an odd integer. 312 shrl $31, %edx 313#ifdef PIC 314 lea minf_mzero(%rip),%rcx 315 fldl (%rcx, %rdx, 8) 316#else 317 fldl minf_mzero(,%rdx, 8) 318#endif 319 ret 320 321 .align ALIGNARG(4) 32216: fcompl MO(zero) 323 fnstsw 324 shrl $5, %eax 325 andl $8, %eax 326#ifdef PIC 327 lea inf_zero(%rip),%rcx 328 fldl (%rcx, %rax, 1) 329#else 330 fldl inf_zero(,%rax, 1) 331#endif 332 ret 333 334 .align ALIGNARG(4) 33517: shll $30, %edx // sign bit for y in right position 33618: shrl $31, %edx 337#ifdef PIC 338 lea inf_zero(%rip),%rcx 339 fldl (%rcx, %rdx, 8) 340#else 341 fldl inf_zero(,%rdx, 8) 342#endif 343 ret 344 345 .align ALIGNARG(4) 346 // x is ±0 34720: fstp %st(0) // y 348 testb $2, %dl 349 jz 21f // y > 0 350 351 // x is ±0 and y is < 0. We must find out whether y is an odd integer. 352 testb $2, %dh 353 jz 25f 354 355 // fistpll raises invalid exception for |y| >= 1L<<63, but y 356 // may be odd unless we know |y| >= 1L<<64. 357 fldl MO(p64) // 1L<<64 : y 358 fld %st(1) // y : 1L<<64 : y 359 fabs // |y| : 1L<<64 : y 360 fcomip %st(1), %st // 1L<<64 : y 361 fstp %st(0) // y 362 jnc 25f 363 fldl MO(p63) // p63 : y 364 fxch // y : p63 365 fprem // y%p63 : p63 366 fstp %st(1) // y%p63 367 368 fld %st // y : y 369 fistpll -8(%rsp) // y 370 fildll -8(%rsp) // int(y) : y 371 fucomip %st(1),%st 372 ffreep %st // <empty> 373 jne 26f 374 375 // OK, the value is an integer, but is it odd? 376 mov -8(%rsp),%eax 377 mov -4(%rsp),%edx 378 andb $1, %al 379 jz 27f // jump if not odd 380 // It's an odd integer. 381 // Raise divide-by-zero exception and get minus infinity value. 382 fldl MO(one) 383 fdivl MO(zero) 384 fchs 385 ret 386 38725: fstp %st(0) 38826: 38927: // Raise divide-by-zero exception and get infinity value. 390 fldl MO(one) 391 fdivl MO(zero) 392 ret 393 394 .align ALIGNARG(4) 395 // x is ±0 and y is > 0. We must find out whether y is an odd integer. 39621: testb $2, %dh 397 jz 22f 398 399 // fistpll raises invalid exception for |y| >= 1L<<63, but y 400 // may be odd unless we know |y| >= 1L<<64. 401 fldl MO(p64) // 1L<<64 : y 402 fxch // y : 1L<<64 403 fcomi %st(1), %st // y : 1L<<64 404 fstp %st(1) // y 405 jnc 22f 406 fldl MO(p63) // p63 : y 407 fxch // y : p63 408 fprem // y%p63 : p63 409 fstp %st(1) // y%p63 410 411 fld %st // y : y 412 fistpll -8(%rsp) // y 413 fildll -8(%rsp) // int(y) : y 414 fucomip %st(1),%st 415 ffreep %st // <empty> 416 jne 23f 417 418 // OK, the value is an integer, but is it odd? 419 mov -8(%rsp),%eax 420 mov -4(%rsp),%edx 421 andb $1, %al 422 jz 24f // jump if not odd 423 // It's an odd integer. 424 fldl MO(mzero) 425 ret 426 42722: fstp %st(0) 42823: 42924: fldl MO(zero) 430 ret 431 432END(__ieee754_powl) 433libm_alias_finite (__ieee754_powl, __powl) 434