1/* Compute cubic root of float value. 2 Copyright (C) 1997-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <machine/asm.h> 20#include <libm-alias-float.h> 21 22 .section .rodata 23 24 .align ALIGNARG(4) 25 .type f3,@object 26f3: .double 0.191502161678719066 27 ASM_SIZE_DIRECTIVE(f3) 28 .type f2,@object 29f2: .double 0.697570460207922770 30 ASM_SIZE_DIRECTIVE(f2) 31 .type f1,@object 32f1: .double 0.492659620528969547 33 ASM_SIZE_DIRECTIVE(f1) 34 35#define CBRT2 1.2599210498948731648 36#define ONE_CBRT2 0.793700525984099737355196796584 37#define SQR_CBRT2 1.5874010519681994748 38#define ONE_SQR_CBRT2 0.629960524947436582364439673883 39 40 .type factor,@object 41 .align ALIGNARG(4) 42factor: .double ONE_SQR_CBRT2 43 .double ONE_CBRT2 44 .double 1.0 45 .double CBRT2 46 .double SQR_CBRT2 47 ASM_SIZE_DIRECTIVE(factor) 48 49 .type two25,@object 50two25: .byte 0, 0, 0, 0x4c 51 ASM_SIZE_DIRECTIVE(two25) 52 53#ifdef PIC 54#define MO(op) op##@GOTOFF(%ebx) 55#define MOX(op,x) op##@GOTOFF(%ebx,x,1) 56#else 57#define MO(op) op 58#define MOX(op,x) op(x) 59#endif 60 61 .text 62ENTRY(__cbrtf) 63 movl 4(%esp), %eax 64 xorl %ecx, %ecx 65 movl %eax, %edx 66 andl $0x7fffffff, %eax 67 jz 1f 68 cmpl $0x7f800000, %eax 69 jae 1f 70 71#ifdef PIC 72 pushl %ebx 73 cfi_adjust_cfa_offset (4) 74 cfi_rel_offset (ebx, 0) 75 LOAD_PIC_REG (bx) 76#endif 77 78 cmpl $0x00800000, %eax 79 jae 2f 80 81#ifdef PIC 82 flds 8(%esp) 83#else 84 flds 4(%esp) 85#endif 86 fmuls MO(two25) 87 movl $-25, %ecx 88#ifdef PIC 89 fstps 8(%esp) 90 movl 8(%esp), %eax 91#else 92 fstps 4(%esp) 93 movl 4(%esp), %eax 94#endif 95 movl %eax, %edx 96 andl $0x7fffffff, %eax 97 982: shrl $23, %eax 99 andl $0x807fffff, %edx 100 subl $126, %eax 101 orl $0x3f000000, %edx 102 addl %eax, %ecx 103#ifdef PIC 104 movl %edx, 8(%esp) 105 106 flds 8(%esp) /* xm */ 107#else 108 movl %edx, 4(%esp) 109 110 flds 4(%esp) /* xm */ 111#endif 112 fabs 113 114 /* The following code has two tracks: 115 a) compute the normalized cbrt value 116 b) compute xe/3 and xe%3 117 The right track computes the value for b) and this is done 118 in an optimized way by avoiding division. 119 120 But why two tracks at all? Very easy: efficiency. Some FP 121 instruction can overlap with a certain amount of integer (and 122 FP) instructions. So we get (except for the imull) all 123 instructions for free. */ 124 125 fld %st(0) /* xm : xm */ 126 fmull MO(f3) /* f3*xm : xm */ 127 movl $1431655766, %eax 128 fsubrl MO(f2) /* f2-f3*xm : xm */ 129 imull %ecx 130 fmul %st(1) /* (f2-f3*xm)*xm : xm */ 131 movl %ecx, %eax 132 faddl MO(f1) /* u:=f1+(f2-f3*xm)*xm : xm */ 133 sarl $31, %eax 134 fld %st /* u : u : xm */ 135 subl %eax, %edx 136 fmul %st(1) /* u*u : u : xm */ 137 fld %st(2) /* xm : u*u : u : xm */ 138 fadd %st /* 2*xm : u*u : u : xm */ 139 fxch %st(1) /* u*u : 2*xm : u : xm */ 140 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ 141 movl %edx, %eax 142 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ 143 leal (%edx,%edx,2),%edx 144 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ 145 subl %edx, %ecx 146 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ 147 shll $3, %ecx 148 fmulp /* u*(t2+2*xm) : 2*t2+xm */ 149 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ 150 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ 151 pushl %eax 152 cfi_adjust_cfa_offset (4) 153 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ 154 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ 155 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ 156 popl %edx 157 cfi_adjust_cfa_offset (-4) 158#ifdef PIC 159 movl 8(%esp), %eax 160 popl %ebx 161 cfi_adjust_cfa_offset (-4) 162 cfi_restore (ebx) 163#else 164 movl 4(%esp), %eax 165#endif 166 testl %eax, %eax 167 fstp %st(1) 168 jns 4f 169 fchs 1704: ret 171 172 /* Return the argument. */ 1731: flds 4(%esp) 174 ret 175END(__cbrtf) 176libm_alias_float (__cbrt, cbrt) 177