1/* Compute cubic root of double value. 2 Copyright (C) 1997-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <machine/asm.h> 20#include <libm-alias-double.h> 21 22 .section .rodata 23 24 .align ALIGNARG(4) 25 .type f7,@object 26f7: .double -0.145263899385486377 27 ASM_SIZE_DIRECTIVE(f7) 28 .type f6,@object 29f6: .double 0.784932344976639262 30 ASM_SIZE_DIRECTIVE(f6) 31 .type f5,@object 32f5: .double -1.83469277483613086 33 ASM_SIZE_DIRECTIVE(f5) 34 .type f4,@object 35f4: .double 2.44693122563534430 36 ASM_SIZE_DIRECTIVE(f4) 37 .type f3,@object 38f3: .double -2.11499494167371287 39 ASM_SIZE_DIRECTIVE(f3) 40 .type f2,@object 41f2: .double 1.50819193781584896 42 ASM_SIZE_DIRECTIVE(f2) 43 .type f1,@object 44f1: .double 0.354895765043919860 45 ASM_SIZE_DIRECTIVE(f1) 46 47#define CBRT2 1.2599210498948731648 48#define ONE_CBRT2 0.793700525984099737355196796584 49#define SQR_CBRT2 1.5874010519681994748 50#define ONE_SQR_CBRT2 0.629960524947436582364439673883 51 52 .type factor,@object 53factor: .double ONE_SQR_CBRT2 54 .double ONE_CBRT2 55 .double 1.0 56 .double CBRT2 57 .double SQR_CBRT2 58 ASM_SIZE_DIRECTIVE(factor) 59 60 .type two54,@object 61two54: .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43 62 ASM_SIZE_DIRECTIVE(two54) 63 64#ifdef PIC 65#define MO(op) op##@GOTOFF(%ebx) 66#define MOX(op,x) op##@GOTOFF(%ebx,x,1) 67#else 68#define MO(op) op 69#define MOX(op,x) op(x) 70#endif 71 72 .text 73ENTRY(__cbrt) 74 movl 4(%esp), %ecx 75 movl 8(%esp), %eax 76 movl %eax, %edx 77 andl $0x7fffffff, %eax 78 orl %eax, %ecx 79 jz 1f 80 xorl %ecx, %ecx 81 cmpl $0x7ff00000, %eax 82 jae 1f 83 84#ifdef PIC 85 pushl %ebx 86 cfi_adjust_cfa_offset (4) 87 cfi_rel_offset (ebx, 0) 88 LOAD_PIC_REG (bx) 89#endif 90 91 cmpl $0x00100000, %eax 92 jae 2f 93 94#ifdef PIC 95 fldl 8(%esp) 96#else 97 fldl 4(%esp) 98#endif 99 fmull MO(two54) 100 movl $-54, %ecx 101#ifdef PIC 102 fstpl 8(%esp) 103 movl 12(%esp), %eax 104#else 105 fstpl 4(%esp) 106 movl 8(%esp), %eax 107#endif 108 movl %eax, %edx 109 andl $0x7fffffff, %eax 110 1112: shrl $20, %eax 112 andl $0x800fffff, %edx 113 subl $1022, %eax 114 orl $0x3fe00000, %edx 115 addl %eax, %ecx 116#ifdef PIC 117 movl %edx, 12(%esp) 118 119 fldl 8(%esp) /* xm */ 120#else 121 movl %edx, 8(%esp) 122 123 fldl 4(%esp) /* xm */ 124#endif 125 fabs 126 127 /* The following code has two tracks: 128 a) compute the normalized cbrt value 129 b) compute xe/3 and xe%3 130 The right track computes the value for b) and this is done 131 in an optimized way by avoiding division. 132 133 But why two tracks at all? Very easy: efficiency. Some FP 134 instruction can overlap with a certain amount of integer (and 135 FP) instructions. So we get (except for the imull) all 136 instructions for free. */ 137 138 fld %st(0) /* xm : xm */ 139 140 fmull MO(f7) /* f7*xm : xm */ 141 movl $1431655766, %eax 142 faddl MO(f6) /* f6+f7*xm : xm */ 143 imull %ecx 144 fmul %st(1) /* (f6+f7*xm)*xm : xm */ 145 movl %ecx, %eax 146 faddl MO(f5) /* f5+(f6+f7*xm)*xm : xm */ 147 sarl $31, %eax 148 fmul %st(1) /* (f5+(f6+f7*xm)*xm)*xm : xm */ 149 subl %eax, %edx 150 faddl MO(f4) /* f4+(f5+(f6+f7*xm)*xm)*xm : xm */ 151 fmul %st(1) /* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ 152 faddl MO(f3) /* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */ 153 fmul %st(1) /* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ 154 faddl MO(f2) /* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */ 155 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ 156 faddl MO(f1) /* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */ 157 158 fld %st /* u : u : xm */ 159 fmul %st(1) /* u*u : u : xm */ 160 fld %st(2) /* xm : u*u : u : xm */ 161 fadd %st /* 2*xm : u*u : u : xm */ 162 fxch %st(1) /* u*u : 2*xm : u : xm */ 163 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ 164 movl %edx, %eax 165 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ 166 leal (%edx,%edx,2),%edx 167 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ 168 subl %edx, %ecx 169 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ 170 shll $3, %ecx 171 fmulp /* u*(t2+2*xm) : 2*t2+xm */ 172 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ 173 fmull MOX(16+factor,%ecx) /* u*(t2+2*xm)/(2*t2+xm)*FACT */ 174 pushl %eax 175 cfi_adjust_cfa_offset (4) 176 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ 177 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ 178 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ 179 popl %edx 180 cfi_adjust_cfa_offset (-4) 181#ifdef PIC 182 movl 12(%esp), %eax 183 popl %ebx 184 cfi_adjust_cfa_offset (-4) 185 cfi_restore (ebx) 186#else 187 movl 8(%esp), %eax 188#endif 189 testl %eax, %eax 190 fstp %st(1) 191 jns 4f 192 fchs 1934: ret 194 195 /* Return the argument. */ 1961: fldl 4(%esp) 197 ret 198END(__cbrt) 199libm_alias_double (__cbrt, cbrt) 200