1/* Compute cubic root of long double value. 2 Copyright (C) 1997-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <libm-alias-ldouble.h> 20#include <machine/asm.h> 21 22 .section .rodata 23 24 .align ALIGNARG(4) 25 .type f8,@object 26f8: .tfloat 0.161617097923756032 27 ASM_SIZE_DIRECTIVE(f8) 28 .align ALIGNARG(4) 29 .type f7,@object 30f7: .tfloat -0.988553671195413709 31 ASM_SIZE_DIRECTIVE(f7) 32 .align ALIGNARG(4) 33 .type f6,@object 34f6: .tfloat 2.65298938441952296 35 ASM_SIZE_DIRECTIVE(f6) 36 .align ALIGNARG(4) 37 .type f5,@object 38f5: .tfloat -4.11151425200350531 39 ASM_SIZE_DIRECTIVE(f5) 40 .align ALIGNARG(4) 41 .type f4,@object 42f4: .tfloat 4.09559907378707839 43 ASM_SIZE_DIRECTIVE(f4) 44 .align ALIGNARG(4) 45 .type f3,@object 46f3: .tfloat -2.82414939754975962 47 ASM_SIZE_DIRECTIVE(f3) 48 .align ALIGNARG(4) 49 .type f2,@object 50f2: .tfloat 1.67595307700780102 51 ASM_SIZE_DIRECTIVE(f2) 52 .align ALIGNARG(4) 53 .type f1,@object 54f1: .tfloat 0.338058687610520237 55 ASM_SIZE_DIRECTIVE(f1) 56 57#define CBRT2 1.2599210498948731648 58#define ONE_CBRT2 0.793700525984099737355196796584 59#define SQR_CBRT2 1.5874010519681994748 60#define ONE_SQR_CBRT2 0.629960524947436582364439673883 61 62 /* We make the entries in the following table all 16 bytes 63 wide to avoid having to implement a multiplication by 10. */ 64 .type factor,@object 65 .align ALIGNARG(4) 66factor: .tfloat ONE_SQR_CBRT2 67 .byte 0, 0, 0, 0, 0, 0 68 .tfloat ONE_CBRT2 69 .byte 0, 0, 0, 0, 0, 0 70 .tfloat 1.0 71 .byte 0, 0, 0, 0, 0, 0 72 .tfloat CBRT2 73 .byte 0, 0, 0, 0, 0, 0 74 .tfloat SQR_CBRT2 75 ASM_SIZE_DIRECTIVE(factor) 76 77 .type two64,@object 78 .align ALIGNARG(4) 79two64: .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43 80 ASM_SIZE_DIRECTIVE(two64) 81 82#ifdef PIC 83#define MO(op) op##@GOTOFF(%ebx) 84#define MOX(op,x) op##@GOTOFF(%ebx,x,1) 85#else 86#define MO(op) op 87#define MOX(op,x) op(x) 88#endif 89 90 .text 91ENTRY(__cbrtl) 92 movl 4(%esp), %ecx 93 movl 12(%esp), %eax 94 orl 8(%esp), %ecx 95 movl %eax, %edx 96 andl $0x7fff, %eax 97 orl %eax, %ecx 98 jz 1f 99 xorl %ecx, %ecx 100 cmpl $0x7fff, %eax 101 je 1f 102 103#ifdef PIC 104 pushl %ebx 105 cfi_adjust_cfa_offset (4) 106 cfi_rel_offset (ebx, 0) 107 LOAD_PIC_REG (bx) 108#endif 109 110 cmpl $0, %eax 111 jne 2f 112 113#ifdef PIC 114 fldt 8(%esp) 115#else 116 fldt 4(%esp) 117#endif 118 fmull MO(two64) 119 movl $-64, %ecx 120#ifdef PIC 121 fstpt 8(%esp) 122 movl 16(%esp), %eax 123#else 124 fstpt 4(%esp) 125 movl 12(%esp), %eax 126#endif 127 movl %eax, %edx 128 andl $0x7fff, %eax 129 1302: andl $0x8000, %edx 131 subl $16382, %eax 132 orl $0x3ffe, %edx 133 addl %eax, %ecx 134#ifdef PIC 135 movl %edx, 16(%esp) 136 137 fldt 8(%esp) /* xm */ 138#else 139 movl %edx, 12(%esp) 140 141 fldt 4(%esp) /* xm */ 142#endif 143 fabs 144 145 /* The following code has two tracks: 146 a) compute the normalized cbrt value 147 b) compute xe/3 and xe%3 148 The right track computes the value for b) and this is done 149 in an optimized way by avoiding division. 150 151 But why two tracks at all? Very easy: efficiency. Some FP 152 instruction can overlap with a certain amount of integer (and 153 FP) instructions. So we get (except for the imull) all 154 instructions for free. */ 155 156 fldt MO(f8) /* f8 : xm */ 157 fmul %st(1) /* f8*xm : xm */ 158 159 fldt MO(f7) 160 faddp /* f7+f8*xm : xm */ 161 fmul %st(1) /* (f7+f8*xm)*xm : xm */ 162 movl $1431655766, %eax 163 fldt MO(f6) 164 faddp /* f6+(f7+f8*xm)*xm : xm */ 165 imull %ecx 166 fmul %st(1) /* (f6+(f7+f8*xm)*xm)*xm : xm */ 167 movl %ecx, %eax 168 fldt MO(f5) 169 faddp /* f5+(f6+(f7+f8*xm)*xm)*xm : xm */ 170 sarl $31, %eax 171 fmul %st(1) /* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ 172 subl %eax, %edx 173 fldt MO(f4) 174 faddp /* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */ 175 fmul %st(1) /* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ 176 fldt MO(f3) 177 faddp /* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */ 178 fmul %st(1) /* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ 179 fldt MO(f2) 180 faddp /* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */ 181 fmul %st(1) /* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ 182 fldt MO(f1) 183 faddp /* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */ 184 185 fld %st /* u : u : xm */ 186 fmul %st(1) /* u*u : u : xm */ 187 fld %st(2) /* xm : u*u : u : xm */ 188 fadd %st /* 2*xm : u*u : u : xm */ 189 fxch %st(1) /* u*u : 2*xm : u : xm */ 190 fmul %st(2) /* t2:=u*u*u : 2*xm : u : xm */ 191 movl %edx, %eax 192 fadd %st, %st(1) /* t2 : t2+2*xm : u : xm */ 193 leal (%edx,%edx,2),%edx 194 fadd %st(0) /* 2*t2 : t2+2*xm : u : xm */ 195 subl %edx, %ecx 196 faddp %st, %st(3) /* t2+2*xm : u : 2*t2+xm */ 197 shll $4, %ecx 198 fmulp /* u*(t2+2*xm) : 2*t2+xm */ 199 fdivp %st, %st(1) /* u*(t2+2*xm)/(2*t2+xm) */ 200 fldt MOX(32+factor,%ecx) 201 fmulp /* u*(t2+2*xm)/(2*t2+xm)*FACT */ 202 pushl %eax 203 cfi_adjust_cfa_offset (4) 204 fildl (%esp) /* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */ 205 fxch /* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */ 206 fscale /* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */ 207 popl %edx 208 cfi_adjust_cfa_offset (-4) 209#ifdef PIC 210 movl 16(%esp), %eax 211 popl %ebx 212 cfi_adjust_cfa_offset (-4) 213 cfi_restore (ebx) 214#else 215 movl 12(%esp), %eax 216#endif 217 testl $0x8000, %eax 218 fstp %st(1) 219 jz 4f 220 fchs 2214: ret 222 223 /* Return the argument. */ 2241: fldt 4(%esp) 225 fadd %st 226 ret 227END(__cbrtl) 228libm_alias_ldouble (__cbrt, cbrt) 229