1/* Compute cubic root of double value.
2   Copyright (C) 1997-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <machine/asm.h>
20#include <libm-alias-double.h>
21
22        .section .rodata
23
24        .align ALIGNARG(4)
25        .type f7,@object
26f7:	.double -0.145263899385486377
27	ASM_SIZE_DIRECTIVE(f7)
28        .type f6,@object
29f6:	.double 0.784932344976639262
30	ASM_SIZE_DIRECTIVE(f6)
31        .type f5,@object
32f5:	.double -1.83469277483613086
33	ASM_SIZE_DIRECTIVE(f5)
34        .type f4,@object
35f4:	.double 2.44693122563534430
36	ASM_SIZE_DIRECTIVE(f4)
37        .type f3,@object
38f3:	.double -2.11499494167371287
39	ASM_SIZE_DIRECTIVE(f3)
40        .type f2,@object
41f2:	.double 1.50819193781584896
42	ASM_SIZE_DIRECTIVE(f2)
43        .type f1,@object
44f1:	.double 0.354895765043919860
45	ASM_SIZE_DIRECTIVE(f1)
46
47#define CBRT2		1.2599210498948731648
48#define ONE_CBRT2	0.793700525984099737355196796584
49#define SQR_CBRT2	1.5874010519681994748
50#define ONE_SQR_CBRT2	0.629960524947436582364439673883
51
52	.type factor,@object
53factor:	.double ONE_SQR_CBRT2
54	.double ONE_CBRT2
55	.double 1.0
56	.double CBRT2
57	.double SQR_CBRT2
58	ASM_SIZE_DIRECTIVE(factor)
59
60        .type two54,@object
61two54:  .byte 0, 0, 0, 0, 0, 0, 0x50, 0x43
62        ASM_SIZE_DIRECTIVE(two54)
63
64#ifdef PIC
65#define MO(op) op##@GOTOFF(%ebx)
66#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
67#else
68#define MO(op) op
69#define MOX(op,x) op(x)
70#endif
71
72	.text
73ENTRY(__cbrt)
74	movl	4(%esp), %ecx
75	movl	8(%esp), %eax
76	movl	%eax, %edx
77	andl	$0x7fffffff, %eax
78	orl	%eax, %ecx
79	jz	1f
80	xorl	%ecx, %ecx
81	cmpl	$0x7ff00000, %eax
82	jae	1f
83
84#ifdef PIC
85	pushl	%ebx
86	cfi_adjust_cfa_offset (4)
87	cfi_rel_offset (ebx, 0)
88	LOAD_PIC_REG (bx)
89#endif
90
91	cmpl	$0x00100000, %eax
92	jae	2f
93
94#ifdef PIC
95	fldl	8(%esp)
96#else
97	fldl	4(%esp)
98#endif
99	fmull	MO(two54)
100	movl	$-54, %ecx
101#ifdef PIC
102	fstpl	8(%esp)
103	movl	12(%esp), %eax
104#else
105	fstpl	4(%esp)
106	movl	8(%esp), %eax
107#endif
108	movl	%eax, %edx
109	andl	$0x7fffffff, %eax
110
1112:	shrl	$20, %eax
112	andl	$0x800fffff, %edx
113	subl	$1022, %eax
114	orl	$0x3fe00000, %edx
115	addl	%eax, %ecx
116#ifdef PIC
117	movl	%edx, 12(%esp)
118
119	fldl	8(%esp)			/* xm */
120#else
121	movl	%edx, 8(%esp)
122
123	fldl	4(%esp)			/* xm */
124#endif
125	fabs
126
127	/* The following code has two tracks:
128	    a) compute the normalized cbrt value
129	    b) compute xe/3 and xe%3
130	   The right track computes the value for b) and this is done
131	   in an optimized way by avoiding division.
132
133	   But why two tracks at all?  Very easy: efficiency.  Some FP
134	   instruction can overlap with a certain amount of integer (and
135	   FP) instructions.  So we get (except for the imull) all
136	   instructions for free.  */
137
138	fld	%st(0)			/* xm : xm */
139
140	fmull	MO(f7)			/* f7*xm : xm */
141			movl	$1431655766, %eax
142	faddl	MO(f6)			/* f6+f7*xm : xm */
143			imull	%ecx
144	fmul	%st(1)			/* (f6+f7*xm)*xm : xm */
145			movl	%ecx, %eax
146	faddl	MO(f5)			/* f5+(f6+f7*xm)*xm : xm */
147			sarl	$31, %eax
148	fmul	%st(1)			/* (f5+(f6+f7*xm)*xm)*xm : xm */
149			subl	%eax, %edx
150	faddl	MO(f4)			/* f4+(f5+(f6+f7*xm)*xm)*xm : xm */
151	fmul	%st(1)			/* (f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
152	faddl	MO(f3)			/* f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm : xm */
153	fmul	%st(1)			/* (f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
154	faddl	MO(f2)			/* f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm : xm */
155	fmul	%st(1)			/* (f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
156	faddl	MO(f1)			/* u:=f1+(f2+(f3+(f4+(f5+(f6+f7*xm)*xm)*xm)*xm)*xm)*xm : xm */
157
158	fld	%st			/* u : u : xm */
159	fmul	%st(1)			/* u*u : u : xm */
160	fld	%st(2)			/* xm : u*u : u : xm */
161	fadd	%st			/* 2*xm : u*u : u : xm */
162	fxch	%st(1)			/* u*u : 2*xm : u : xm */
163	fmul	%st(2)			/* t2:=u*u*u : 2*xm : u : xm */
164			movl	%edx, %eax
165	fadd	%st, %st(1)		/* t2 : t2+2*xm : u : xm */
166			leal	(%edx,%edx,2),%edx
167	fadd	%st(0)			/* 2*t2 : t2+2*xm : u : xm */
168			subl	%edx, %ecx
169	faddp	%st, %st(3)		/* t2+2*xm : u : 2*t2+xm */
170			shll	$3, %ecx
171	fmulp				/* u*(t2+2*xm) : 2*t2+xm */
172	fdivp	%st, %st(1)		/* u*(t2+2*xm)/(2*t2+xm) */
173	fmull	MOX(16+factor,%ecx)	/* u*(t2+2*xm)/(2*t2+xm)*FACT */
174	pushl	%eax
175	cfi_adjust_cfa_offset (4)
176	fildl	(%esp)			/* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
177	fxch				/* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
178	fscale				/* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
179	popl	%edx
180	cfi_adjust_cfa_offset (-4)
181#ifdef PIC
182	movl	12(%esp), %eax
183	popl	%ebx
184	cfi_adjust_cfa_offset (-4)
185	cfi_restore (ebx)
186#else
187	movl	8(%esp), %eax
188#endif
189	testl	%eax, %eax
190	fstp	%st(1)
191	jns	4f
192	fchs
1934:	ret
194
195	/* Return the argument.  */
1961:	fldl	4(%esp)
197	ret
198END(__cbrt)
199libm_alias_double (__cbrt, cbrt)
200