1/* Copyright (C) 2004-2022 Free Software Foundation, Inc.
2   This file is part of the GNU C Library.
3
4   The GNU C Library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU Lesser General Public
6   License as published by the Free Software Foundation; either
7   version 2.1 of the License, or (at your option) any later version.
8
9   The GNU C Library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13
14   You should have received a copy of the GNU Lesser General Public
15   License along with the GNU C Library.  If not, see
16   <https://www.gnu.org/licenses/>.  */
17
18#include "div_libc.h"
19
20
21/* 64-bit unsigned long remainder.  These are not normal C functions.  Argument
22   registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
23   clobbered.
24
25   Theory of operation here is that we can use the FPU divider for virtually
26   all operands that we see: all dividend values between -2**53 and 2**53-1
27   can be computed directly.  Note that divisor values need not be checked
28   against that range because the rounded fp value will be close enough such
29   that the quotient is < 1, which will properly be truncated to zero when we
30   convert back to integer.
31
32   When the dividend is outside the range for which we can compute exact
33   results, we use the fp quotent as an estimate from which we begin refining
34   an exact integral value.  This reduces the number of iterations in the
35   shift-and-subtract loop significantly.
36
37   The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
38   for cvttq/c even without /sui being set.  It will not, however, properly
39   raise the exception, so we don't have to worry about FPCR_INED being clear
40   and so dying by SIGFPE.  */
41
42	.text
43	.align	4
44	.globl	__remqu
45	.type	__remqu, @funcnoplt
46	.usepv	__remqu, no
47
48	cfi_startproc
49	cfi_return_column (RA)
50__remqu:
51	lda	sp, -FRAME(sp)
52	cfi_def_cfa_offset (FRAME)
53	CALL_MCOUNT
54
55	/* Get the fp divide insn issued as quickly as possible.  After
56	   that's done, we have at least 22 cycles until its results are
57	   ready -- all the time in the world to figure out how we're
58	   going to use the results.  */
59	subq	Y, 1, AT
60	and	Y, AT, AT
61	beq	AT, $powerof2
62
63	stt	$f0, 0(sp)
64	excb
65	stt	$f1, 8(sp)
66	stt	$f3, 48(sp)
67	cfi_rel_offset ($f0, 0)
68	cfi_rel_offset ($f1, 8)
69	cfi_rel_offset ($f3, 48)
70	mf_fpcr	$f3
71
72	_ITOFT2	X, $f0, 16, Y, $f1, 24
73	cvtqt	$f0, $f0
74	cvtqt	$f1, $f1
75
76	blt	X, $x_is_neg
77	divt/c	$f0, $f1, $f0
78
79	/* Check to see if Y was mis-converted as signed value.  */
80	ldt	$f1, 8(sp)
81	blt	Y, $y_is_neg
82
83	/* Check to see if X fit in the double as an exact value.  */
84	srl	X, 53, AT
85	bne	AT, $x_big
86
87	/* If we get here, we're expecting exact results from the division.
88	   Do nothing else besides convert, compute remainder, clean up.  */
89	cvttq/c	$f0, $f0
90	excb
91	mt_fpcr	$f3
92	_FTOIT	$f0, AT, 16
93
94	mulq	AT, Y, AT
95	ldt	$f0, 0(sp)
96	ldt	$f3, 48(sp)
97	lda	sp, FRAME(sp)
98	cfi_remember_state
99	cfi_restore ($f0)
100	cfi_restore ($f1)
101	cfi_restore ($f3)
102	cfi_def_cfa_offset (0)
103
104	.align	4
105	subq	X, AT, RV
106	ret	$31, (RA), 1
107
108	.align	4
109	cfi_restore_state
110$x_is_neg:
111	/* If we get here, X is so big that bit 63 is set, which made the
112	   conversion come out negative.  Fix it up lest we not even get
113	   a good estimate.  */
114	ldah	AT, 0x5f80		/* 2**64 as float.  */
115	stt	$f2, 24(sp)
116	cfi_rel_offset ($f2, 24)
117	_ITOFS	AT, $f2, 16
118
119	.align	4
120	addt	$f0, $f2, $f0
121	unop
122	divt/c	$f0, $f1, $f0
123	unop
124
125	/* Ok, we've now the divide issued.  Continue with other checks.  */
126	ldt	$f1, 8(sp)
127	unop
128	ldt	$f2, 24(sp)
129	blt	Y, $y_is_neg
130	cfi_restore ($f1)
131	cfi_restore ($f2)
132	cfi_remember_state	/* for y_is_neg */
133
134	.align	4
135$x_big:
136	/* If we get here, X is large enough that we don't expect exact
137	   results, and neither X nor Y got mis-translated for the fp
138	   division.  Our task is to take the fp result, figure out how
139	   far it's off from the correct result and compute a fixup.  */
140	stq	t0, 16(sp)
141	stq	t1, 24(sp)
142	stq	t2, 32(sp)
143	stq	t3, 40(sp)
144	cfi_rel_offset (t0, 16)
145	cfi_rel_offset (t1, 24)
146	cfi_rel_offset (t2, 32)
147	cfi_rel_offset (t3, 40)
148
149#define Q	t0		/* quotient */
150#define R	RV		/* remainder */
151#define SY	t1		/* scaled Y */
152#define S	t2		/* scalar */
153#define QY	t3		/* Q*Y */
154
155	cvttq/c	$f0, $f0
156	_FTOIT	$f0, Q, 8
157	mulq	Q, Y, QY
158
159	.align	4
160	stq	t4, 8(sp)
161	excb
162	ldt	$f0, 0(sp)
163	mt_fpcr	$f3
164	cfi_rel_offset (t4, 8)
165	cfi_restore ($f0)
166
167	subq	QY, X, R
168	mov	Y, SY
169	mov	1, S
170	bgt	R, $q_high
171
172$q_high_ret:
173	subq	X, QY, R
174	mov	Y, SY
175	mov	1, S
176	bgt	R, $q_low
177
178$q_low_ret:
179	ldq	t4, 8(sp)
180	ldq	t0, 16(sp)
181	ldq	t1, 24(sp)
182	ldq	t2, 32(sp)
183
184	ldq	t3, 40(sp)
185	ldt	$f3, 48(sp)
186	lda	sp, FRAME(sp)
187	cfi_remember_state
188	cfi_restore (t0)
189	cfi_restore (t1)
190	cfi_restore (t2)
191	cfi_restore (t3)
192	cfi_restore (t4)
193	cfi_restore ($f3)
194	cfi_def_cfa_offset (0)
195	ret	$31, (RA), 1
196
197	.align	4
198	cfi_restore_state
199	/* The quotient that we computed was too large.  We need to reduce
200	   it by S such that Y*S >= R.  Obviously the closer we get to the
201	   correct value the better, but overshooting high is ok, as we'll
202	   fix that up later.  */
2030:
204	addq	SY, SY, SY
205	addq	S, S, S
206$q_high:
207	cmpult	SY, R, AT
208	bne	AT, 0b
209
210	subq	Q, S, Q
211	unop
212	subq	QY, SY, QY
213	br	$q_high_ret
214
215	.align	4
216	/* The quotient that we computed was too small.  Divide Y by the
217	   current remainder (R) and add that to the existing quotient (Q).
218	   The expectation, of course, is that R is much smaller than X.  */
219	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
220	   already have a copy of Y in SY and the value 1 in S.  */
2210:
222	addq	SY, SY, SY
223	addq	S, S, S
224$q_low:
225	cmpult	SY, R, AT
226	bne	AT, 0b
227
228	/* Shift-down and subtract loop.  Each iteration compares our scaled
229	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
230	   Y's scalar (S) so add it to the quotient (Q).  */
2312:	addq	Q, S, t3
232	srl	S, 1, S
233	cmpule	SY, R, AT
234	subq	R, SY, t4
235
236	cmovne	AT, t3, Q
237	cmovne	AT, t4, R
238	srl	SY, 1, SY
239	bne	S, 2b
240
241	br	$q_low_ret
242
243	.align	4
244	cfi_restore_state
245$y_is_neg:
246	/* If we get here, Y is so big that bit 63 is set.  The results
247	   from the divide will be completely wrong.  Fortunately, the
248	   quotient must be either 0 or 1, so the remainder must be X
249	   or X-Y, so just compute it directly.  */
250	cmpule	Y, X, AT
251	excb
252	mt_fpcr	$f3
253	subq	X, Y, RV
254	ldt	$f0, 0(sp)
255	ldt	$f3, 48(sp)
256	cmoveq	AT, X, RV
257
258	lda	sp, FRAME(sp)
259	cfi_restore ($f0)
260	cfi_restore ($f3)
261	cfi_def_cfa_offset (0)
262	ret	$31, (RA), 1
263
264	.align	4
265	cfi_def_cfa_offset (FRAME)
266$powerof2:
267	subq	Y, 1, AT
268	beq	Y, DIVBYZERO
269	and	X, AT, RV
270	lda	sp, FRAME(sp)
271	cfi_def_cfa_offset (0)
272	ret	$31, (RA), 1
273
274	cfi_endproc
275	.size	__remqu, .-__remqu
276
277	DO_DIVBYZERO
278