! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
!
! Copyright (C) 1995-2022 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

#include <sysdep.h>

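/* For orientation, a rough C-level sketch of the operation implemented
   below.  This is only an illustration; the name and types here are ad hoc
   (32-bit limbs assumed), not the real glibc/GMP prototype, which uses
   mp_limb_t and mp_size_t:

	unsigned int
	mpn_sub_n_ref (unsigned int *res_ptr, const unsigned int *s1_ptr,
		       const unsigned int *s2_ptr, int size)
	{
	  unsigned int borrow = 0;
	  int i;
	  for (i = 0; i < size; i++)
	    {
	      unsigned int a = s1_ptr[i], b = s2_ptr[i];
	      res_ptr[i] = a - b - borrow;
	      borrow = (a < b) || (a == b && borrow);
	    }
	  return borrow;
	}

   The assembly below computes the same result, but moves two limbs at a
   time with ldd/std wherever the pointer alignment permits.  */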
ENTRY(__mpn_sub_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop
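/* The ldd/std doubleword accesses used below require 8-byte-aligned
   addresses, so three variants follow, selected by comparing bit 2 of the
   pointers: V1a when S2_PTR and RES_PTR have the same word alignment,
   V1b when S1_PTR and RES_PTR do, and V2 when only S1_PTR and S2_PTR do.  */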
! **  V1a  **
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
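/* Throughout this file the borrow is parked in a register with
   "addx %g0,%g0,reg" (reg = carry flag) across instructions that clobber
   the condition codes, and re-created with "subcc %g0,reg,%g0", which sets
   the carry flag again iff reg is non-zero.  */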
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

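/* The 8-limb loop is software pipelined: the first two limbs of each
   operand are fetched here, and every step of the loop subtracts the pair
   loaded previously while fetching the next one.  SIZE is biased by -10
   (-8 for one full iteration, -2 for the pair loaded ahead); LOC(fin1)
   adds part of that back before the two-limb cleanup loop.  */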
	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

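/* Undo part of the -10 bias: after the addcc below, SIZE equals the number
   of limbs still to be subtracted minus 4 (two per pass of LOC(loope1),
   plus the two already fetched for LOC(end1)).  */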
LOC(fin1):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	subxcc	%g4,%g2,%o4
	subxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! **  V1b  **
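/* Same scheme as V1a with the operand roles swapped: here S1_PTR shares
   its alignment with RES_PTR, so the ldd doubleword loads read S1 and the
   single-word loads read S2, with the subxcc operands reversed to match.  */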
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1b)		! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
	ld	[S2_PTR],%g4
	add	S2_PTR,4,S2_PTR
	ld	[S1_PTR],%g2
	add	S1_PTR,4,S1_PTR
	add	SIZE,-1,SIZE
	subcc	%g2,%g4,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1b):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[S2_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S2_PTR+4],%g1
	ldd	[S1_PTR+0],%g2
	blt	LOC(fin1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+20],%g1
	ldd	[S1_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+28],%g1
	ldd	[S1_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+36],%g1
	ldd	[S1_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1b)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1b):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1b)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1b):
	subxcc	%g2,%g4,%o4
	subxcc	%g3,%g1,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0
	be	LOC(ret1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S2_PTR+8],%g4
	ld	[S1_PTR+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1b):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

! **  V2  **
/* If we come here, S1_PTR and RES_PTR differ in alignment, and so do
   S2_PTR and RES_PTR.  Since there are only two word alignments we care
   about, S1_PTR and S2_PTR must therefore have the same alignment.  */
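/* Both source pointers can thus be brought to doubleword alignment
   (peeling one limb below if necessary) and read with ldd, while RES_PTR
   has the opposite alignment and each result limb is written with a
   single-word st.  This variant needs no load lookahead.  */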

LOC(2):	cmp	SIZE,1
	be	LOC(jone)
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0	! return borrow-out from most significant limb

END(__mpn_sub_n)