1! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
2! store the product in a second limb vector.
3
4! Copyright (C) 1992-2022 Free Software Foundation, Inc.
5
6! This file is part of the GNU MP Library.
7
8! The GNU MP Library is free software; you can redistribute it and/or modify
9! it under the terms of the GNU Lesser General Public License as published by
10! the Free Software Foundation; either version 2.1 of the License, or (at your
11! option) any later version.
12
13! The GNU MP Library is distributed in the hope that it will be useful, but
14! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16! License for more details.
17
18! You should have received a copy of the GNU Lesser General Public License
19! along with the GNU MP Library; see the file COPYING.LIB.  If not,
20! see <https://www.gnu.org/licenses/>.
21
22
23! INPUT PARAMETERS
24! res_ptr	o0
25! s1_ptr	o1
26! size		o2
27! s2_limb	o3
28
29#include <sysdep.h>
30
/* __mpn_mul_1 -- multiply the size-limb vector at s1_ptr by s2_limb and
   store the low word of every product, plus the carry propagated from
   the previous limb, into the vector at res_ptr.

   In:     %o0 = res_ptr, %o1 = s1_ptr, %o2 = size, %o3 = s2_limb
   Out:    %o0 = carry-out limb (high word of the last product plus the
	         final carry flag)
   Writes: %g1-%g4, %o0-%o2, %o4, %y and the integer condition codes.
	   %o7 is overwritten by the call below but saved in %g4 and
	   restored before use.

   The loop body is unrolled four times.  To handle sizes that are not a
   multiple of four the code jumps into a small table of entry stubs
   selected by size mod 4; each stub then enters the unrolled loop at
   the matching point.

   NOTE(review): size == 0 selects the same stub as a multiple of four
   and the code then processes one full group of four limbs; presumably
   callers guarantee size >= 1 -- TODO confirm.  */
31ENTRY(__mpn_mul_1)
	/* Dispatch.  %g1 becomes (size mod 4) * 16, the byte offset of the
	   entry stub (stubs are spaced four instructions apart).  The call
	   at 1: places its own address in %o7, so the add in its delay slot
	   yields %g3 = run-time address of label 3, the start of the stub
	   table.  */
32	sll	%o2,4,%g1		/* %g1 = size << 4 */
33	mov	%o7,%g4			! Save return address register
34	and	%g1,(4-1)<<4,%g1	/* keep only (size mod 4) << 4 */
351:	call	2f
36	 add	%o7,3f-1b,%g3		/* delay slot: %g3 = address of 3: */
372:	mov	%g4,%o7			! Restore return address register
38	jmp	%g3+%g1			/* enter the stub for size mod 4 */
39	 ld	[%o1+0],%o4	! 1
40
	/* Entry stubs, reached at 3b + 16 * (size mod 4).  Each stub biases
	   res_ptr and s1_ptr so that the fixed offsets used at its loop
	   entry point address the first limb, then branches into the loop.
	   The orcc in every branch delay slot zeroes the carry word %g2 and
	   clears the carry flag.  The first source limb is already in %o4
	   (loaded in the delay slot of the jmp above, before the bias).

	   Slot offsets: +0 LOC(00), +16 LOC(01), +32 LOC(10).
	   NOTE(review): the LOC(10) group is five instructions long, so the
	   slot at +48 (size mod 4 == 3) is its trailing nop, which falls
	   through into LOC(11) at +52.  The result is correct; the nop
	   looks unnecessary but confirm the layout before touching it.  */
41	.align	4
423:
43LOC(00):
44	add	%o0,-4,%o0
45	add	%o1,-4,%o1
46	b	LOC(loop00)		/* 4, 8, 12, ... */
47	 orcc	%g0,%g0,%g2
48LOC(01):
49	b	LOC(loop01)		/* 1, 5, 9, ... */
50	 orcc	%g0,%g0,%g2
51	nop
52	nop
53LOC(10):
54	add	%o0,-12,%o0	/* 2, 6, 10, ... */
55	add	%o1,4,%o1
56	b	LOC(loop10)
57	 orcc	%g0,%g0,%g2
58	nop
59LOC(11):
60	add	%o0,-8,%o0	/* 3, 7, 11, ... */
61	add	%o1,-8,%o1
62	b	LOC(loop11)
63	 orcc	%g0,%g0,%g2
64
	/* Main loop, unrolled by four.  The trailing numerals 1-4 tag which
	   limb of the current group of four an instruction works on.

	   Per limb: umul puts the low product word in %g3 and the high word
	   in %y; the add folds in the previous high word (%g2) and, for
	   limbs 2-4, the carry flag; the sum is stored; rd %y,%g2 fetches
	   the new high word.  The load of the next source limb into %o4 is
	   issued in between, one limb ahead of its multiply.

	   At LOC(loop) the product for limb 1 is already in %g3: it was
	   computed by the umul in the delay slot of the bg below.  */
65LOC(loop):
66	addcc	%g3,%g2,%g3	! 1
67	ld	[%o1+4],%o4	! 2
68	st	%g3,[%o0+0]	! 1
69	rd	%y,%g2		! 1
70LOC(loop00):
71	umul	%o4,%o3,%g3	! 2
72	addxcc	%g3,%g2,%g3	! 2
73	ld	[%o1+8],%o4	! 3
74	st	%g3,[%o0+4]	! 2
75	rd	%y,%g2		! 2
76LOC(loop11):
77	umul	%o4,%o3,%g3	! 3
78	addxcc	%g3,%g2,%g3	! 3
79	ld	[%o1+12],%o4	! 4
80	add	%o1,16,%o1		/* advance s1_ptr by four limbs */
81	st	%g3,[%o0+8]	! 3
82	rd	%y,%g2		! 3
83LOC(loop10):
84	umul	%o4,%o3,%g3	! 4
85	addxcc	%g3,%g2,%g3	! 4
86	ld	[%o1+0],%o4	! 1
87	st	%g3,[%o0+12]	! 4
88	add	%o0,16,%o0		/* advance res_ptr by four limbs */
89	rd	%y,%g2		! 4
	/* Fold the pending carry flag into the carry word now: the addcc on
	   the loop counter just below overwrites the condition codes.  */
90	addx	%g0,%g2,%g2
	/* Loop control: subtract four from the remaining size and continue
	   while it is still greater than zero.  The umul sits in the delay
	   slot of a non-annulled branch, so it runs whether or not the
	   branch is taken and always produces the product of the limb
	   currently in %o4.  */
91LOC(loop01):
92	addcc	%o2,-4,%o2
93	bg	LOC(loop)
94	 umul	%o4,%o3,%g3	! 1
95
	/* Tail: finish the last limb.  Add the carry word to the low product
	   word, store it, and return the high word plus the carry flag in
	   %o0 (computed in the delay slot of retl).  */
96	addcc	%g3,%g2,%g3	! 4
97	st	%g3,[%o0+0]	! 4
98	rd	%y,%g2		! 4
99	retl
100	 addx	%g0,%g2,%o0
101
102END(__mpn_mul_1)
103