1! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a
2! limb and add the result to a second limb vector.
3!
4! Copyright (C) 2013-2022 Free Software Foundation, Inc.
5! This file is part of the GNU C Library.
6!
7! The GNU C Library is free software; you can redistribute it and/or
8! modify it under the terms of the GNU Lesser General Public
9! License as published by the Free Software Foundation; either
10! version 2.1 of the License, or (at your option) any later version.
11!
12! The GNU C Library is distributed in the hope that it will be useful,
13! but WITHOUT ANY WARRANTY; without even the implied warranty of
14! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15! Lesser General Public License for more details.
16!
17! You should have received a copy of the GNU Lesser General Public
18! License along with the GNU C Library; if not, see
19! <https://www.gnu.org/licenses/>.
20
21#include <sysdep.h>
22
! Symbolic names for the registers used by __mpn_addmul_1_vis3.
! The first four are incoming-argument registers as seen after the save
! at function entry.
!   res_ptr  address of the limb vector that is read, added to and stored
!   s1_ptr   address of the limb vector that is multiplied
!   sz       limb count, turned into a down-counter by the function
!   s2_limb  the single multiplier limb
23#define res_ptr	%i0
24#define s1_ptr	%i1
25#define sz	%i2
26#define s2_limb	%i3
! Running carry limb passed from one limb position to the next; its final
! value is what the function hands back to the caller.
27#define carry	%o5
! Temporaries: tmp1/tmp2 hold limbs loaded from s1_ptr, tmp3/tmp4 hold
! limbs loaded from res_ptr, tmp5/tmp7 hold low product halves and
! tmp6/tmp8 hold high product halves.
28#define tmp1	%g1
29#define tmp2	%g2
30#define tmp3	%g3
31#define tmp4	%o4
32#define tmp5	%l0
33#define tmp6	%l1
34#define tmp7	%l2
35#define tmp8	%l3
36
! Tell the assembler that %g2 and %g3 are used here as scratch registers.
37	.register	%g2,#scratch
38	.register	%g3,#scratch
! __mpn_addmul_1_vis3: multiply the limb vector at s1_ptr by s2_limb and
! add the products, limb by limb, into the vector at res_ptr.
!
! In:   %i0 (res_ptr)  vector that is read and updated in place
!       %i1 (s1_ptr)   vector that is multiplied
!       %i2 (sz)       number of 64-bit limbs; must be at least 1 (a count
!                      of 0 is not detected and would enter the two-limb
!                      loop)
!       %i3 (s2_limb)  multiplier limb
! Out:  %o0 in the window of the caller holds the final carry limb
! Uses: %g1-%g3, %o4, %o5, %l0-%l3 and the integer condition codes
!
! For every limb the 128-bit product is formed with mulx (low 64 bits)
! and umulxhi (high 64 bits).  The incoming carry and the old result limb
! are added to the low half with addcc, and each carry-out is folded into
! the high half with addxc.  The high half then becomes the carry for the
! next limb.  product + carry-in + old limb always fits in 128 bits, so
! the high half cannot overflow.
39ENTRY(__mpn_addmul_1_vis3)
40	save	%sp, -176, %sp		! new register window, 176-byte frame
41	subcc	sz, 1, sz		! sz = limbs remaining - 1
! Branch on %xcc, not %icc: sz is treated as a full 64-bit value by the
! brgz/brlz tests further down, and a plain be would only look at the low
! 32 bits of sz - 1, mistaking a count of k * 2^32 + 1 for a single limb.
42	be,pn	%xcc, .Lfinal_limb	! exactly one limb: skip the paired loop
43	 clr	carry			! delay slot: carry-in starts at zero
44
! Main loop, two limbs per iteration.  On entry sz >= 1, i.e. at least two
! limbs remain.  Both pointers are advanced in the middle of the body, so
! the stores address the limbs just processed with negative offsets.
45.Lloop:
46	ldx	[s1_ptr  + 0x00], tmp1	! first multiplicand limb
47	ldx	[res_ptr + 0x00], tmp3	! first old result limb
48	ldx	[s1_ptr  + 0x08], tmp2	! second multiplicand limb
49	ldx	[res_ptr + 0x08], tmp4	! second old result limb
50	mulx	tmp1, s2_limb, tmp5	! low half of first product
51	add	s1_ptr, 0x10, s1_ptr	! advance source by two limbs
52	umulxhi	tmp1, s2_limb, tmp6	! high half of first product
53	add	res_ptr, 0x10, res_ptr	! advance destination by two limbs
54	mulx	tmp2, s2_limb, tmp7	! low half of second product
55	sub	sz, 2, sz		! two limbs consumed (flags untouched)
56	umulxhi	tmp2, s2_limb, tmp8	! high half of second product
57	addcc	carry, tmp5, tmp5	! low1 += carry-in
58	addxc	%g0, tmp6, carry	! carry = high1 + carry-out
59	addcc	tmp3, tmp5, tmp5	! low1 += old result limb
60	addxc	%g0, carry, carry	! carry += carry-out
61	stx	tmp5, [res_ptr - 0x10]	! store first updated limb
62	addcc	carry, tmp7, tmp7	! low2 += carry from first limb
63	addxc	%g0, tmp8, carry	! carry = high2 + carry-out
64	addcc	tmp4, tmp7, tmp7	! low2 += old result limb
65	addxc	%g0, carry, carry	! carry += carry-out
66	brgz	sz, .Lloop		! sz > 0: at least two more limbs
67	 stx	tmp7, [res_ptr - 0x08]	! delay slot: store second updated limb
68
! Here sz is -1 (count was even, nothing left) or 0 (one limb left).
69	brlz,pt	sz, .Lfinish
70	 nop
71
! Single remaining limb: same sequence as one half of the loop body, with
! the pointers left where they are.
72.Lfinal_limb:
73	ldx	[s1_ptr  + 0x00], tmp1	! multiplicand limb
74	ldx	[res_ptr + 0x00], tmp3	! old result limb
75	mulx	tmp1, s2_limb, tmp5	! low half of product
76	umulxhi	tmp1, s2_limb, tmp6	! high half of product
77	addcc	carry, tmp5, tmp5	! low += carry-in
78	addxc	%g0, tmp6, carry	! carry = high + carry-out
79	addcc	tmp3, tmp5, tmp5	! low += old result limb
80	addxc	%g0, carry, carry	! carry += carry-out
81	stx	tmp5, [res_ptr + 0x00]	! store updated limb
82
83.Lfinish:
84	jmpl	%i7 + 8, %g0		! return to the caller
85	 restore carry, 0, %o0		! delay slot: pop window, return carry
86END(__mpn_addmul_1_vis3)
87