! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb
! and subtract the result from a second limb vector.  (A C-level sketch
! of the operation follows this notice.)
!
! Copyright (C) 2013-2022 Free Software Foundation, Inc.
! This file is part of the GNU C Library.
!
! The GNU C Library is free software; you can redistribute it and/or
! modify it under the terms of the GNU Lesser General Public
! License as published by the Free Software Foundation; either
! version 2.1 of the License, or (at your option) any later version.
!
! The GNU C Library is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
! Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public
! License along with the GNU C Library; if not, see
! <https://www.gnu.org/licenses/>.
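
! The computation, in rough C terms, is sketched below.  This is only an
! illustrative sketch, not glibc's definition: the limb typedef, the
! parameter types and the local names (t, lo, rl, cy) are assumptions
! made for the example.  The assembly below keeps the equivalent of 'cy'
! split between the 'carry' register and the %icc carry bit.
!
!	typedef unsigned int mp_limb_t;		/* assumed 32-bit limb */
!
!	mp_limb_t
!	__mpn_submul_1 (mp_limb_t *res_ptr, const mp_limb_t *s1_ptr,
!			long sz, mp_limb_t s2_limb)
!	{
!	  mp_limb_t cy = 0;
!	  while (sz-- > 0)
!	    {
!	      /* 64-bit product of one limb, plus the running carry.  */
!	      unsigned long long t
!		= (unsigned long long) *s1_ptr++ * s2_limb + cy;
!	      mp_limb_t lo = (mp_limb_t) t;
!	      mp_limb_t rl = *res_ptr;
!	      /* Subtract the low word from the result limb ...  */
!	      *res_ptr++ = rl - lo;
!	      /* ... and carry the high word plus the borrow onward.  */
!	      cy = (mp_limb_t) (t >> 32) + (lo > rl);
!	    }
!	  return cy;
!	}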

#include <sysdep.h>

! After the register window save in the prologue, the incoming arguments
! (%o0-%o3 at the call site) are visible here as %i0-%i3.
#define res_ptr	%i0
#define s1_ptr	%i1
#define sz_arg	%i2
#define s2l_arg	%i3
#define sz	%o4
#define carry	%o5
#define s2_limb	%g1
#define tmp1	%l0
#define tmp2	%l1
#define tmp3	%l2
#define tmp4	%l3
#define tmp64_1	%g3
#define tmp64_2	%o3

ENTRY(__mpn_submul_1)
	save	%sp, -96, %sp		! new register window and minimum stack frame
	srl	sz_arg, 0, sz		! zero-extend the 32-bit size ...
	srl	s2l_arg, 0, s2_limb	! ... and multiplier arguments
	subcc	sz, 1, sz
	be,pn	%icc, .Lfinal_limb	! exactly one limb to process
	 subcc	%g0, 0, carry		! carry = 0 and %icc.c cleared (delay slot)

! The loop below handles two limbs per iteration.  The running carry is
! kept in two pieces: 'carry' holds the high word of the previous
! product-plus-carry, and %icc.c holds the borrow from the previous
! 32-bit subtraction; 'addx' folds both back in at once.
.Lloop:
	lduw	[s1_ptr  + 0x00], tmp1
	lduw	[res_ptr + 0x00], tmp3
	lduw	[s1_ptr  + 0x04], tmp2
	lduw	[res_ptr + 0x04], tmp4
	mulx	tmp1, s2_limb, tmp64_1	! full 64-bit products
	add	s1_ptr, 8, s1_ptr
	mulx	tmp2, s2_limb, tmp64_2
	sub	sz, 2, sz
	add	res_ptr, 8, res_ptr
	addx	carry, tmp64_1, tmp64_1	! product + previous high word + borrow
	srlx	tmp64_1, 32, carry	! new high word becomes the next carry
	subcc	tmp3, tmp64_1, tmp64_1	! %icc.c records the 32-bit borrow
	stw	tmp64_1, [res_ptr - 0x08]
	addx	carry, tmp64_2, tmp64_2	! same again for the second limb
	srlx	tmp64_2, 32, carry
	subcc	tmp4, tmp64_2, tmp64_2
	brgz	sz, .Lloop
	 stw	tmp64_2, [res_ptr - 0x04]

	brlz,pt	sz, .Lfinish		! sz < 0: even count, all limbs done
	 nop

.Lfinal_limb:
	! The last (odd) limb is handled like one half of the loop body.
	lduw	[s1_ptr + 0x00], tmp1
	lduw	[res_ptr + 0x00], tmp3
	mulx	tmp1, s2_limb, tmp64_1
	addx	carry, tmp64_1, tmp64_1
	srlx	tmp64_1, 32, carry
	subcc	tmp3, tmp64_1, tmp64_1
	stw	tmp64_1, [res_ptr + 0x00]

.Lfinish:
	addx	carry, 0, carry		! fold in the final borrow bit
	jmpl	%i7 + 0x8, %g0		! return ...
	 restore carry, 0, %o0		! ... with the carry limb in %o0
END(__mpn_submul_1)