1/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
2   the result to a second limb vector.
3   Copyright (C) 1999-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21
/* One source body provides two entry points.  By default it is built as
   __mpn_addmul_1; defining USE_AS_SUBMUL builds __mpn_submul_1 instead.
   ADDSUB is the carry-setting form used on the first limb of a group and
   ADDSUBC is the carry-consuming form used on the following limbs.  */
#ifndef USE_AS_SUBMUL
# define FUNC        __mpn_addmul_1
# define ADDSUB      addc
# define ADDSUBC     adde
#else
# define FUNC        __mpn_submul_1
# define ADDSUB      subfc
# define ADDSUBC     subfe
#endif

/* Incoming argument registers.  */
#define RP  r3		/* Limb vector that is loaded, updated and stored.  */
#define UP  r4		/* Limb vector that is only loaded.  */
#define N   r5		/* Limb count; reused as a scratch register later.  */
#define VL  r6		/* Single limb every UP limb is multiplied by.  */

/* Displacements from r1 of the slots where r27-r31 are spilled.  */
#define R31SAVE  (-8)
#define R30SAVE  (-16)
#define R29SAVE  (-24)
#define R28SAVE  (-32)
#define R27SAVE  (-40)
42
/* FUNC (RP, UP, N, VL)

   For each of the N limbs:  RP[i] = RP[i] +/- low (UP[i] * VL + carry-in),
   where "+" is used for __mpn_addmul_1 and "-" for __mpn_submul_1
   (selected through ADDSUB/ADDSUBC).  The value returned in r3 is the
   final high product limb plus the last carry (or borrow).

   In:   r3 = RP, r4 = UP, r5 = N, r6 = VL.
   Out:  r3 = carry-out limb.
   Uses: r0, r7-r12, CTR, CA, cr0, cr6 as scratch; r27-r31 are spilled
	 below r1 on entry and reloaded before the normal return
	 (NOTE(review): this relies on the area just below r1 being safe to
	 use without allocating a frame -- confirm against the ABI).

   N must be at least 1.  With N == 0 the code dispatches to L(b00) with
   CTR == 0 and would still load and store limbs.

   Structure: N & 3 selects one of four lead-in blocks that consume
   0..3 "odd" limbs plus set-up, so that the remainder is handled by the
   4-limb loop at L(top) and the 2-limb tail at L(end).  Inside the loop
   r9/r27 always hold the next two UP limbs (loaded one step ahead),
   r12 holds the pending high product limb and CA holds the pending
   carry.  In the submul build the borrow left in CA by subfc/subfe is
   turned back into an additive carry with the subfe/addic pair:
   "subfe r11,r11,r11" yields CA - 1 and "addic r11,r11,1" then sets CA
   exactly when a borrow happened.  */
ENTRY_TOCLESS (FUNC, 5)
	std	r31, R31SAVE(r1)
	rldicl.	r0, N, 0, 62		/* r0 = N & 3, and set cr0.  */
	std	r30, R30SAVE(r1)
	cmpdi	cr6, r0, 2		/* cr6 = (N & 3) compared with 2.  */
	std	r29, R29SAVE(r1)
	addi	N, N, 3
	std	r28, R28SAVE(r1)
	srdi	N, N, 2			/* N = (N + 3) / 4 = loop count.  */
	std	r27, R27SAVE(r1)
	cfi_offset(r31, R31SAVE)
	cfi_offset(r30, R30SAVE)
	cfi_offset(r29, R29SAVE)
	cfi_offset(r28, R28SAVE)
	cfi_offset(r27, R27SAVE)
	mtctr	N
	beq	cr0, L(b00)		/* N & 3 == 0.  */
	blt	cr6, L(b01)		/* N & 3 == 1.  */
	beq	cr6, L(b10)		/* N & 3 == 2.  */

	/* N & 3 == 3: handle one limb here, leave two for L(end).  */
L(b11):	ld	r9, 0(UP)
	ld	r28, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r12, r9, VL		/* r12 = high limb carried forward.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	addi	RP, RP, 8
	ld	r9, 8(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 16(UP)
	addi	UP, UP, 24
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	/* N & 3 == 0: handle two limbs here, leave two for L(end).  */
	.align	4
L(b00):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL		/* N is free: count is in CTR.  */
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	addc	r7, r7, N		/* Chain high limb into next low limb.  */
	addze	r12, r8			/* r12 = high limb carried forward.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	addi	RP, RP, 16
	ld	r9, 16(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 24(UP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	/* N & 3 == 1: either a single limb in total, or three limbs here
	   followed by the common code.  */
	.align	4
L(b01):	bdnz	L(gt1)			/* More than one limb in total.  */
	/* Exactly one limb.  Only volatile registers are used on this
	   path, so r27-r31 need not be reloaded before returning.  */
	ld	r9, 0(UP)
	ld	r11, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r8, r9, VL
	ADDSUB	r0, r0, r11
	std	r0, 0(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* Turn the borrow into CA ...  */
	addic	r11, r11, 1		/* ... so that addze can add it.  */
#endif
	addze	RP, r8			/* Return high limb + carry/borrow.  */
	blr

L(gt1):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 16(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	ld	r30, 16(RP)
	mulld	r11, r9, VL
	mulhdu	r10, r9, VL
	addc	r7, r7, N		/* Chain the three products together.  */
	adde	r11, r11, r8
	addze	r12, r10		/* r12 = high limb carried forward.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	r11, r11, r30
	std	r11, 16(RP)
	addi	RP, RP, 24
	ld	r9, 24(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 32(UP)
	addi	UP, UP, 40
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	/* N & 3 == 2: nothing to do up front; start with no carry.  */
L(b10):	addic	r0, r0, 0		/* Adding 0 cannot carry: clears CA.  */
	li	r12, 0			/* No pending high limb.  */
	ld	r9, 0(UP)
	ld	r27, 8(UP)
	bdz	L(end)			/* Exactly two limbs in total.  */
	addi	UP, UP, 16

	/* Main loop: four limbs per iteration.  On entry r9/r27 hold the
	   first two UP limbs, r12 the pending high limb and CA the pending
	   carry; UP already points past r9/r27.  */
	.align	4
L(top):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 0(UP)
	ld	r28, 0(RP)
	ld	r27, 8(UP)
	ld	r29, 8(RP)
	adde	r0, r0, r12		/* Add pending high limb and carry.  */
	adde	r7, r7, N
	mulld	N, r9, VL
	mulhdu	r10, r9, VL
	mulld	r11, r27, VL
	mulhdu	r12, r27, VL
	ld	r9, 16(UP)		/* Limbs for the next pass or L(end).  */
	ld	r30, 16(RP)
	ld	r27, 24(UP)
	ld	r31, 24(RP)
	adde	N, N, r8
	adde	r11, r11, r10
	addze	r12, r12		/* r12 = high limb carried forward.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	N, N, r30
	std	N, 16(RP)
	ADDSUBC	r11, r11, r31
	std	r11, 24(RP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1.  */
#endif
	addi	RP, RP, 32
L(bot):
#ifdef USE_AS_SUBMUL
	addic	r11, r11, 1		/* CA = 1 iff the last subtract borrowed.  */
#endif
	bdnz	L(top)

	/* Tail: the last two limbs, already loaded into r9/r27.  */
L(end):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	adde	r0, r0, r12
	adde	r7, r7, N
	addze	r8, r8			/* r8 = final high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* Turn the borrow into CA ...  */
	addic	r11, r11, 1		/* ... so that addze can add it.  */
#endif
	addze	RP, r8			/* Return high limb + carry/borrow.  */
	ld	r31, R31SAVE(r1)
	ld	r30, R30SAVE(r1)
	ld	r29, R29SAVE(r1)
	ld	r28, R28SAVE(r1)
	ld	r27, R27SAVE(r1)
	blr
END(FUNC)
220