1/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
2   sum in a third limb vector.
3   Copyright (C) 2006-2022 Free Software Foundation, Inc.
4   This file is part of the GNU MP Library.
5
6   The GNU MP Library is free software; you can redistribute it and/or modify
7   it under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or (at your
9   option) any later version.
10
11   The GNU MP Library is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14   License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with the GNU MP Library; see the file COPYING.LIB.  If not,
18   see <https://www.gnu.org/licenses/>.  */
19
20#include "sysdep.h"
21#include "asm-syntax.h"
22
/* Symbolic names for the registers holding the arguments.  %rdi, %rsi, %rdx
   and %rcx are the first four integer argument registers of the System V
   AMD64 calling convention.  As used by the routine below:
     rp - where the result limbs are stored,
     up - first source vector,
     vp - second source vector,
     n  - number of limbs in each vector.  */
23#define rp	%rdi
24#define up	%rsi
25#define vp	%rdx
26#define n	%rcx
/* NOTE(review): cy is not referenced by name in the code visible here (the
   routine spells out %r8 directly); presumably kept for a variant that takes
   a carry-in argument -- confirm before removing.  */
27#define cy	%r8
28
/* Defaults that apply only when func has not been defined beforehand: the
   entry point name and the carry-propagating instruction used for each limb.
   NOTE(review): presumably another file defines func and ADCSBB (e.g. as sbb)
   and then includes this one to build a sibling routine -- verify in the
   tree.  */
29#ifndef func
30# define func __mpn_add_n
31# define ADCSBB adc
32#endif
33
34	.text
/* func (default __mpn_add_n): combine the n-limb vectors at up and vp limb by
   limb with ADCSBB (default adc), feeding the carry flag from each limb into
   the next, and store the n result limbs at rp.

   In:    rp (%rdi) = destination, up (%rsi) and vp (%rdx) = sources,
	  n (%rcx) = limb count.  The first limb of each source is loaded
	  before n is examined, so n must be at least 1.
   Out:   %eax = carry flag left by the last ADCSBB (0 or 1).
   Uses:  %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and the flags; the stack is
	  not touched.

   Structure: the loop at L(top) handles four limbs per iteration.  n mod 4
   picks one of the entry points L(e00), L(e01), L(e10), L(e11) so that the
   count left for full iterations is a multiple of four.  Limbs travel through
   two alternating register pairs, (%r10,%r11) and (%r8,%r9): every step loads
   one pair, then combines and stores the pair loaded by the previous step.
   Only mov, lea, jmp and jrcxz sit between consecutive ADCSBB instructions;
   none of them writes the flags, which is what keeps the carry chain intact.
   Do not replace any of them with a flag-writing instruction.  */
35ENTRY (func)
36	xor	%r8, %r8	/* r8 = 0; shifted into CF below */
37	mov	(up), %r10	/* preload limb 0 of both sources */
38	mov	(vp), %r11
39
40	lea	-8(up,n,8), up	/* up -> last source limb */
41	lea	-8(vp,n,8), vp	/* vp -> last source limb */
42	lea	-16(rp,n,8), rp	/* rp -> second-to-last result limb */
43	mov	%ecx, %eax
44	neg	n		/* n < 0: index that counts up toward zero */
45	and	$3, %eax	/* eax = original n mod 4 */
46	je	L(b00)
47	add	%rax, n		/* clear low rcx bits for jrcxz */
/* Here n = -(original n rounded down to a multiple of 4), possibly 0.  */
48	cmp	$2, %eax
49	jl	L(b01)		/* n mod 4 == 1 */
50	je	L(b10)		/* n mod 4 == 2 */
51
/* Each L(bXX) stub re-initialises CF (clobbered by the dispatch arithmetic
   above) and, where the chosen entry point expects limb 0 in (%r8,%r9) rather
   than (%r10,%r11), moves it there.  mov does not alter CF.  */
52L(b11):	shr	%r8		/* r8 is 0, so CF = 0: initial carry */
53	jmp	L(e11)
54
55L(b00):	shr	%r8		/* r8 is 0, so CF = 0: initial carry */
56	mov	%r10, %r8
57	mov	%r11, %r9
58	lea	4(n), n		/* n += 4 without touching CF */
59	jmp	L(e00)
60
61L(b01):	shr	%r8		/* r8 is 0, so CF = 0: initial carry */
62	jmp	L(e01)
63
64L(b10):	shr	%r8		/* r8 is 0, so CF = 0: initial carry */
65	mov	%r10, %r8
66	mov	%r11, %r9
67	jmp	L(e10)
68
/* Exit, reached from L(e01) once n (%rcx) is zero.  The last limb pair is
   still pending in (%r10,%r11).  */
69L(end):	ADCSBB	%r11, %r10
70	mov	%r10, 8(rp)	/* rp + 8 is the last result limb */
71	mov	%ecx, %eax	/* clear eax, ecx contains 0 */
72	adc	%eax, %eax	/* eax = 0 + 0 + CF: return the final carry */
73	ret
74
75	.p2align 4
76L(top):
77	mov	-24(up,n,8), %r8
78	mov	-24(vp,n,8), %r9
79	ADCSBB	%r11, %r10
80	mov	%r10, -24(rp,n,8)
81L(e00):
82	mov	-16(up,n,8), %r10
83	mov	-16(vp,n,8), %r11
84	ADCSBB	%r9, %r8
85	mov	%r8, -16(rp,n,8)
86L(e11):
87	mov	-8(up,n,8), %r8
88	mov	-8(vp,n,8), %r9
89	ADCSBB	%r11, %r10
90	mov	%r10, -8(rp,n,8)
91L(e10):
92	mov	(up,n,8), %r10
93	mov	(vp,n,8), %r11
94	ADCSBB	%r9, %r8
95	mov	%r8, (rp,n,8)
96L(e01):
97	jrcxz	L(end)		/* tests rcx == 0 without reading or writing flags */
98	lea	4(n), n		/* advance four limbs; lea keeps CF */
99	jmp	L(top)
100END (func)
101