1/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
2   and store difference in a third limb vector.
3   Copyright (C) 1992-2022 Free Software Foundation, Inc.
4   This file is part of the GNU MP Library.
5
6   The GNU MP Library is free software; you can redistribute it and/or modify
7   it under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or (at your
9   option) any later version.
10
11   The GNU MP Library is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14   License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with the GNU MP Library; see the file COPYING.LIB.  If not,
18   see <https://www.gnu.org/licenses/>.  */
19
20#include "sysdep.h"
21#include "asm-syntax.h"
22
/* Stack offsets of the incoming arguments.  Four callee-saved registers
   (edi, esi, ebp, ebx) are pushed in the prologue, so the first argument
   sits 4 (return address) + 16 (saved regs) bytes above %esp.  */
#define PARMS	4+16		/* space for 4 saved regs */
#define RES	PARMS		/* mp_ptr res_ptr — destination vector */
#define S1	RES+4		/* mp_srcptr s1_ptr — minuend vector */
#define S2	S1+4		/* mp_srcptr s2_ptr — subtrahend vector */
#define SIZE	S2+4		/* mp_size_t size — limb count, > 0 */
28
	.text

/* mp_limb_t __mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
			   mp_srcptr s2_ptr, mp_size_t size)

   Subtract {s2_ptr, size} from {s1_ptr, size} limb-wise with borrow
   propagation, store the difference at {res_ptr, size}, and return the
   final borrow (0 or 1) in %eax.

   Register roles in the body:
     %edi  res_ptr (advanced a full 32 bytes early inside the unrolled
	   loop, hence the negative store displacements)
     %esi  s1_ptr
     %ebx  s2_ptr
     %ebp  the NEXT s2 limb, loaded one step ahead (software pipelining
	   so the load and the sbb of a limb never collide)
     %ecx  count of 8-limb unrolled iterations
     %edx  scratch / count of leftover limbs
     %eax  scratch / return value

   The running borrow lives in the carry flag for the whole function, so
   only CF-preserving instructions (lea, mov, push/pop, inc, dec) may
   appear between the sbbl chains.  */
ENTRY (__mpn_sub_n)

	/* Save the callee-saved registers we use.  */
	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)
	pushl	%ebp
	cfi_adjust_cfa_offset (4)
	pushl	%ebx
	cfi_adjust_cfa_offset (4)

	/* Load the arguments and prime the s2-limb pipeline in %ebp.  */
	movl	RES(%esp),%edi
	cfi_rel_offset (edi, 12)
	movl	S1(%esp),%esi
	cfi_rel_offset (esi, 8)
	movl	S2(%esp),%ebx
	cfi_rel_offset (ebx, 0)
	movl	SIZE(%esp),%ecx
	movl	(%ebx),%ebp		/* first s2 limb, one step ahead */
	cfi_rel_offset (ebp, 4)

	decl	%ecx			/* the last limb is done at L(end2) */
	movl	%ecx,%edx
	shrl	$3,%ecx			/* ecx = number of 8-limb iterations */
	andl	$7,%edx			/* edx = leftover limbs (0..7) */
	testl	%ecx,%ecx		/* zero carry flag */
	jz	L(end)
	pushl	%edx			/* spill leftover count across the loop */
	cfi_adjust_cfa_offset (4)

	ALIGN (3)
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
	leal	32(%edi),%edi		/* advance res_ptr early; lea keeps CF */

	/* Four groups of two limbs each.  Pattern: load two s1 limbs,
	   sbb the pipelined s2 limb from %ebp, reload %ebp with the next
	   s2 limb, sbb again, then store both results (displacements are
	   -32-based because %edi was advanced above).  */
L(1):	movl	(%esi),%eax
	movl	4(%esi),%edx
	sbbl	%ebp,%eax
	movl	4(%ebx),%ebp
	sbbl	%ebp,%edx
	movl	8(%ebx),%ebp
	movl	%eax,-32(%edi)
	movl	%edx,-28(%edi)

L(2):	movl	8(%esi),%eax
	movl	12(%esi),%edx
	sbbl	%ebp,%eax
	movl	12(%ebx),%ebp
	sbbl	%ebp,%edx
	movl	16(%ebx),%ebp
	movl	%eax,-24(%edi)
	movl	%edx,-20(%edi)

L(3):	movl	16(%esi),%eax
	movl	20(%esi),%edx
	sbbl	%ebp,%eax
	movl	20(%ebx),%ebp
	sbbl	%ebp,%edx
	movl	24(%ebx),%ebp
	movl	%eax,-16(%edi)
	movl	%edx,-12(%edi)

L(4):	movl	24(%esi),%eax
	movl	28(%esi),%edx
	sbbl	%ebp,%eax
	movl	28(%ebx),%ebp
	sbbl	%ebp,%edx
	movl	32(%ebx),%ebp		/* first s2 limb of the next round */
	movl	%eax,-8(%edi)
	movl	%edx,-4(%edi)

	leal	32(%esi),%esi
	leal	32(%ebx),%ebx
	decl	%ecx			/* dec preserves CF: borrow survives */
	jnz	L(oop)

	popl	%edx			/* reload leftover-limb count */
	cfi_adjust_cfa_offset (-4)
L(end):
	decl	%edx			/* test %edx w/o clobbering carry */
	js	L(end2)			/* edx was 0: only the final limb left */
	incl	%edx			/* undo the dec; inc also preserves CF */
L(oop2):
	/* One limb per iteration; %ebp still runs one s2 limb ahead.  */
	leal	4(%edi),%edi
	movl	(%esi),%eax
	sbbl	%ebp,%eax
	movl	4(%ebx),%ebp
	movl	%eax,-4(%edi)
	leal	4(%esi),%esi
	leal	4(%ebx),%ebx
	decl	%edx
	jnz	L(oop2)
L(end2):
	/* Final limb: %ebp already holds the last s2 limb.  */
	movl	(%esi),%eax
	sbbl	%ebp,%eax
	movl	%eax,(%edi)

	sbbl	%eax,%eax		/* eax = -borrow */
	negl	%eax			/* eax = borrow (0 or 1) */

	/* Restore callee-saved registers and return.  */
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (__mpn_sub_n)
144