1/* Compare two strings for differences.
2   For SPARC v9.
3   Copyright (C) 2011-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include <asm/asi.h>
22
23#ifndef XCC
24	.register	%g2, #scratch	/* used below as rTMP2 */
25	.register	%g3, #scratch	/* used below as rWORD2 */
26	.register	%g6, #scratch	/* used below as rBARREL */
27#endif
28
29#define rSTR1		%o0	/* first string pointer; %o0 also carries the result */
30#define rSTR2		%o1	/* second string pointer, turned into the offset (s2 - s1) */
31#define r0101		%o2	/* 0x0101010101010101 */
32#define r8080		%o3	/* 0x8080808080808080 */
33#define rSTRXOR		%o4	/* rWORD1 ^ rWORD2; doubles as a temporary during setup */
34#define rWORD1		%o5	/* current 64-bit word of the first string */
35#define rTMP1		%g1	/* scratch */
36#define rTMP2		%g2	/* scratch */
37#define rWORD2		%g3	/* current 64-bit word of the second string */
38#define rSLL		%g4	/* barrel shifter: left shift count, in bits */
39#define rSRL		%g5	/* barrel shifter: right shift count, 64 - rSLL */
40#define rBARREL		%g6	/* barrel shifter: most recently loaded aligned word of s2 */
41
42	/* There are two cases, either the two pointers are aligned
43	 * identically or they are not.  If they have the same
44	 * alignment we can use the normal full speed loop.  Otherwise
45	 * we have to use the barrel-shifter version.
46	 */
47
48	.text
49	.align	32
/* int strcmp (const char *s1, const char *s2)
 *
 * In:   %o0 (rSTR1) = s1, %o1 (rSTR2) = s2.
 * Out:  %o0 = 0 if the strings compare equal, 1 if the first
 *       mis-matching byte is larger in s1, -1 if it is larger in s2.
 * Uses: %o1-%o5, %g1-%g6 and the integer condition codes.  Leaf
 *       routine: returns with retl and never touches the stack.
 *
 * The strings are compared one 64-bit word at a time.  A word W holds
 * a zero byte exactly when
 *
 *    (W - 0x0101010101010101) & ~W & 0x8080808080808080
 *
 * is non-zero; the loops keep the two halves of that test in rTMP2
 * (W - 0x01..01) and rTMP1 (~W & 0x80..80).  The result is derived
 * from an unsigned comparison of whole words, so the most significant
 * byte of a word is treated as the earliest character.
 *
 * Three entry paths exist:
 *   - both pointers 8-byte aligned: straight into the aligned loop;
 *   - both pointers misaligned by the same amount: round both down,
 *     force the bytes in front of the strings to 0xff, then use the
 *     aligned loop;
 *   - different alignments: s1 is read with aligned loads and each s2
 *     word is assembled from two aligned loads (the barrel shifter).
 *
 * NOTE(review): ASI_PNF comes from <asm/asi.h>; presumably it is the
 * SPARC V9 primary no-fault ASI, which would let the word loads read
 * outside the strings without trapping -- confirm.
 */
50ENTRY(strcmp)
51	or	rSTR2, rSTR1, rTMP1	/* rTMP1 = s1 | s2 */
52	sethi	%hi(0x80808080), r8080	/* start building 0x80808080 */
53
54	andcc	rTMP1, 0x7, %g0		/* is either pointer not 8-byte aligned? */
55	bne,pn	%icc, .Lmaybe_barrel_shift
56	 or	r8080, %lo(0x80808080), r8080	/* delay slot: r8080 = 0x80808080 */
57	ldx	[rSTR1], rWORD1		/* first word of s1 */
58
59	sub	rSTR2, rSTR1, rSTR2	/* rSTR2 = s2 - s1, so [rSTR1 + rSTR2] addresses s2 */
60	sllx	r8080, 32, rTMP1
61
62	ldx	[rSTR1 + rSTR2], rWORD2	/* first word of s2 */
63	or	r8080, rTMP1, r8080	/* r8080 = 0x8080808080808080 */
64
65	ba,pt	%xcc, .Laligned_loop_entry
66	 srlx	r8080, 7, r0101		/* delay slot: r0101 = 0x0101010101010101 */
67
68	.align	32
69.Laligned_loop_entry:
70.Laligned_loop:
71	add	rSTR1, 8, rSTR1		/* step to the next word for the loads below */
72
73	sub	rWORD1, r0101, rTMP2	/* zero-byte test, first half */
74	xorcc	rWORD1, rWORD2, rSTRXOR	/* non-zero iff the two words differ */
75	bne,pn	%xcc, .Lcommon_endstring	/* words differ: work out the result */
76
77	 andn	r8080, rWORD1, rTMP1	/* delay slot (not annulled): zero-byte test, second half */
78
79	ldxa	[rSTR1] ASI_PNF, rWORD1	/* next word of s1; rTMP1/rTMP2 still describe the old one */
80	andcc	rTMP1, rTMP2, %g0	/* non-zero iff the old word held a zero byte */
81	be,a,pt	%xcc, .Laligned_loop	/* no terminator yet: keep looping */
82
83	 ldxa	[rSTR1 + rSTR2] ASI_PNF, rWORD2	/* annulled delay slot: next word of s2, only when looping */
84
85.Lcommon_equal:
86	retl				/* equal words that contain a terminator: strings match */
87	 mov	0, %o0
88
89	/* All loops terminate here once they find an unequal word.
90	 * If a zero byte appears in the word before the first unequal
91	 * byte, we must report zero.  Otherwise we report '1' or '-1'
92	 * depending upon whether the first mis-matching byte is larger
93	 * in the first string or the second, respectively.
94	 *
95	 * First we compute a 64-bit mask value that has "0x01" in
96	 * each byte where a zero exists in rWORD1.  rSTRXOR holds the
97	 * value (rWORD1 ^ rWORD2).  Therefore if, considered as an
98	 * unsigned quantity, our "0x01" mask value is "greater than"
99	 * rSTRXOR, then a zero terminating byte comes first and
100	 * we report '0'.
101	 *
102	 * The formula for this mask is:
103	 *
104	 *    mask_tmp1 = ~rWORD1 & 0x8080808080808080;
105	 *    mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) +
106	 *                 0x7f7f7f7f7f7f7f7f);
107	 *
108	 *    mask = ((mask_tmp1 & ~mask_tmp2) >> 7);
109	 */
110.Lcommon_endstring:		/* on entry rTMP1 already holds mask_tmp1 */
111	andn	rWORD1, r8080, rTMP2	/* rTMP2 = rWORD1 & 0x7f7f7f7f7f7f7f7f */
112	or	r8080, 1, %o1		/* %o1 = 0x8080808080808081; the s2 offset is no longer needed */
113
114	mov	1, %o0			/* provisional result: s1 is larger */
115	sub	rTMP2, %o1, rTMP2	/* same as adding 0x7f..7f modulo 2^64: mask_tmp2 */
116
117	cmp	rWORD1, rWORD2
118	andn	rTMP1, rTMP2, rTMP1	/* mask_tmp1 & ~mask_tmp2; leaves the condition codes alone */
119
120	movleu	%xcc, -1, %o0		/* rWORD1 below rWORD2 (unsigned): s2 is larger */
121	srlx	rTMP1, 7, rTMP1		/* mask: 0x01 in every byte where rWORD1 is zero */
122
123	/* In order not to be influenced by bytes after the zero byte, we
124	 * have to retain only the highest bit in the mask for the comparison
125	 * with rSTRXOR to work properly.
126	 */
127	mov	0, rTMP2		/* rTMP2 = bit position of the highest marker found so far */
128	andcc	rTMP1, 0x0100, %g0	/* marker at bit 8? */
129
130	movne	%xcc, 8, rTMP2
131	sllx	rTMP1, 63 - 16, %o1	/* move bit 16 into the sign bit */
132
133	movrlz	%o1, 16, rTMP2		/* marker at bit 16 supersedes the lower ones */
134	sllx	rTMP1, 63 - 24, %o1
135
136	movrlz	%o1, 24, rTMP2
137	sllx	rTMP1, 63 - 32, %o1
138
139	movrlz	%o1, 32, rTMP2
140	sllx	rTMP1, 63 - 40, %o1
141
142	movrlz	%o1, 40, rTMP2
143	sllx	rTMP1, 63 - 48, %o1
144
145	movrlz	%o1, 48, rTMP2
146	sllx	rTMP1, 63 - 56, %o1
147
148	movrlz	%o1, 56, rTMP2
149
150	srlx	rTMP1, rTMP2, rTMP1	/* shift out every marker below bit rTMP2 ... */
151
152	sllx	rTMP1, rTMP2, rTMP1	/* ... and shift back: only the highest marker is left */
153
154	cmp	rTMP1, rSTRXOR		/* is the marker above the first differing bit? */
155	retl
156	 movgu	%xcc, 0, %o0		/* delay slot: yes, the terminator came first: return 0 */
157
158.Lmaybe_barrel_shift:		/* at least one pointer is not 8-byte aligned */
159	sub	rSTR2, rSTR1, rSTR2	/* rSTR2 = s2 - s1 */
160	sllx	r8080, 32, rTMP1
161
162	or	r8080, rTMP1, r8080	/* r8080 = 0x8080808080808080 */
163	and	rSTR1, 0x7, rTMP2	/* rTMP2 = misalignment of s1 in bytes */
164
165	srlx	r8080, 7, r0101		/* r0101 = 0x0101010101010101 */
166	andn	rSTR1, 0x7, rSTR1	/* round s1 down to an 8-byte boundary */
167
168	ldxa	[rSTR1] ASI_PNF, rWORD1	/* aligned word containing the start of s1 */
169	andcc	rSTR2, 0x7, rSLL	/* zero iff s1 and s2 share the same misalignment */
170	sll	rTMP2, 3, rSTRXOR	/* number of bits in front of the start of s1 */
171
172	bne,pn	%icc, .Lneed_barrel_shift
173	 mov	-1, rTMP1		/* delay slot (not annulled): all ones, used for the byte mask */
174	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL	/* matching aligned word of s2 */
175
176	srlx	rTMP1, rSTRXOR, rTMP2	/* ones over the bytes that belong to the strings */
177
178	orn	rWORD1, rTMP2, rWORD1	/* force the bytes in front of s1 to 0xff */
179	ba,pt	%xcc, .Laligned_loop_entry
180	 orn	rBARREL, rTMP2, rWORD2	/* delay slot: same for s2, so those bytes match and are non-zero */
181
182.Lneed_barrel_shift:		/* s1 and s2 have different alignments */
183	sllx	rSLL, 3, rSLL		/* left shift count in bits */
184	andn	rSTR2, 0x7, rSTR2	/* round the offset down to a multiple of 8 */
185
186	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL	/* first aligned word of s2 */
187	mov	64, rTMP2
188	sub	rTMP2, rSLL, rSRL	/* right shift count = 64 - rSLL */
189
190	srlx	rTMP1, rSTRXOR, rTMP1	/* ones over the bytes that belong to the strings */
191	add	rSTR2, 8, rSTR2		/* later s2 loads fetch one word ahead */
192
193	orn	rWORD1, rTMP1, rWORD1	/* force the bytes in front of s1 to 0xff */
194	sllx	rBARREL, rSLL, rWORD2	/* upper part of the s2 word */
195	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL	/* second aligned word of s2 */
196
197	add	rSTR1, 8, rSTR1
198	sub	rWORD1, r0101, rTMP2	/* zero-byte test, first half */
199
200	srlx	rBARREL, rSRL, rSTRXOR	/* lower part of the s2 word (rSTRXOR used as a temporary) */
201
202	or	rWORD2, rSTRXOR, rWORD2	/* rWORD2 = s2 bytes lined up with rWORD1 */
203
204	orn	rWORD2, rTMP1, rWORD2	/* force the bytes in front of s2 to 0xff as well */
205	ba,pt	%xcc, .Lbarrel_shift_loop_entry
206	 andn	r8080, rWORD1, rTMP1	/* delay slot: zero-byte test, second half */
207
208.Lbarrel_shift_loop:
209	sllx	rBARREL, rSLL, rWORD2	/* upper part comes from the previously loaded word */
210	ldxa	[rSTR1 + rSTR2] ASI_PNF, rBARREL	/* next aligned word of s2 */
211
212	add	rSTR1, 8, rSTR1
213	sub	rWORD1, r0101, rTMP2	/* zero-byte test, first half */
214
215	srlx	rBARREL, rSRL, rSTRXOR	/* lower part comes from the new word */
216	andn	r8080, rWORD1, rTMP1	/* zero-byte test, second half */
217
218	or	rWORD2, rSTRXOR, rWORD2	/* rWORD2 = s2 bytes lined up with rWORD1 */
219
220.Lbarrel_shift_loop_entry:
221	xorcc	rWORD1, rWORD2, rSTRXOR	/* non-zero iff the two words differ */
222	bne,pn	%xcc, .Lcommon_endstring	/* branches on the xorcc result above */
223
224	 andcc	rTMP1, rTMP2, %g0	/* delay slot (not annulled): non-zero iff rWORD1 holds a zero byte */
225	be,a,pt	%xcc, .Lbarrel_shift_loop	/* no terminator yet: keep looping */
226	 ldxa	[rSTR1] ASI_PNF, rWORD1	/* annulled delay slot: next word of s1, only when looping */
227
228	retl				/* equal words that contain a terminator: strings match */
229	 mov	0, %o0
230END(strcmp)
231libc_hidden_builtin_def (strcmp)
232