1/* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR.
2   For Intel 80x86, x>=3.
3   Copyright (C) 1994-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
23#define PARMS	4+4	/* space for 1 saved reg */
24#define RTN	PARMS
25#define STR	RTN
26#define CHR	STR+4
27
28	.text
29ENTRY (__rawmemchr)
30
31	/* Save callee-safe register used in this function.  */
32	pushl %edi
33	cfi_adjust_cfa_offset (4)
34	cfi_rel_offset (edi, 0)
35
36	/* Load parameters into registers.  */
37	movl STR(%esp), %eax
38	movl CHR(%esp), %edx
39
40	/* At the moment %edx contains C.  What we need for the
41	   algorithm is C in all bytes of the dword.  Avoid
42	   operations on 16 bit words because these require an
43	   prefix byte (and one more cycle).  */
44	movb %dl, %dh		/* Now it is 0|0|c|c */
45	movl %edx, %ecx
46	shll $16, %edx		/* Now c|c|0|0 */
47	movw %cx, %dx		/* And finally c|c|c|c */
48
49	/* Better performance can be achieved if the word (32
50	   bit) memory access is aligned on a four-byte-boundary.
51	   So process first bytes one by one until boundary is
52	   reached. Don't use a loop for better performance.  */
53
54	testb $3, %al		/* correctly aligned ? */
55	je L(1)			/* yes => begin loop */
56	cmpb %dl, (%eax)	/* compare byte */
57	je L(9)			/* target found => return */
58	incl %eax		/* increment source pointer */
59
60	testb $3, %al		/* correctly aligned ? */
61	je L(1)			/* yes => begin loop */
62	cmpb %dl, (%eax)	/* compare byte */
63	je L(9)			/* target found => return */
64	incl %eax		/* increment source pointer */
65
66	testb $3, %al		/* correctly aligned ? */
67	je L(1)			/* yes => begin loop */
68	cmpb %dl, (%eax)	/* compare byte */
69	je L(9)			/* target found => return */
70	incl %eax		/* increment source pointer */
71
72      /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
73	 change any of the hole bits of LONGWORD.
74
75	 1) Is this safe?  Will it catch all the zero bytes?
76	 Suppose there is a byte with all zeros.  Any carry bits
77	 propagating from its left will fall into the hole at its
78	 least significant bit and stop.  Since there will be no
79	 carry from its most significant bit, the LSB of the
80	 byte to the left will be unchanged, and the zero will be
81	 detected.
82
83	 2) Is this worthwhile?  Will it ignore everything except
84	 zero bytes?  Suppose every byte of LONGWORD has a bit set
85	 somewhere.  There will be a carry into bit 8.	If bit 8
86	 is set, this will carry into bit 16.  If bit 8 is clear,
87	 one of bits 9-15 must be set, so there will be a carry
88	 into bit 16.  Similarly, there will be a carry into bit
89	 24.  If one of bits 24-31 is set, there will be a carry
90	 into bit 32 (=carry flag), so all of the hole bits will
91	 be changed.
92
93	 3) But wait!  Aren't we looking for C, not zero?
94	 Good point.  So what we do is XOR LONGWORD with a longword,
95	 each of whose bytes is C.  This turns each byte that is C
96	 into a zero.  */
97
98
99	/* Each round the main loop processes 16 bytes.  */
100	ALIGN (4)
101
102L(1):	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
103	movl $0xfefefeff, %edi	/* magic value */
104	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
105				   are now 0 */
106	addl %ecx, %edi		/* add the magic value to the word.  We get
107				   carry bits reported for each byte which
108				   is *not* 0 */
109
110	/* According to the algorithm we had to reverse the effect of the
111	   XOR first and then test the overflow bits.  But because the
112	   following XOR would destroy the carry flag and it would (in a
113	   representation with more than 32 bits) not alter then last
114	   overflow, we can now test this condition.  If no carry is signaled
115	   no overflow must have occurred in the last byte => it was 0.	*/
116	jnc L(8)
117
118	/* We are only interested in carry bits that change due to the
119	   previous add, so remove original bits */
120	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
121
122	/* Now test for the other three overflow bits.  */
123	orl $0xfefefeff, %edi	/* set all non-carry bits */
124	incl %edi		/* add 1: if one carry bit was *not* set
125				   the addition will not result in 0.  */
126
127	/* If at least one byte of the word is C we don't get 0 in %edi.  */
128	jnz L(8)		/* found it => return pointer */
129
130	/* This process is unfolded four times for better performance.
131	   we don't increment the source pointer each time.  Instead we
132	   use offsets and increment by 16 in each run of the loop.  But
133	   before probing for the matching byte we need some extra code
134	   (following LL(13) below).  Even the len can be compared with
135	   constants instead of decrementing each time.  */
136
137	movl 4(%eax), %ecx	/* get word (= 4 bytes) in question */
138	movl $0xfefefeff, %edi	/* magic value */
139	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
140				   are now 0 */
141	addl %ecx, %edi		/* add the magic value to the word.  We get
142				   carry bits reported for each byte which
143				   is *not* 0 */
144	jnc L(7)		/* highest byte is C => return pointer */
145	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
146	orl $0xfefefeff, %edi	/* set all non-carry bits */
147	incl %edi		/* add 1: if one carry bit was *not* set
148				   the addition will not result in 0.  */
149	jnz L(7)		/* found it => return pointer */
150
151	movl 8(%eax), %ecx	/* get word (= 4 bytes) in question */
152	movl $0xfefefeff, %edi	/* magic value */
153	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
154				   are now 0 */
155	addl %ecx, %edi		/* add the magic value to the word.  We get
156				   carry bits reported for each byte which
157				   is *not* 0 */
158	jnc L(6)		/* highest byte is C => return pointer */
159	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
160	orl $0xfefefeff, %edi	/* set all non-carry bits */
161	incl %edi		/* add 1: if one carry bit was *not* set
162				   the addition will not result in 0.  */
163	jnz L(6)		/* found it => return pointer */
164
165	movl 12(%eax), %ecx	/* get word (= 4 bytes) in question */
166	movl $0xfefefeff, %edi	/* magic value */
167	xorl %edx, %ecx		/* XOR with word c|c|c|c => bytes of str == c
168				   are now 0 */
169	addl %ecx, %edi		/* add the magic value to the word.  We get
170				   carry bits reported for each byte which
171				   is *not* 0 */
172	jnc L(5)		/* highest byte is C => return pointer */
173	xorl %ecx, %edi		/* ((word^charmask)+magic)^(word^charmask) */
174	orl $0xfefefeff, %edi	/* set all non-carry bits */
175	incl %edi		/* add 1: if one carry bit was *not* set
176				   the addition will not result in 0.  */
177	jnz L(5)		/* found it => return pointer */
178
179	/* Adjust both counters for a full round, i.e. 16 bytes.  */
180	addl $16, %eax
181	jmp L(1)
182	/* add missing source pointer increments */
183L(5):	addl $4, %eax
184L(6):	addl $4, %eax
185L(7):	addl $4, %eax
186
187	/* Test for the matching byte in the word.  %ecx contains a NUL
188	   char in the byte which originally was the byte we are looking
189	   at.  */
190L(8):	testb %cl, %cl		/* test first byte in dword */
191	jz L(9)			/* if zero => return pointer */
192	incl %eax		/* increment source pointer */
193
194	testb %ch, %ch		/* test second byte in dword */
195	jz L(9)			/* if zero => return pointer */
196	incl %eax		/* increment source pointer */
197
198	testl $0xff0000, %ecx	/* test third byte in dword */
199	jz L(9)			/* if zero => return pointer */
200	incl %eax		/* increment source pointer */
201
202	/* No further test needed we we know it is one of the four bytes.  */
203
204L(9):
205	popl %edi		/* pop saved register */
206	cfi_adjust_cfa_offset (-4)
207	cfi_restore (edi)
208
209	ret
210END (__rawmemchr)
211
212libc_hidden_def (__rawmemchr)
213weak_alias (__rawmemchr, rawmemchr)
214