1/* strlen -- Compute length of NUL terminated string.
2   Highly optimized version for ix86, x>=5.
3   Copyright (C) 1995-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
23/* This version is especially optimized for the i586 (and following?)
24   processors.  This is mainly done by using the two pipelines.  The
25   version optimized for i486 is weak in this aspect because to get
26   as much parallelism we have to execute some *more* instructions.
27
28   The code below is structured to reflect the pairing of the instructions
29   as *I think* it is.  I have no processor data book to verify this.
30   If you find something you think is incorrect let me know.  */
31
32
33/* The magic value which is used throughout in the whole code.  */
34#define magic 0xfefefeff
35
36#define PARMS	4		/* no space for saved regs */
37#define STR	PARMS
38
39	.text
40ENTRY (strlen)
41
42	movl STR(%esp), %eax
43	movl $3, %edx		/* load mask (= 3) */
44
45	andl %eax, %edx		/* separate last two bits of address */
46
47	jz L(1)			/* aligned => start loop */
48	jp L(0)			/* exactly two bits set */
49
50	cmpb %dh, (%eax)	/* is byte NUL? */
51	je L(2)			/* yes => return */
52
53	incl %eax		/* increment pointer */
54	cmpb %dh, (%eax)	/* is byte NUL? */
55
56	je L(2)			/* yes => return */
57
58	incl %eax		/* increment pointer */
59	xorl $2, %edx
60
61	jz L(1)
62
63L(0):	cmpb %dh, (%eax)	/* is byte NUL? */
64	je L(2)			/* yes => return */
65
66	incl %eax		/* increment pointer */
67	xorl %edx, %edx		/* We need %edx == 0 for later */
68
69      /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
70	 change any of the hole bits of LONGWORD.
71
72	 1) Is this safe?  Will it catch all the zero bytes?
73	 Suppose there is a byte with all zeros.  Any carry bits
74	 propagating from its left will fall into the hole at its
75	 least significant bit and stop.  Since there will be no
76	 carry from its most significant bit, the LSB of the
77	 byte to the left will be unchanged, and the zero will be
78	 detected.
79
80	 2) Is this worthwhile?  Will it ignore everything except
81	 zero bytes?  Suppose every byte of LONGWORD has a bit set
82	 somewhere.  There will be a carry into bit 8.	If bit 8
83	 is set, this will carry into bit 16.  If bit 8 is clear,
84	 one of bits 9-15 must be set, so there will be a carry
85	 into bit 16.  Similarly, there will be a carry into bit
86	 24.  If one of bits 24-31 is set, there will be a carry
87	 into bit 32 (=carry flag), so all of the hole bits will
88	 be changed.
89
90	 Note: %edx == 0 in any case here.  */
91
92L(1):
93	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
94	addl $4, %eax		/* adjust pointer for *next* word */
95
96	subl %ecx, %edx		/* first step to negate word */
97	addl $magic, %ecx	/* add magic word */
98
99	decl %edx		/* complete negation of word */
100	jnc L(3)		/* previous addl caused overflow? */
101
102	xorl %ecx, %edx		/* (word+magic)^word */
103
104	andl $~magic, %edx	/* any of the carry flags set? */
105
106	jne L(3)		/* yes => determine byte */
107
108
109	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
110	addl $4, %eax		/* adjust pointer for *next* word */
111
112	subl %ecx, %edx		/* first step to negate word */
113	addl $magic, %ecx	/* add magic word */
114
115	decl %edx		/* complete negation of word */
116	jnc L(3)		/* previous addl caused overflow? */
117
118	xorl %ecx, %edx		/* (word+magic)^word */
119
120	andl $~magic, %edx	/* any of the carry flags set? */
121
122	jne L(3)		/* yes => determine byte */
123
124
125	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
126	addl $4, %eax		/* adjust pointer for *next* word */
127
128	subl %ecx, %edx		/* first step to negate word */
129	addl $magic, %ecx	/* add magic word */
130
131	decl %edx		/* complete negation of word */
132	jnc L(3)		/* previous addl caused overflow? */
133
134	xorl %ecx, %edx		/* (word+magic)^word */
135
136	andl $~magic, %edx	/* any of the carry flags set? */
137
138	jne L(3)		/* yes => determine byte */
139
140
141	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
142	addl $4, %eax		/* adjust pointer for *next* word */
143
144	subl %ecx, %edx		/* first step to negate word */
145	addl $magic, %ecx	/* add magic word */
146
147	decl %edx		/* complete negation of word */
148	jnc L(3)		/* previous addl caused overflow? */
149
150	xorl %ecx, %edx		/* (word+magic)^word */
151
152	andl $~magic, %edx	/* any of the carry flags set? */
153
154	je L(1)			/* no => start loop again */
155
156
157L(3):	subl $4, %eax		/* correct too early pointer increment */
158	subl $magic, %ecx
159
160	cmpb $0, %cl		/* lowest byte NUL? */
161	jz L(2)			/* yes => return */
162
163	inc %eax		/* increment pointer */
164	testb %ch, %ch		/* second byte NUL? */
165
166	jz L(2)			/* yes => return */
167
168	shrl $16, %ecx		/* make upper bytes accessible */
169	incl %eax		/* increment pointer */
170
171	cmpb $0, %cl		/* is third byte NUL? */
172	jz L(2)			/* yes => return */
173
174	incl %eax		/* increment pointer */
175
176L(2):	subl STR(%esp), %eax	/* now compute the length as difference
177				   between start and terminating NUL
178				   character */
179	ret
180END (strlen)
181libc_hidden_builtin_def (strlen)
182