/* strlen(str) -- determine the length of the string STR.
   Optimized for Intel 80x86, x>=4.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Offset from %esp, at function entry, of the first (and only) argument.
   strlen pushes nothing, so the offset stays valid for the whole function
   and is reused at the end to reload the original pointer.  */
#define PARMS	4		/* no space for saved regs */
#define STR	PARMS		/* the string pointer argument */

	.text
/* strlen (STR) -- return the number of bytes before the first NUL in STR.

   In:       STR(%esp) = pointer to the string.
   Out:      %eax = (address of the first NUL byte) - STR.
   Clobbers: %ecx, %edx, flags.  Nothing is pushed and no other register
	     is written.

   Method: test up to three leading bytes one at a time until the pointer
   is 4-byte aligned, then scan aligned 32-bit words, four words per loop
   iteration, using an add-and-check-carries trick to detect a zero byte.
   Once a word containing a NUL is found, the tail at label 3 locates the
   exact byte.  Because whole aligned words are loaded, up to three bytes
   after the terminator (within the same aligned word) may be read.

   Register roles in the word loop:
     %eax  current (aligned) scan pointer
     %ecx  the word most recently loaded
     %edx  scratch for the zero-byte test  */
ENTRY (strlen)

	movl STR(%esp), %ecx	/* fetch the string pointer */
	movl %ecx, %eax		/* duplicate it */

	/* From here until label 1, %ecx only ever holds a value in 0..3,
	   so %ch is zero and `cmpb %ch, (%eax)' compares a byte to NUL.  */
	andl $3, %ecx		/* mask alignment bits */
	jz L(1)			/* aligned => start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	xorl $3, %ecx		/* was alignment = 3? */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	addl $1, %eax		/* increment pointer */

	subl $1, %ecx		/* was alignment = 2? */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? (alignment was 1) */
	je L(2)			/* yes => return */

/* Do not change the `addl $1, %eax' and `subl $1, %ecx' above into
   `incl %eax' and `decl %ecx'.  Each of the longer forms is two bytes
   bigger, and that padding is what is meant to place label 4 on a 16-byte
   boundary without inserting nops.  (This depends on the alignment that
   ENTRY establishes, which is defined in sysdep.h.)

   The following `subl $15, %eax' is part of the same trick.  Together with
   the `addl $16, %eax' at label 4 it amounts to the `incl %eax' the
   algorithm calls for, but it lets execution fall through into the loop
   instead of jumping to label 1.  */

	subl $15, %eax		/* effectively +1 */

L(4):	addl $16, %eax		/* adjust pointer for full loop */

/* Zero-byte test, repeated for the words at offsets 0, 4, 8 and 12.

   Adding 0xfefefeff to the word adds 0xff to byte 0 and 0xfe to each
   higher byte, so a carry leaves a byte exactly when it, and every byte
   below it, is nonzero.

   - No carry out of bit 31 means the word is below 0x01010101, which is
     only possible if at least one of its bytes is NUL.
   - Otherwise (word + magic) ^ word exposes, in bits 8, 16 and 24, the
     carries that entered bytes 1..3.  OR-ing with the magic value sets
     every other bit, so the final increment yields zero only when all
     three carries were present, i.e. when no byte is NUL.  */

L(1):	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L(3)		/* no carry out => some byte is NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L(3)		/* found NUL => locate it at label 3 */

	movl 4(%eax), %ecx	/* second word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word */
	jnc L(5)		/* no carry out => some byte is NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if all carries were set */
	jnz L(5)		/* found NUL => pointer needs +4 */

	movl 8(%eax), %ecx	/* third word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word */
	jnc L(6)		/* no carry out => some byte is NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if all carries were set */
	jnz L(6)		/* found NUL => pointer needs +8 */

	movl 12(%eax), %ecx	/* fourth word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word */
	jnc L(7)		/* no carry out => some byte is NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if all carries were set */
	jz L(4)			/* no NUL found => continue loop */

/* Labels 7, 6 and 5 fall through into one another, adding 12, 8 or 4 so
   that %eax points at the word (still held in %ecx) that has the NUL.  */
L(7):	addl $4, %eax		/* adjust pointer */
L(6):	addl $4, %eax
L(5):	addl $4, %eax

/* %ecx holds a word known to contain a NUL and %eax points at its first
   byte.  Check bytes 0, 1 and 2 in turn; if none of them is NUL the
   terminator must be byte 3, so %eax ends up advanced by three.  */
L(3):	testb %cl, %cl		/* is first byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testb %ch, %ch		/* is second byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testl $0xff0000, %ecx	/* is third byte NUL? */
	jz L(2)			/* yes => return pointer */
	incl %eax		/* increment pointer */

L(2):	subl STR(%esp), %eax	/* compute difference to string start */

	ret
END (strlen)
libc_hidden_builtin_def (strlen)
132