/* strlen(str) -- determine the length of the string STR.
   Optimized for Intel 80x86, x>=4.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

#define PARMS	4		/* no space for saved regs */
#define STR	PARMS

/* Interface (32-bit x86, AT&T syntax, argument passed on the stack):

     In:       STR(%esp)  pointer to the NUL-terminated string
     Out:      %eax       number of bytes preceding the first NUL
     Clobbers: %ecx, %edx, flags

   No registers are saved and %esp is never adjusted, which is why the
   argument lives directly above the return address (PARMS == 4).

   Outline:
     1. Examine up to three leading bytes one at a time until %eax is
	4-byte aligned.
     2. Scan aligned 32-bit words, four words per loop iteration, using
	a carry-chain test to detect a NUL byte anywhere in the word.
     3. Once a word containing a NUL is found (label 3), locate the
	exact byte and return its distance from the string start.

   Because step 2 loads whole aligned words, bytes located after the
   terminating NUL but inside the same aligned word are read as well.  */

	.text
ENTRY (strlen)

	movl STR(%esp), %ecx
	movl %ecx, %eax		/* %eax = running pointer */

	/* %ecx = address & 3.  The value stays below 256 throughout the
	   alignment prologue, so %ch is zero and serves as the NUL to
	   compare against.  */
	andl $3, %ecx		/* mask alignment bits */
	jz L(1)			/* aligned => start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	xorl $3, %ecx		/* was alignment = 3? */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	addl $1, %eax		/* increment pointer */

	subl $1, %ecx		/* was alignment = 2? */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */

/* Don't change the above `addl $1,%eax' and `subl $1, %ecx' into `incl %eax'
   and `decl %ecx' resp.  The additional two bytes per instruction make
   label 4 aligned on a 16 byte boundary with nops.

   The following `sub $15, %eax' is part of this trick, too.  Together with
   the next instruction (`addl $16, %eax') it is in fact an `incl %eax', just
   as expected from the algorithm.  But doing so has the advantage that
   no jump to label 1 is necessary and so the pipeline is not flushed.  */

	subl $15, %eax		/* effectively +1 */


L(4):	addl $16, %eax		/* advance past the four words just scanned */

/* NUL detection for one 32-bit word W held in %ecx:

     %edx = W + 0xfefefeff

   Every byte of the constant is 0xfe except the lowest, which is 0xff.
   A carry leaves the lowest byte iff that byte of W is non-zero, and
   each following byte (0xfe plus the incoming carry) passes the carry
   on iff it is non-zero too.  So when no byte of W is NUL the carry
   ripples through bits 8, 16 and 24 and out into CF.

     - CF clear after the add: W contains a NUL.
     - Otherwise (W + magic) ^ W exposes the carries into bits 8, 16
       and 24 (the constant has zeros exactly there).  OR-ing with the
       constant sets every other bit, so adding 1 yields 0 iff all
       three carries were present, i.e. iff W has no NUL byte.  */

L(1):	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L(3)		/* no carry out => word has a NUL byte */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L(3)		/* found NUL => locate it */

	movl 4(%eax), %ecx	/* second word, same test as above */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* per-byte carries for non-zero bytes */
	jnc L(5)		/* no carry out => word has a NUL byte */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero iff every carry bit was set */
	jnz L(5)		/* found NUL => locate it */

	movl 8(%eax), %ecx	/* third word, same test as above */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* per-byte carries for non-zero bytes */
	jnc L(6)		/* no carry out => word has a NUL byte */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero iff every carry bit was set */
	jnz L(6)		/* found NUL => locate it */

	movl 12(%eax), %ecx	/* fourth word, same test as above */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* per-byte carries for non-zero bytes */
	jnc L(7)		/* no carry out => word has a NUL byte */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero iff every carry bit was set */
	jz L(4)			/* no NUL found => continue loop */

/* Fall-through chain: entering at label 7, 6 or 5 adds 12, 8 or 4 so
   that %eax addresses the word currently held in %ecx.  */
L(7):	addl $4, %eax		/* adjust pointer */
L(6):	addl $4, %eax
L(5):	addl $4, %eax

/* %ecx holds a word known to contain a NUL and %eax points at its first
   byte.  Test the three low bytes in turn; if none of them is NUL the
   terminator must be the fourth byte, so it needs no test of its own.  */
L(3):	testb %cl, %cl		/* is first byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testb %ch, %ch		/* is second byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testl $0xff0000, %ecx	/* is third byte NUL? */
	jz L(2)			/* yes => return pointer */
	incl %eax		/* increment pointer */

L(2):	subl STR(%esp), %eax	/* length = NUL address - string start */

	ret
END (strlen)
libc_hidden_builtin_def (strlen)