1/* strlen -- Compute length of NUL terminated string. 2 Highly optimized version for ix86, x>=5. 3 Copyright (C) 1995-2022 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 6 The GNU C Library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 The GNU C Library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 19 20#include <sysdep.h> 21#include "asm-syntax.h" 22 23/* This version is especially optimized for the i586 (and following?) 24 processors. This is mainly done by using the two pipelines. The 25 version optimized for i486 is weak in this aspect because to get 26 as much parallelism we have to execute some *more* instructions. 27 28 The code below is structured to reflect the pairing of the instructions 29 as *I think* it is. I have no processor data book to verify this. 30 If you find something you think is incorrect let me know. */ 31 32 33/* The magic value which is used throughout in the whole code. */ 34#define magic 0xfefefeff 35 36#define PARMS 4 /* no space for saved regs */ 37#define STR PARMS 38 39 .text 40ENTRY (strlen) 41 42 movl STR(%esp), %eax 43 movl $3, %edx /* load mask (= 3) */ 44 45 andl %eax, %edx /* separate last two bits of address */ 46 47 jz L(1) /* aligned => start loop */ 48 jp L(0) /* exactly two bits set */ 49 50 cmpb %dh, (%eax) /* is byte NUL? */ 51 je L(2) /* yes => return */ 52 53 incl %eax /* increment pointer */ 54 cmpb %dh, (%eax) /* is byte NUL? */ 55 56 je L(2) /* yes => return */ 57 58 incl %eax /* increment pointer */ 59 xorl $2, %edx 60 61 jz L(1) 62 63L(0): cmpb %dh, (%eax) /* is byte NUL? */ 64 je L(2) /* yes => return */ 65 66 incl %eax /* increment pointer */ 67 xorl %edx, %edx /* We need %edx == 0 for later */ 68 69 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to 70 change any of the hole bits of LONGWORD. 71 72 1) Is this safe? Will it catch all the zero bytes? 73 Suppose there is a byte with all zeros. Any carry bits 74 propagating from its left will fall into the hole at its 75 least significant bit and stop. Since there will be no 76 carry from its most significant bit, the LSB of the 77 byte to the left will be unchanged, and the zero will be 78 detected. 79 80 2) Is this worthwhile? Will it ignore everything except 81 zero bytes? Suppose every byte of LONGWORD has a bit set 82 somewhere. There will be a carry into bit 8. If bit 8 83 is set, this will carry into bit 16. If bit 8 is clear, 84 one of bits 9-15 must be set, so there will be a carry 85 into bit 16. Similarly, there will be a carry into bit 86 24. If one of bits 24-31 is set, there will be a carry 87 into bit 32 (=carry flag), so all of the hole bits will 88 be changed. 89 90 Note: %edx == 0 in any case here. */ 91 92L(1): 93 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 94 addl $4, %eax /* adjust pointer for *next* word */ 95 96 subl %ecx, %edx /* first step to negate word */ 97 addl $magic, %ecx /* add magic word */ 98 99 decl %edx /* complete negation of word */ 100 jnc L(3) /* previous addl caused overflow? */ 101 102 xorl %ecx, %edx /* (word+magic)^word */ 103 104 andl $~magic, %edx /* any of the carry flags set? */ 105 106 jne L(3) /* yes => determine byte */ 107 108 109 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 110 addl $4, %eax /* adjust pointer for *next* word */ 111 112 subl %ecx, %edx /* first step to negate word */ 113 addl $magic, %ecx /* add magic word */ 114 115 decl %edx /* complete negation of word */ 116 jnc L(3) /* previous addl caused overflow? */ 117 118 xorl %ecx, %edx /* (word+magic)^word */ 119 120 andl $~magic, %edx /* any of the carry flags set? */ 121 122 jne L(3) /* yes => determine byte */ 123 124 125 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 126 addl $4, %eax /* adjust pointer for *next* word */ 127 128 subl %ecx, %edx /* first step to negate word */ 129 addl $magic, %ecx /* add magic word */ 130 131 decl %edx /* complete negation of word */ 132 jnc L(3) /* previous addl caused overflow? */ 133 134 xorl %ecx, %edx /* (word+magic)^word */ 135 136 andl $~magic, %edx /* any of the carry flags set? */ 137 138 jne L(3) /* yes => determine byte */ 139 140 141 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 142 addl $4, %eax /* adjust pointer for *next* word */ 143 144 subl %ecx, %edx /* first step to negate word */ 145 addl $magic, %ecx /* add magic word */ 146 147 decl %edx /* complete negation of word */ 148 jnc L(3) /* previous addl caused overflow? */ 149 150 xorl %ecx, %edx /* (word+magic)^word */ 151 152 andl $~magic, %edx /* any of the carry flags set? */ 153 154 je L(1) /* no => start loop again */ 155 156 157L(3): subl $4, %eax /* correct too early pointer increment */ 158 subl $magic, %ecx 159 160 cmpb $0, %cl /* lowest byte NUL? */ 161 jz L(2) /* yes => return */ 162 163 inc %eax /* increment pointer */ 164 testb %ch, %ch /* second byte NUL? */ 165 166 jz L(2) /* yes => return */ 167 168 shrl $16, %ecx /* make upper bytes accessible */ 169 incl %eax /* increment pointer */ 170 171 cmpb $0, %cl /* is third byte NUL? */ 172 jz L(2) /* yes => return */ 173 174 incl %eax /* increment pointer */ 175 176L(2): subl STR(%esp), %eax /* now compute the length as difference 177 between start and terminating NUL 178 character */ 179 ret 180END (strlen) 181libc_hidden_builtin_def (strlen) 182