/* wcslen with SSE2
   Copyright (C) 2011-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)
# include <sysdep.h>
# define STR	4

/* __wcslen_sse2

   Syntax: AT&T, 32-bit x86, run through the C preprocessor.

   In:     STR(%esp) = pointer S to a string of 4-byte elements.
   Out:    %eax      = number of elements before the first element
		       that is zero.
   Writes: %eax, %ecx, %edx, %xmm0-%xmm3, %xmm6, flags.  The stack is
	   not modified.

   Outline:
     1. Elements 0..7 are tested one at a time with scalar compares.
     2. Four 16-byte aligned chunks starting at (S + 32) & -16 are
	tested with pcmpeqd.
     3. The pointer is rounded down to a 64-byte boundary and the
	string is scanned 64 bytes per iteration.

   Register roles from step 2 onwards:
     %eax  scan pointer; at L(exit) it is 16 bytes past the start of
	   the chunk that contains the terminator.
     %ecx  S + 16, so that (%eax - %ecx) / 4 at L(exit) is the index
	   of the first element of that chunk.
     %edx  pmovmskb mask of the most recent comparison.

   Every vector access is 16-byte aligned and step 3 loads whole
   64-byte blocks, so bytes that follow the terminator inside the
   same block are read.

   NOTE(review): the dword compares only line up with elements if S
   is 4-byte aligned; nothing here checks that - confirm callers
   guarantee it.  */

	.text
ENTRY (__wcslen_sse2)
	mov	STR(%esp), %edx

	/* Step 1: scalar test of elements 0..7.  The explicit 'l'
	   suffix fixes the operand size, which the immediate and
	   memory operands cannot express on their own.  */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	/* Step 2: four aligned 16-byte probes.  Each compare register
	   is zeroed first, so pcmpeqd marks the dwords of the chunk
	   that equal zero.  */
	pxor	%xmm0, %xmm0

	lea	32(%edx), %eax
	lea	16(%edx), %ecx		/* %ecx = S + 16, bias used by L(exit).  */
	and	$-16, %eax		/* Round down: may re-cover elements
					   already known to be non-zero.  */

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%eax), %eax		/* lea leaves the flags from test intact.  */
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* If this probe finds nothing, %xmm3 is left all-zero; the
	   64-byte loop below relies on that.  */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%eax), %eax
	jnz	L(exit)

	/* Step 3: continue from the enclosing 64-byte boundary.  This
	   can step back over chunks that were just probed.  */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/* Fold the four chunks into %xmm2 with a byte-wise unsigned
	   minimum and compare the result with zero per dword.  A
	   dword of the minimum can be zero although no single chunk
	   has a zero dword there (its zero bytes may come from
	   different chunks), so a hit is only a candidate and is
	   verified chunk by chunk below.  %xmm1 and %xmm6 are only
	   sources here and still hold chunks 1 and 3.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2		/* %xmm3 is all-zero here.  */
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%eax), %eax
	jz	L(aligned_64_loop)

	/* Verify the candidate.  %eax is now 64 past the block start,
	   so %ecx is moved to make %eax - %ecx equal the offset from
	   S of the chunk under test.  A test that finds nothing
	   leaves %xmm3 all-zero for the next one.  */

	/* Chunk 0.  */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 1.  */
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 2.  */
	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* Chunk 3.  */
	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%ecx), %ecx
	jnz	L(exit)

	/* False candidate: %ecx is back at S + 16 (+48 - 3 * 16) and
	   %xmm3 is zero again, so the loop can simply resume.  */
	jmp	L(aligned_64_loop)

	/* Turn the chunk position and the pmovmskb mask into the
	   result.  The mask has four bits per dword: lanes 0 and 1 in
	   %dl, lanes 2 and 3 in %dh.  */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax		/* Index of the chunk's first element.  */
	test	%dl, %dl
	jz	L(exit_high)

	mov	%dl, %cl		/* %ecx is no longer needed as the bias.  */
	and	$15, %cl
	jz	L(exit_1)
	ret				/* Lane 0.  */

	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_3)
	add	$2, %eax		/* Lane 2.  */
	ret

	.p2align 4
L(exit_1):
	add	$1, %eax		/* Lane 1.  */
	ret

	.p2align 4
L(exit_3):
	add	$3, %eax		/* Lane 3.  */
	ret

	/* Results for a terminator found among elements 0..7.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen_sse2)
#endif