/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as a reference implementation.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)

# include <sysdep.h>

# ifdef USE_AS_WCSLEN
#  define VPCMP		vpcmpd
#  define VPTESTN	vptestnmd
#  define VPMINU	vpminud
#  define CHAR_SIZE	4
# else
#  define VPCMP		vpcmpb
#  define VPTESTN	vptestnmb
#  define VPMINU	vpminub
#  define CHAR_SIZE	1
# endif

# define XMM0		xmm16
# define PAGE_SIZE	4096
# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)

# if VEC_SIZE == 64
#  define KMOV		kmovq
#  define KORTEST	kortestq
#  define RAX		rax
#  define RCX		rcx
#  define RDX		rdx
#  define SHR		shrq
#  define TEXTSUFFIX	evex512
#  define VMM0		zmm16
#  define VMM1		zmm17
#  define VMM2		zmm18
#  define VMM3		zmm19
#  define VMM4		zmm20
#  define VMOVA		vmovdqa64
# elif VEC_SIZE == 32
/* Currently Unused.  */
#  define KMOV		kmovd
#  define KORTEST	kortestd
#  define RAX		eax
#  define RCX		ecx
#  define RDX		edx
#  define SHR		shrl
#  define TEXTSUFFIX	evex256
#  define VMM0		ymm16
#  define VMM1		ymm17
#  define VMM2		ymm18
#  define VMM3		ymm19
#  define VMM4		ymm20
#  define VMOVA		vmovdqa32
# endif

	.section .text.TEXTSUFFIX, "ax", @progbits
/* Aligning the entry point to a 64-byte boundary provides better
   performance for strings of up to one vector length.  */
ENTRY_P2ALIGN (STRLEN, 6)
# ifdef USE_AS_STRNLEN
	/* Check zero length.  */
	test	%RSI_LP, %RSI_LP
	jz	L(ret_max)
#  ifdef __ILP32__
	/* Clear the upper 32 bits.  */
	movl	%esi, %esi
#  endif
# endif

	movl	%edi, %eax
	vpxorq	%XMM0, %XMM0, %XMM0
	andl	$(PAGE_SIZE - 1), %eax
	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
	ja	L(page_cross)

	/* Compare each [w]char against null; the corresponding mask bit
	   is set on a match.  */
	VPCMP	$0, (%rdi), %VMM0, %k0
	KMOV	%k0, %RAX
	test	%RAX, %RAX
	jz	L(align_more)

	bsf	%RAX, %RAX
# ifdef USE_AS_STRNLEN
	cmpq	%rsi, %rax
	cmovnb	%rsi, %rax
# endif
	ret

	/* At this point the maximum length has been reached.  */
# ifdef USE_AS_STRNLEN
	.p2align 4,,3
L(ret_max):
	movq	%rsi, %rax
	ret
# endif

L(align_more):
	leaq	VEC_SIZE(%rdi), %rax
	/* Align rax to VEC_SIZE.  */
	andq	$-VEC_SIZE, %rax
# ifdef USE_AS_STRNLEN
	movq	%rax, %rdx
	subq	%rdi, %rdx
#  ifdef USE_AS_WCSLEN
	SHR	$2, %RDX
#  endif
	/* At this point rdx contains the number of [w]chars already
	   compared.  */
	subq	%rsi, %rdx
	jae	L(ret_max)
	negq	%rdx
	/* At this point rdx contains the number of [w]chars left to
	   check.  From here on rdx is decremented with each compare.  */
# endif

	/* Check the next four vectors individually before entering the
	   4 x VEC_SIZE aligned loop.  */
	VPCMP	$0, (%rax), %VMM0, %k0
	KMOV	%k0, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x1)

# ifdef USE_AS_STRNLEN
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(ret_max)
# endif

	VPCMP	$0, VEC_SIZE(%rax), %VMM0, %k0
	KMOV	%k0, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x2)

# ifdef USE_AS_STRNLEN
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(ret_max)
# endif

	VPCMP	$0, (VEC_SIZE * 2)(%rax), %VMM0, %k0
	KMOV	%k0, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x3)

# ifdef USE_AS_STRNLEN
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(ret_max)
# endif

	VPCMP	$0, (VEC_SIZE * 3)(%rax), %VMM0, %k0
	KMOV	%k0, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x4)

# ifdef USE_AS_STRNLEN
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(ret_max)
	/* Save pointer before 4 x VEC_SIZE alignment.  */
	movq	%rax, %rcx
# endif

	/* Align address to VEC_SIZE * 4 for loop.  */
	andq	$-(VEC_SIZE * 4), %rax

# ifdef USE_AS_STRNLEN
	subq	%rax, %rcx
#  ifdef USE_AS_WCSLEN
	SHR	$2, %RCX
#  endif
	/* rcx contains the number of [w]chars that will be recompared
	   due to the alignment fix.  rdx must be incremented by rcx to
	   offset the alignment adjustment.  */
	addq	%rcx, %rdx
	/* A jump is needed because rdx must not be adjusted for the
	   first iteration of the 4 x VEC_SIZE aligned loop.  */
	jmp	L(loop_entry)
# endif
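
	/* Main loop: each iteration examines 4 x VEC_SIZE bytes.  VMM1
	   and VMM3 hold the first and third vectors, while VPMINU folds
	   the second vector into VMM2 and the fourth into VMM4, so a
	   null [w]char in either vector of a pair produces a zero
	   element in the folded result.  VPTESTN converts the zero
	   elements of each folded vector into a mask and a single
	   KORTEST then checks both masks at once.  */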
	.p2align 4,,11
L(loop):
# ifdef USE_AS_STRNLEN
	subq	$(CHAR_PER_VEC * 4), %rdx
	jbe	L(ret_max)
L(loop_entry):
# endif
	/* The VPMINU and VPCMP combination provides better performance
	   than alternative combinations.  */
	VMOVA	(VEC_SIZE * 4)(%rax), %VMM1
	VPMINU	(VEC_SIZE * 5)(%rax), %VMM1, %VMM2
	VMOVA	(VEC_SIZE * 6)(%rax), %VMM3
	VPMINU	(VEC_SIZE * 7)(%rax), %VMM3, %VMM4

	VPTESTN	%VMM2, %VMM2, %k0
	VPTESTN	%VMM4, %VMM4, %k1

	subq	$-(VEC_SIZE * 4), %rax
	KORTEST	%k0, %k1
	jz	L(loop)

	VPTESTN	%VMM1, %VMM1, %k2
	KMOV	%k2, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x1)

	KMOV	%k0, %RCX
	/* At this point, if k0 is non-zero, the null char must be in
	   the second vector.  */
	test	%RCX, %RCX
	jnz	L(ret_vec_x2)

	VPTESTN	%VMM3, %VMM3, %k3
	KMOV	%k3, %RCX
	test	%RCX, %RCX
	jnz	L(ret_vec_x3)
	/* At this point the null [w]char must be in the fourth vector,
	   so there is no need to check.  */
	KMOV	%k1, %RCX

	/* The fourth, third and second vector termination paths are
	   essentially the same; they are implemented this way to avoid
	   branching and to reuse code from the pre-loop exit
	   condition.  */
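	/* On entry to L(ret_vec_xN), rax points at the first of the
	   four vectors just examined (the pre-loop checks jump here
	   with rax at the first checked vector) and rcx holds the match
	   mask for vector N.  The result is (rax - rdi) plus
	   (N - 1) * VEC_SIZE plus the index of the first set bit in
	   rcx; for wcslen the byte difference is converted to a
	   wide-char count before the mask index is added, and for
	   strnlen the result is clamped to the maximum length.  */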
L(ret_vec_x4):
	bsf	%RCX, %RCX
	subq	%rdi, %rax
# ifdef USE_AS_WCSLEN
	subq	$-(VEC_SIZE * 3), %rax
	shrq	$2, %rax
	addq	%rcx, %rax
# else
	leaq	(VEC_SIZE * 3)(%rcx, %rax), %rax
# endif
# ifdef USE_AS_STRNLEN
	cmpq	%rsi, %rax
	cmovnb	%rsi, %rax
# endif
	ret

L(ret_vec_x3):
	bsf	%RCX, %RCX
	subq	%rdi, %rax
# ifdef USE_AS_WCSLEN
	subq	$-(VEC_SIZE * 2), %rax
	shrq	$2, %rax
	addq	%rcx, %rax
# else
	leaq	(VEC_SIZE * 2)(%rcx, %rax), %rax
# endif
# ifdef USE_AS_STRNLEN
	cmpq	%rsi, %rax
	cmovnb	%rsi, %rax
# endif
	ret

L(ret_vec_x2):
	subq	$-VEC_SIZE, %rax
L(ret_vec_x1):
	bsf	%RCX, %RCX
	subq	%rdi, %rax
# ifdef USE_AS_WCSLEN
	shrq	$2, %rax
# endif
	addq	%rcx, %rax
# ifdef USE_AS_STRNLEN
	cmpq	%rsi, %rax
	cmovnb	%rsi, %rax
# endif
	ret

L(page_cross):
	movl	%eax, %ecx
# ifdef USE_AS_WCSLEN
	andl	$(VEC_SIZE - 1), %ecx
	sarl	$2, %ecx
# endif
	/* ecx contains the number of [w]chars to be skipped as a result
	   of the address alignment.  */
	xorq	%rdi, %rax
	VPCMP	$0, (PAGE_SIZE - VEC_SIZE)(%rax), %VMM0, %k0
	KMOV	%k0, %RAX
	/* Discard the characters that precede the string start and were
	   only included because of the alignment adjustment.  */
	SHR	%cl, %RAX
	jz	L(align_more)

	bsf	%RAX, %RAX
# ifdef USE_AS_STRNLEN
	cmpq	%rsi, %rax
	cmovnb	%rsi, %rax
# endif
	ret

END (STRLEN)
#endif
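
/* Usage sketch (an assumption, not taken from this file): this file is
   a template that a wrapper is expected to parameterize by defining
   STRLEN and VEC_SIZE, and optionally USE_AS_STRNLEN / USE_AS_WCSLEN,
   before including it.  The symbol and file names below are
   illustrative only:

	#define STRLEN		__strlen_evex512
	#define VEC_SIZE	64
	#include "strlen-evex-base.S"
 */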