/* wcschr optimized with SSE2.
   Copyright (C) 2017-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#include <isa-level.h>

/* ISA level >= 2 because there are no wcschr-sse4 implementations.  */
#if ISA_SHOULD_BUILD (2)

# ifndef WCSCHR
#  define WCSCHR	__wcschr_sse2
# endif

# include <sysdep.h>

/* WCSCHR: find the first 32-bit element equal to a given value in a
   null-terminated array of 32-bit elements.

   In:       %rdi = start of the array.
             %rsi = value to look for; only the low 32 bits take part
                    in the comparison.
   Out:      %rax = address of the first matching element, or 0 when the
                    terminating null element comes first.  When the
                    value searched for is itself 0, the address of the
                    terminator is returned.
   Clobbers: %rcx, %rdx, %rdi, %xmm0, %xmm1, %xmm2, flags.

   Register roles throughout:
     %xmm1 = the searched value replicated into all four dword lanes.
     %xmm2 = all zeros before each compare; afterwards the "is null"
             lane mask.  It stays all zeros as long as no null element
             has been seen, so it can be reused without re-clearing.
     %rax  = byte mask of lanes equal to the searched value.
     %rdx  = byte mask of null lanes.  */

	.text
ENTRY (WCSCHR)

	/* Broadcast the low dword of %rsi to every lane of %xmm1.  */
	movd	%rsi, %xmm1
	pxor	%xmm2, %xmm2
	mov	%rdi, %rcx
	punpckldq %xmm1, %xmm1
	punpckldq %xmm1, %xmm1

	/* If the start lies in the last 16 bytes of a 64-byte block, an
	   unaligned 16-byte load could run into the next block; take the
	   aligned path instead.  */
	and	$63, %rcx
	cmp	$48, %rcx
	ja	L(cross_cache)

	/* First 16 bytes, read unaligned straight from the start.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	/* Round the advanced pointer down to a 16-byte boundary.  The next
	   aligned block may overlap bytes that were just examined; they
	   contained neither a match nor a null, so rechecking is harmless.  */
	and	$-16, %rdi

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	jmp	L(loop)

L(cross_cache):
	/* %rcx = misalignment of the start within its 16-byte block,
	   %rdi = that block's aligned address.  */
	and	$15, %rcx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax

	/* Drop the mask bits that belong to bytes before the start.  The
	   masks are zero-extended 16-bit values, so the arithmetic shift
	   brings in zeros.  */
	sar	%cl, %rdx
	sar	%cl, %rax
	test	%rax, %rax
	je	L(unaligned_no_match)

	/* A match exists; it only counts if no null element precedes it.  */
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(unaligned_match)
	bsf	%rdx, %rdx
	cmp	%rdx, %rax
	ja	L(return_null)

L(unaligned_match):
	/* Result = aligned base + misalignment + byte offset of the match.  */
	add	%rdi, %rax
	add	%rcx, %rax
	ret

	.p2align 4
L(unaligned_no_match):
	test	%rdx, %rdx
	jne	L(return_null)
	/* %xmm2 may still flag null lanes located before the start, so it
	   has to be cleared before entering the loop.  */
	pxor	%xmm2, %xmm2

	add	$16, %rdi

	.p2align 4
/* Loop start on aligned string.  The body is unrolled four times; each
   step checks one aligned 16-byte block and advances %rdi past it.  */
L(loop):
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)
	jmp	L(loop)

	.p2align 4
L(matches):
	/* %rdx was merged with %rax by the caller's OR; reload the pure
	   null mask from %xmm2.  */
	pmovmskb %xmm2, %rdx
	test	%rax, %rax
	jz	L(return_null)
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(match)
	/* Both a match and a null are in this block: the match only counts
	   if it is not located after the null.  */
	bsf	%rdx, %rcx
	cmp	%rcx, %rax
	ja	L(return_null)
L(match):
	/* %rdi already points past the block that was examined.  */
	sub	$16, %rdi
	add	%rdi, %rax
	ret

	.p2align 4
L(return_null):
	/* Writing %eax clears all of %rax.  */
	xor	%eax, %eax
	ret

END (WCSCHR)
#endif