/* memchr - find a character in a memory zone

   Copyright (C) 2015-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#ifndef MEMCHR
# define MEMCHR __memchr
#endif

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1
#define cntin		x2
#define result		x0

#define src		x3
#define cntrem		x4
#define synd		x5
#define shift		x6
#define	tmp		x7

#define vrepchr		v0
#define qdata		q1
#define vdata		v1
#define vhas_chr	v2
#define vend		v3
#define dend		d3

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros identifies
   exactly which byte matched.  */

/* void *memchr (const void *s, int c, size_t n)

   In:     srcin (x0) = start of the buffer
           chrin (w1) = byte value to look for (replicated per byte lane)
           cntin (x2) = number of bytes to examine
   Out:    result (x0) = address of the first matching byte, or 0 if none
           of the first cntin bytes matches
   Uses:   x3-x7 (src, cntrem, synd, shift, tmp), v0-v3, condition flags

   All loads are of whole 16-byte chunks at 16-byte aligned addresses
   (src is srcin with the low four bits cleared, then advanced by 16).
   PTR_ARG and SIZE_ARG come from sysdep.h; presumably they normalise the
   pointer and size arguments for ILP32 -- confirm against sysdep.h.  */

ENTRY (MEMCHR)
	PTR_ARG (0)
	SIZE_ARG (2)
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	cbz	cntin, L(nomatch)	/* n == 0: return 0 without loading.  */
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* shift = 4 * srcin.  The register shift below only uses the amount
	   modulo 64, so this is effectively 4 * (srcin & 15): one nibble per
	   byte that lies before srcin inside the aligned chunk.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Drop matches located before srcin.  */
	cbz	synd, L(start_loop)

	/* Match in the first chunk.  rbit + clz counts trailing zero bits, so
	   synd >> 2 is the byte offset of the first match from srcin.
	   NOTE(review): rbit is unconditional here but endian-dependent in
	   L(end); presumably because this chunk is loaded with ld1 (per-byte
	   elements) while the loop uses ldr q -- confirm.  */
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	cmp	cntin, synd, lsr 2
	csel	result, result, xzr, hi	/* Keep it only if offset < cntin.  */
	ret

L(start_loop):
	/* tmp = 16 - (srcin - src) = bytes of the buffer covered by the first
	   chunk.  cntrem = bytes still to examine after that chunk.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 16
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)		/* cntin <= tmp: nothing left to scan.  */

	/* Make sure that it won't overread by a 16-byte chunk.
	   Bit 4 of (cntrem + 15) is the parity of the number of 16-byte chunks
	   still needed.  The loop handles two chunks per iteration and only
	   checks the count in its second half, so enter at the second half
	   when that number is odd.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 4
L(loop32):
	ldr	qdata, [src, 16]!	/* Pre-increment: src += 16, then load.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)		/* Some lane matched.  */

L(loop32_2):
	ldr	qdata, [src, 16]!
	subs	cntrem, cntrem, 32	/* Account for both halves at once.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	b.ls	L(end)			/* Count exhausted: this is the last chunk.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end):
	/* vhas_chr holds the comparison result for the chunk at src.  Build
	   the nibble mask for it and recompute the exact number of bytes
	   left from src to the end of the buffer (cntrem is reused for
	   that).  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	add	tmp, srcin, cntin
	sub	cntrem, tmp, src
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd		/* synd >> 2 = offset of first match.  */
	cmp	cntrem, synd, lsr 2
	add	result, src, synd, lsr 2
	/* Return the match only if its offset is below the bytes remaining;
	   otherwise (match past the end, or no match in the chunk) return 0.  */
	csel	result, result, xzr, hi
	ret

L(nomatch):
	mov	result, 0
	ret

END (MEMCHR)
weak_alias (MEMCHR, memchr)
libc_hidden_builtin_def (memchr)