/* memchr - find a character in a memory zone using base integer registers

   Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library. If not, see
   <https://www.gnu.org/licenses/>. */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Use base integer registers.
 */

#ifndef MEMCHR
# define MEMCHR __memchr_nosimd
#endif

/* Arguments and results. */
#define srcin		x0
#define chrin		x1
#define cntin		x2

#define result		x0

/* repchr aliases chrin; tmp1 aliases cntin (cntin is dead once srcend
   has been computed). */
#define repchr		x1

#define tmp1		x2
#define tmp2		x3
#define tmp3		x4
#define tmp4		x5

#define src		x6
#define srcend		x7
#define srcend16	x8

#define anymore		x9

#define zeroones	x10

#define data1		x11
#define data2		x12

#define has_chr1	x13
#define has_chr2	x14

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/*
 * MEMCHR (srcin, chrin, cntin)
 *
 * In:     x0 (srcin) = start address of the memory range
 *         x1 (chrin) = byte to search for; only the low 8 bits are used
 *         x2 (cntin) = number of bytes to search
 * Out:    x0 (result) = address of the first matching byte, or 0 if the
 *         byte does not occur in [srcin, srcin + cntin) or cntin is 0.
 * Clobb:  x1-x14 and the condition flags.  No stack and no stores.
 *
 * The range is scanned in 16-byte aligned blocks, two qwords at a time.
 * Whole blocks are loaded, so bytes that lie in the first block before
 * srcin, or in the last block at or after srcend, are read; they are
 * never reported as matches (leading bytes are forced to 0xff, trailing
 * hits are rejected by the final bounds check).
 */
ENTRY_ALIGN (MEMCHR, 6)

	PTR_ARG (0)
	SIZE_ARG (2)

	/* Do not dereference srcin if no bytes to compare. */
	cbz	cntin, L(none_chr)

	/*
	 * Start address is 16-byte aligned or not?  The flags set here are
	 * consumed by the "b.eq L(loop)" below; none of the instructions
	 * in between modify them.
	 */
	tst	srcin, 15
	bic	src, srcin, 15

	mov	zeroones, REP8_01
	and	repchr, chrin, 255
	/* Generate a qword integer as |c|c|c|c|c|c|c|c|. */
	mul	repchr, repchr, zeroones

	add	srcend, srcin, cntin
	/*
	 * srcend16 is address of the block following the last block.
	 *
	 * [A block is 16-byte aligned and sized.]
	 */
	add	srcend16, srcend, 15
	bic	srcend16, srcend16, 15

	/* Aligned start: no leading bytes to mask, go straight to the loop. */
	b.eq	L(loop)

	/* Load the first block containing start address. */
	ldp	data1, data2, [src], 16

	/*
	 * Build in tmp3 a mask that is all-ones for the bytes of a qword
	 * at or after the start address and zero for the bytes before it.
	 * The register shift uses only the low 6 bits of tmp1, i.e.
	 * 8 * (srcin & 7).
	 */
	lsl	tmp1, srcin, 3
	mov	tmp2, ~0
#ifdef __AARCH64EB__
	lsr	tmp3, tmp2, tmp1
#else
	lsl	tmp3, tmp2, tmp1
#endif
	/*
	 * Start address is in the first or the second qword?  The flags
	 * are consumed by the csinv/csel pair below.
	 */
	tst	srcin, 8

	/*
	 * Transform any byte in the block to zero using XOR operation,
	 * if that byte equals the char to search. In this way, searching
	 * the char becomes detecting zero in the resulting two qwords.
	 */
	eor	data1, data1, repchr
	eor	data2, data2, repchr

	/*
	 * Set those unused bytes (before start address) to 0xff, so
	 * that they will not hit any zero detection.
	 */
	orn	tmp1, data1, tmp3
	orn	tmp2, data2, tmp3

	/*
	 * Start in first qword:  data1 = masked data1, data2 unchanged.
	 * Start in second qword: data1 = all ones,     data2 = masked data2.
	 */
	csinv	data1, tmp1, xzr, eq
	csel	data2, data2, tmp2, eq

	/*
	 * When the first and last block are the same, there are two cases:
	 *  o. Memory range to search is just in one block.
	 *      ( start address - end address) < 0
	 *
	 *  o. Memory range is so large that end address wrap-around.
	 *      ( start address - end address) > 0
	 *
	 * anymore becomes 0 only in the first case (this block is also the
	 * last one) and -1 otherwise.  The flags left by ccmp are consumed
	 * by the ccmp in L(find_chr).
	 */
	cmp	srcin, srcend
	ccmp	src, srcend16, 0, mi
	csetm	anymore, ne
	b	L(find_chr)

	.p2align 4
L(loop):
	ldp	data1, data2, [src], 16

	/*
	 * anymore == 0 (and Z set) when the block just loaded is the last
	 * one.  The flags are consumed by the ccmp in L(find_chr).
	 */
	subs	anymore, src, srcend16

	/*
	 * Transform any byte in the block to zero using XOR operation,
	 * if that byte equals the char to search.
	 */
	eor	data1, data1, repchr
	eor	data2, data2, repchr

L(find_chr):
	/*
	 * Use the following integer test to find out if any byte in a
	 * qword is zero. If the qword does not contain a zero-valued byte,
	 * the test result is zero.
	 *
	 *  (qword - 0x0101010101010101) & ~(qword) & 0x8080808080808080
	 * =
	 *  (qword - 0x0101010101010101) & ~(qword | 0x7f7f7f7f7f7f7f7f)
	 *
	 */
	sub	tmp1, data1, zeroones
	sub	tmp2, data2, zeroones

	orr	tmp3, data1, REP8_7f
	orr	tmp4, data2, REP8_7f

	bic	has_chr1, tmp1, tmp3
	bic	has_chr2, tmp2, tmp4

	/*
	 * Continue with the next block only if more blocks remain (ne from
	 * the preceding subs/ccmp) and neither qword reported a hit.  When
	 * no block remains, ccmp forces NZCV = 0 so the branch falls through.
	 */
	orr	tmp1, has_chr1, has_chr2
	ccmp	tmp1, 0, 0, ne

	b.eq	L(loop)

	/* Hit in the first qword?  src already points past the block. */
	cbz	has_chr1, 1f
	sub	result, src, 16
#ifdef __AARCH64EB__
	rev	data1, data1
#else
	rev	has_chr1, has_chr1
#endif
	b	L(done)

	/* No hit in either qword means the last block was exhausted. */
1:	cbz	has_chr2, L(none_chr)
	sub	result, src, 8
#ifdef __AARCH64EB__
	rev	data1, data2
#else
	rev	has_chr1, has_chr2
#endif

L(done):
#ifdef __AARCH64EB__
	/*
	 * For big-endian, can not directly use has_chr1/has_chr2 because
	 * the two qwords have been reversed after loading from memory.
	 * Thus, have to perform char detection on the qword again, which
	 * has been byte-swapped this time.
	 */
	sub	tmp1, data1, zeroones
	orr	tmp3, data1, REP8_7f
	bic	has_chr1, tmp1, tmp3
	rev	has_chr1, has_chr1
#endif

	/*
	 * If the specified char is found in a qword, the corresponding
	 * byte in has_chr has its top bit (0x80) set.  This is only
	 * reliable for the first occurrence, not for other occurrences.
	 * After the byte swap the first occurrence is the most significant
	 * flagged byte, so clz / 8 is its byte offset within the qword.
	 *
	 * If this was the last block (anymore == 0) the hit must also lie
	 * before srcend; otherwise ccmp forces N set so the hit is kept.
	 */
	cmp	anymore, 0
	clz	tmp1, has_chr1
	add	result, result, tmp1, lsr 3
	ccmp	result, srcend, 8, eq	/* NZCV = 8000 */
	csel	result, result, xzr, mi
	ret

L(none_chr):
	mov	result, 0
	ret

END (MEMCHR)
libc_hidden_builtin_def (MEMCHR)