/* strnlen - calculate the length of a string with limit.

   Copyright (C) 2013-2022 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Register roles.  */
#define srcin		x0	/* In: start of the string.  */
#define cntin		x1	/* In: maximum number of bytes to examine.  */
#define result		x0	/* Out: computed length (aliases srcin).  */

#define src		x2	/* 16-byte aligned read cursor.  */
#define synd		x3	/* 64-bit syndrome / nibble mask.  */
#define shift		x4	/* First chunk only: bit offset of srcin in the mask.  */
#define tmp		x4	/* Scratch; shares x4, shift is dead by then.  */
#define cntrem		x5	/* Loop counter derived from cntin.  */

#define qdata		q0	/* Current 16-byte chunk (q view of v0).  */
#define vdata		v0	/* Current 16-byte chunk (vector view).  */
#define vhas_chr	v1	/* Per-byte compare result: 0xff where byte == 0.  */
#define vend		v2	/* Compare result reduced to 64 bits.  */
#define dend		d2	/* Low 64 bits of vend.  */

/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit nibble mask value with four
   bits per byte.  We take 4 bits of every comparison byte with the "shift
   right and narrow by 4" instruction (shrn).  Since the bits in the nibble
   mask reflect the order in which things occur in the original string,
   counting trailing zeros identifies exactly which byte matched.

   Interface:

   In:       x0 = srcin, x1 = cntin.
   Out:      x0 = index of the first zero byte at or after srcin, or cntin
	     if no zero byte is found before the limit.
   Clobbers: x2-x5, v0-v2, condition flags.  The stack is not used.  */

ENTRY (__strnlen)
	/* PTR_ARG and SIZE_ARG are provided by sysdep.h; presumably they
	   normalise the pointer and size arguments for the ABI in use
	   (NOTE(review): confirm against sysdep.h).  */
	PTR_ARG (0)
	SIZE_ARG (1)

	/* Round the start address down to a 16-byte boundary.  A zero limit
	   returns immediately, before any memory is read.  */
	bic	src, srcin, 15
	cbz	cntin, L(nomatch)

	/* First chunk: load the aligned 16 bytes that contain srcin and
	   advance src to the following chunk.  */
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Four mask bits per byte.  The register shift below only uses the
	   low six bits of the amount, i.e. (srcin & 15) * 4.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Drop the nibbles belonging to bytes that precede srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)

L(finish):
	/* rbit + clz counts trailing zeros; dividing by four turns the bit
	   position into a byte index.  Return min (index, cntin).  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

L(start_loop):
	/* tmp = number of string bytes covered by the first chunk.  If the
	   limit does not reach past them, the answer is cntin.  */
	sub	tmp, src, srcin
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk.  The loop is
	   unrolled twice and only its second half checks the count, so when
	   the number of remaining chunks, (cntrem + 15) / 16, is odd we
	   enter at the second half.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 5
L(loop32):
	/* First half: no count check needed here, see the entry test.  */
	ldr	qdata, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Pairwise maximum folds the 16 compare bytes into the low 64 bits;
	   a nonzero value means the chunk contains a zero byte.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	/* Second half: account for 32 bytes per iteration and leave once the
	   limit has been reached; L(end) then inspects this last chunk.  */
	ldr	qdata, [src], 16
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)

L(end):
	/* Build the exact nibble mask for the chunk in vhas_chr and step src
	   back to the start of that chunk (the loads post-increment).  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	src, src, 16
	mov	synd, vend.d[0]
	sub	result, src, srcin
	/* Little-endian: the first string byte is in the least significant
	   nibble, so reverse the bits before counting leading zeros.  With a
	   big-endian ldr the first byte already sits in the most significant
	   nibble and clz alone is enough.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	/* If the chunk held no zero byte, synd is 0 and clz yields 64, i.e.
	   16 bytes; the clamp below then returns cntin.  */
	add	result, result, synd, lsr 2
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

L(nomatch):
	/* No zero byte within the limit: the length is the limit itself.  */
	mov	result, cntin
	ret

END (__strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)