/* strcpy/stpcpy - copy a string returning pointer to start/end.
   Copyright (C) 2013-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.

   To test the page crossing code path more thoroughly, compile with
   -DSTRCPY_TEST_PAGE_CROSS - this will force all unaligned copies through
   the slower entry path.  This option is not intended for production use.

   NOTE(review): nothing in the code visible in this file tests
   STRCPY_TEST_PAGE_CROSS, so the paragraph above looks stale - confirm
   before relying on it.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Arguments and results.  */
#define dstin		x0
#define srcin		x1
#define result		x0

/* Scratch registers.  Several names deliberately share one register
   because their live ranges never overlap: len/synd are both x4 and
   tmp/shift are both x5.  data1/dataw1 and data2/dataw2 are the 64-bit
   and 32-bit views of x6 and x7.  */
#define src		x2
#define dst		x3
#define len		x4
#define synd		x4
#define	tmp		x5
#define shift		x5
#define data1		x6
#define dataw1		w6
#define data2		x7
#define dataw2		w7

/* SIMD registers.  dataq/vdata are two views of register 0 and dend/vend
   two views of register 2.  dataq2 (q1) is the same register as vhas_nul
   (v1), so it may only be loaded once the comparison result held in
   vhas_nul is no longer needed.  */
#define dataq		q0
#define vdata		v0
#define vhas_nul	v1
#define vend		v2
#define dend		d2
#define dataq2		q1

/* STRCPY is the entry-point name.  IFSTPCPY (X, ...) expands to its
   arguments rejoined with commas in the stpcpy build and to nothing in
   the strcpy build, so the instruction it wraps is emitted for stpcpy
   only.  */
#ifdef BUILD_STPCPY
# define STRCPY __stpcpy
# define IFSTPCPY(X,...) X,__VA_ARGS__
#else
# define STRCPY strcpy
# define IFSTPCPY(X,...)
#endif

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros (after the
   mask has been bit-reversed where needed, see the rbit instructions below)
   identifies exactly which byte matched.  The bit count is divided by four to
   turn it into a byte index.  */

/* STRCPY (dstin, srcin)

   In:   x0 = dstin, destination buffer.
	 x1 = srcin, NUL-terminated source string.
   Out:  x0 = dstin when built as strcpy (the only writes to x0 are the
	 IFSTPCPY instructions, which are not emitted in that build);
	 x0 = address of the copied NUL terminator when built as stpcpy.
   Uses: x2-x7, v0-v2 and the condition flags (subs in L(less8)).

   Every path copies the string with at most two fixed-size moves per
   size class: one anchored at the start of the data and one anchored so
   that its last byte is the NUL terminator.  The two moves may overlap.  */
ENTRY (STRCPY)
	/* PTR_ARG is supplied by sysdep.h and is opaque here; presumably it
	   sanitises pointer arguments 0 and 1 on ILP32 - TODO confirm.  */
	PTR_ARG (0)
	PTR_ARG (1)
	/* Examine the aligned 16-byte chunk that contains the first source
	   byte.  The load may include bytes that precede srcin.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* shift = 4 * srcin.  The register-controlled lsr below only uses the
	   low six bits, i.e. 4 * (srcin & 15): four mask bits for every byte
	   that lies before srcin in the chunk.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Discard the mask bits of the bytes that precede srcin.  */
	lsr	synd, synd, shift
	cbnz	synd, L(tail)

	/* No NUL in the first chunk: advance src by 16 (pre-index writeback)
	   and examine the second aligned chunk.  */
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(start_loop)

	/* NUL found in the second chunk.  The bit reversal is skipped on
	   big-endian for chunks loaded with ldr.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* tmp = number of string bytes before this chunk (1..16);
	   len = tmp + index of the NUL within the chunk, i.e. the string
	   length (1..31).  */
	sub	tmp, src, srcin
	clz	len, synd
	add	len, tmp, len, lsr 2
	/* Bit 4 clear means len < 16 because len never exceeds 31.  */
	tbz	len, 4, L(less16)
	/* 16 <= len <= 31: move bytes [0, 15] and bytes [len - 15, len]; the
	   second move ends on the NUL terminator.  */
	sub	tmp, len, 15
	ldr	dataq, [srcin]
	ldr	dataq2, [srcin, tmp]
	str	dataq, [dstin]
	str	dataq2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	/* Align to 16 bytes only if that costs at most 8 bytes of padding.  */
	.p2align 4,,8
L(tail):
	/* NUL found in the first chunk, which was loaded with ld1.  The rbit
	   is unconditional here, presumably because the lane order produced
	   by ld1 {.16b} does not depend on endianness - TODO confirm.
	   len = index of the NUL relative to srcin (0..15).  */
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2

	.p2align 4
L(less16):
	/* 0 <= len <= 15.  Bit 3 clear means len < 8.  */
	tbz	len, 3, L(less8)
	/* 8 <= len <= 15: move bytes [0, 7] and bytes [len - 7, len].  */
	sub	tmp, len, 7
	ldr	data1, [srcin]
	ldr	data2, [srcin, tmp]
	str	data1, [dstin]
	str	data2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(less8):
	/* 0 <= len <= 7.  The subtraction borrows (b.lo taken) when
	   len < 3.  */
	subs	tmp, len, 3
	b.lo	L(less4)
	/* 3 <= len <= 7: move bytes [0, 3] and bytes [len - 3, len].  */
	ldr	dataw1, [srcin]
	ldr	dataw2, [srcin, tmp]
	str	dataw1, [dstin]
	str	dataw2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(less4):
	/* 0 <= len <= 2.  For a non-empty string move the first two bytes
	   (for len == 1 the second of them is already the NUL), then store
	   the terminator explicitly at dstin[len].  */
	cbz	len, L(zerobyte)
	ldrh	dataw1, [srcin]
	strh	dataw1, [dstin]
L(zerobyte):
	strb	wzr, [dstin, len]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(start_loop):
	/* Neither of the first two aligned chunks holds a NUL, so the 16
	   bytes starting at srcin are all string data: copy them unaligned.
	   len = src - srcin (1..16), and dst = dstin + len is the
	   destination address that corresponds to the aligned src.  dataq
	   still holds the NUL-free chunk at src.  */
	sub	len, src, srcin
	ldr	dataq2, [srcin]
	add	dst, dstin, len
	str	dataq2, [dstin]

	.p2align 5
L(loop):
	/* Store the NUL-free chunk held in dataq and advance dst by 16
	   (post-index), then advance src by 16 (pre-index) and load the next
	   aligned chunk.  dst keeps corresponding to src.  */
	str	dataq, [dst], 16
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* Pairwise maximum folds the 16 comparison bytes into the low 64
	   bits, which are nonzero iff some byte of the chunk is NUL.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)

	/* The chunk at src contains the NUL: build the nibble mask to find
	   its index.  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* len = index of the NUL within the chunk (0..15).  */
	clz	len, synd
	lsr	len, len, 2
	/* tmp = len - 15 is zero or negative: move the 16 bytes that end on
	   the NUL, overlapping data that has already been copied.  */
	sub	tmp, len, 15
	ldr	dataq, [src, tmp]
	str	dataq, [dst, tmp]
	IFSTPCPY (add result, dst, len)
	ret

END (STRCPY)

/* Symbol aliases and hidden definitions for the selected build.  The
   macros used here are defined outside this file.  */
#ifdef BUILD_STPCPY
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)
#else
libc_hidden_builtin_def (strcpy)
#endif