/* Optimized version of the standard strncpy() function.
   This file is part of the GNU C Library.
   Copyright (C) 2000-2022 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* Return: dest

   Inputs:
        in0:    dest
        in1:    src
        in2:    len

   In this form, it assumes little endian mode.
 */

/* Overview of the code below:

   - len <= 24: plain byte loop (.short_len).
   - Otherwise bytes are copied until dest is 8-byte aligned (.l1) and
     the rest is moved 8 bytes per iteration:
       .l3  when src is 8-byte aligned at that point,
       .l2  otherwise; each output word is assembled from two aligned
            source words with a right and a left shift.
   - As soon as a zero byte is seen, the rest of the destination is
     filled with zeros (.found0, or the byte loops, which keep storing
     the zero they already hold).
   - If the length runs out first, the remaining (fewer than 8) bytes
     are stored one by one from `value' (.not_found0).

   The word loops read the source with speculative loads (ld8.s) that
   are validated with chk.s; the .recoveryN blocks are the targets taken
   when such a check fails.  */

#include <sysdep.h>
#undef ret

/* Register roles.  */
#define saved_lc	r15	/* ar.lc on entry  */
#define saved_pr	r16	/* predicate registers on entry  */
#define thresh		r17	/* 8 - (src % 8)  */
#define dest		r18	/* running destination pointer  */
#define dest2		r19	/* second store pointer for the zero fill  */
#define src		r20	/* running source pointer  */
#define len		r21	/* bytes still to be written  */
#define asrc		r22	/* src rounded down to a multiple of 8  */
#define tmp		r23	/* scratch  */
#define pos		r24	/* result of czx1.r  */
#define w0		r25	/* only mentioned in comments below  */
#define w1		r26	/* only mentioned in comments below  */
#define c		r27	/* current byte, or scratch count  */
#define sh2		r28	/* 64 - sh1  */
#define sh1		r29	/* src % 8, later 8 * (src % 8)  */
#define loopcnt		r30	/* value destined for ar.lc  */
#define value		r31	/* 8-byte word being examined or stored  */

ENTRY(strncpy)
	.prologue
	alloc 	r2 = ar.pfs, 3, 0, 29, 32

#define MEMLAT 2
	.rotr	r[MEMLAT + 2]
	.rotp	p[MEMLAT + 1]

	mov	ret0 = in0		// return value = dest
	.save pr, saved_pr
	mov	saved_pr = pr           // save the predicate registers
	.save ar.lc, saved_lc
	mov 	saved_lc = ar.lc	// save the loop counter
	mov	ar.ec = 0		// ec is not guaranteed to
					// be zero upon function entry
	.body
	cmp.geu p6, p5 = 24, in2	// p6 = (len <= 24), p5 = (len > 24)
(p6)	br.cond.spnt .short_len
	sub	tmp = r0, in0 ;;	// tmp = -dest
	mov	len = in2		// len
	mov 	dest = in0		// dest
	mov 	src = in1		// src
	and	tmp = 7, tmp ;;		// tmp = -dest % 8
	cmp.eq	p6, p7 = tmp, r0	// p6 = dest is already 8-byte aligned
	adds	loopcnt = -1, tmp	// --loopcnt
(p6)	br.cond.sptk .dest_aligned ;;
	sub	len = len, tmp		// len -= -dest % 8
	mov	ar.lc = loopcnt
.l1:					// copy -dest % 8 bytes
					// p5 is set on entry (len > 24) and is
					// cleared once a zero byte was copied,
					// after which c stays zero
(p5)	ld1	c = [src], 1		// c = *src++
	;;
	st1	[dest] = c, 1		// *dest++ = c
	cmp.ne	p5, p7 = c, r0		// p7 = a zero byte has been stored
	br.cloop.dptk .l1 ;;
(p7)	br.cond.dpnt	.found0_align

.dest_aligned:				// p7 should be cleared here
	shr.u	c = len, 3		// c = len / 8
	and	sh1 = 7, src 		// sh1 = src % 8
	and	asrc = -8, src	;;	// asrc = src & -OPSIZ -- align src
	adds	c = (MEMLAT-1), c	// c = (len / 8) + MEMLAT - 1
	sub	thresh = 8, sh1
	mov	pr.rot = 1 << 16	// set rotating predicates
	shl	sh1 = sh1, 3 ;;		// sh1 = 8 * (src % 8)
	mov	ar.lc = c		// "infinite" loop
	sub	sh2 = 64, sh1		// sh2 = 64 - sh1
	cmp.eq  p6, p0 = sh1, r0 	// is the src aligned?
(p6)    br.cond.sptk .src_aligned
	adds	c = -(MEMLAT-1), c ;;	// c = (len / 8)
	ld8	r[1] = [asrc],8		// first aligned word; it contains *src
	mov	ar.lc = c ;;

	// Unaligned source.  p6 is clear on entry, so the first pass stores
	// nothing; on later passes p6 means that the word assembled by the
	// previous pass contains no zero byte and is still to be stored.
	.align	32
.l2:
(p6)	st8	[dest] = value, 8	// store val to dest
	ld8.s	r[0] = [asrc], 8
	shr.u	value = r[1], sh1 ;; 	// value = w0 >> sh1
	czx1.r	pos = value ;;		// do we have an "early" zero
	cmp.lt	p7, p0 = pos, thresh	// in w0 >> sh1?
	adds	len = -8, len		// len -= 8
(p7)	br.cond.dpnt .nonalign_found0
	chk.s	r[0], .recovery2	// it is safe to do that only
.back2:					// after the previous test
	shl	tmp = r[0], sh2  	// tmp = w1 << sh2
	;;
	or	value = value, tmp ;;	// value |= tmp
	czx1.r	pos = value ;;
	cmp.ne	p7, p6 = 8, pos		// pos == 8 means no zero byte
(p7)	br.cond.dpnt .nonalign_found0
	br.ctop.dptk .l2 ;;
	adds	len = 8, len		// undo the last len -= 8
	br.cond.sptk	.not_found0 ;;
.nonalign_found0:
	// len has already been reduced by 8 for the current word.  The
	// unsigned test is false only for len in [-8, -1], i.e. when fewer
	// than 8 bytes were still requested.
	cmp.gtu	p6, p0 = -8, len
(p6)	br.cond.dptk .found0
	adds	len = 8, len		// undo the last len -= 8
	br.cond.sptk	.not_found0 ;;

	// Aligned source: loop using the rotating registers r[] and
	// predicates p[]; the stage guarded by p[MEMLAT] consumes the word
	// loaded into r[0] MEMLAT iterations earlier.
	.align	32
.src_aligned:
.l3:
(p[0])		ld8.s	r[0] = [src], 8
(p[MEMLAT])	chk.s	r[MEMLAT], .recovery3
.back3:
(p[MEMLAT])	mov	value = r[MEMLAT]
(p[MEMLAT])	czx1.r	pos = r[MEMLAT] ;;
(p[MEMLAT])	cmp.ne	p7, p0 = 8, pos	// p7 = word contains a zero byte
(p[MEMLAT])	adds	len = -8, len	// len -= 8
(p7)		br.cond.dpnt .found0
(p[MEMLAT])	st8	[dest] = r[MEMLAT], 8
		br.ctop.dptk .l3 ;;

	chk.s	r[MEMLAT-1], .recovery4
.back4:
	mov	value = r[MEMLAT-1]	// word holding the tail bytes

	// Fewer than 8 bytes are left and no zero byte has been stored yet:
	// emit them from `value', lowest byte first.  Once a zero byte has
	// been emitted p6 is cleared, so c stays zero for the remainder.
.not_found0:
	cmp.eq	p5, p6 = len, r0
	adds	len = -1, len
(p5)	br.cond.dptk	.restore_and_exit ;;
	mov	ar.lc = len
.l4:
(p6)	extr.u	c = value, 0, 8		// c = value & 0xff
(p6)	shr.u	value = value, 8 ;;
	st1	[dest] = c, 1
	cmp.ne	p6, p0 = c, r0
	br.cloop.dptk	.l4
	br.cond.sptk	.restore_and_exit

	// The zero byte was copied while aligning dest: continue at .found0
	// with an all-zero word, len being reduced by the 8 bytes that the
	// st8 in .found0 writes.
.found0_align:
	mov	pos = 0
	adds	len = -8, len
	mov	value = 0 ;;
	// `value' has a zero byte at index pos.  Clear every byte from that
	// index upwards, store the word, then zero-fill the remaining len
	// bytes: 16 bytes per iteration in .l5, one more word if at least 8
	// bytes are left, and finally single bytes in .l7.
.found0:
	shl	tmp = pos, 3		// tmp = 8 * pos
	shr.u   loopcnt = len, 4	// loopcnt = len / 16
	mov	c = -1 ;;
	cmp.eq  p6, p0 = loopcnt, r0	// p6 = no 16-byte chunk to clear
	adds	loopcnt = -1, loopcnt
	shl	c = c, tmp ;;		// c = mask of the bytes at index >= pos
	and	len = 0xf, len
	andcm	value = value, c	// value &= ~c
	mov	ar.lc = loopcnt ;;
	cmp.le	p7, p0 = 8, len		// p7 = one more full word to clear
	adds	dest2 = 16, dest	// reads dest before the post-increment
					// of the st8 below
	st8	[dest] = value, 8
	and	len = 0x7, len
(p6)	br.cond.dpnt	.l6 ;;
.l5:
	st8	[dest] = r0, 16
	st8	[dest2] = r0, 16
	br.cloop.dptk	.l5 ;;
.l6:
(p7)	st8	[dest] = r0, 8
	cmp.eq	p5, p0 = len, r0
	adds	len = -1, len
(p5)	br.cond.dptk .restore_and_exit ;;
	mov	ar.lc = len ;;
.l7:
	st1	[dest] = r0, 1
	br.cloop.dptk	.l7 ;;
.restore_and_exit:
	mov	ar.lc = saved_lc	// restore the loop counter
	mov	pr = saved_pr, -1	// restore the predicate registers
	br.ret.sptk.many b0

	// len <= 24: byte-by-byte copy.  After the terminating zero has been
	// copied p6 is cleared, so c stays zero and the rest is zero-filled.
.short_len:
	cmp.eq	p5, p0 = in2, r0	// nothing to do for len == 0
	adds	loopcnt = -1, in2
(p5)	br.cond.spnt .restore_and_exit ;;
	mov	ar.lc = loopcnt		// p6 should be set when we get here
.l8:
(p6)	ld1	c = [in1], 1		// c = *src++
	;;
	st1	[in0] = c, 1		// *dest++ = c
(p6)	cmp.ne	p6, p0 = c, r0
	br.cloop.dptk .l8
	;;
	mov	ar.lc = saved_lc	// restore the loop counter
	mov	pr = saved_pr, -1	// restore the predicate registers
	br.ret.sptk.many b0

	// chk.s in .l2 failed.  c = len + 8 is the number of bytes that were
	// still requested when this iteration started.  Reload the word
	// non-speculatively only if that exceeds thresh; otherwise use zero
	// (presumably because all bytes still needed come from the previous
	// word).
.recovery2:
	add	c = 8, len
	add	tmp = -8, asrc ;;	// address used by the failed ld8.s
	cmp.gtu	p8, p5 = c, thresh ;;
(p8)	ld8	r[0] = [tmp]
(p5)	mov	r[0] = r0
	br.cond.sptk .back2
	// chk.s in .l3 failed: redo the load non-speculatively.
.recovery3:
	add	tmp = -(MEMLAT + 1) * 8, src ;;
	ld8	r[MEMLAT] = [tmp]
	br.cond.sptk .back3
	// chk.s after .l3 failed: reload the tail word unless no bytes are
	// left, in which case zero is used instead.
.recovery4:
	cmp.eq	p5, p6 = len, r0
	add	tmp = -MEMLAT * 8, src ;;
(p6)	ld8	r[MEMLAT - 1] = [tmp]
(p5)	mov	r[MEMLAT - 1] = r0
	br.cond.sptk .back4
END(strncpy)
libc_hidden_builtin_def (strncpy)