/* Optimized version of the standard strcpy() function.
   This file is part of the GNU C Library.
   Copyright (C) 2000-2022 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* char *strcpy (char *dest, const char *src)

   Return: dest

   Inputs:
	in0:	dest
	in1:	src

   Outline:
     1. Copy single bytes until the destination pointer is 8-byte
	aligned (.head_loop).
     2. If the source is then not 8-byte aligned, read aligned source
	words and build every destination word out of two neighbouring
	source words with a pair of shifts (.shift_loop).
     3. Otherwise copy whole words directly (.word_loop).
     4. Once a word containing a zero byte shows up, store its bytes one
	at a time, up to and including the zero (.copy_tail).

   In this form, it assumes little endian mode.  For big endian mode,
   the two shifts in .shift_loop must be inverted:

	shl	chunk = r[1], skew	// chunk = w0 << skew
	shr.u	scratch = r[0], coskew	// scratch = w1 >> coskew
 */

#include <sysdep.h>
#undef ret

/* Stage distance between the speculative load and its first use in the
   software-pipelined .word_loop.  */
#define MEMLAT		2

/* Symbolic names for the static registers used below.  */
#define lc_save		r15	/* ar.lc as found on entry */
#define pr_save		r16	/* predicate registers as found on entry */
#define nvalid		r17	/* 8 - src % 8 */
#define dstp		r19	/* running destination pointer */
#define srcp		r20	/* running source pointer */
#define len		r21	/* not referenced by any instruction here */
#define srcq		r22	/* source pointer rounded down to 8 bytes */
#define scratch		r23	/* short-lived temporary */
#define zidx		r24	/* result of czx1.r on the current word */
#define w0		r25	/* not referenced by any instruction here */
#define w1		r26	/* not referenced by any instruction here */
#define ch		r27	/* byte currently being copied */
#define coskew		r28	/* 64 - skew */
#define skew		r29	/* src % 8, later 8 * (src % 8) */
#define nhead		r30	/* byte count for .head_loop */
#define chunk		r31	/* 8-byte word being examined or stored */

ENTRY(strcpy)
	.prologue
	// Frame: 2 inputs, 0 locals, 30 outputs, 32 rotating registers.
	alloc		r2 = ar.pfs, 2, 0, 30, 32

	.rotr		r[MEMLAT + 2]		// rotating general registers
	.rotp		p[MEMLAT + 1]		// rotating predicates

	mov		ret0 = in0		// the result is always dest
	.save pr, pr_save
	mov		pr_save = pr		// keep the caller predicates
	.save ar.lc, lc_save
	mov		lc_save = ar.lc		// keep the caller loop count
	.body
	sub		scratch = r0, in0 ;;	// scratch = -dest
	mov		dstp = in0
	mov		srcp = in1
	and		nhead = 7, scratch ;;	// nhead = -dest % 8
	cmp.eq		p6, p0 = nhead, r0	// p6: dest already aligned
	adds		nhead = -1, nhead	// ar.lc wants count - 1
(p6)	br.cond.sptk	.dst_aligned ;;
	mov		ar.lc = nhead

	// Phase 1: byte copy until dstp is a multiple of 8.
.head_loop:
	ld1		ch = [srcp], 1		// ch = *srcp++
	;;
	st1		[dstp] = ch, 1		// *dstp++ = ch
	cmp.eq		p6, p0 = ch, r0		// was that the terminator?
(p6)	br.cond.dpnt	.done
	br.cloop.dptk	.head_loop ;;

	// dstp is 8-byte aligned from here on.
.dst_aligned:
	and		skew = 7, srcp		// skew = src % 8
	mov		ar.lc = -1		// make the loops below "infinite"
	and		srcq = -8, srcp ;;	// srcq = src rounded down to 8
	sub		nvalid = 8, skew	// bytes left after the skew
	mov		pr.rot = 1 << 16	// initialise rotating predicates
	cmp.ne		p7, p0 = r0, r0		// p7 = false
	shl		skew = skew, 3 ;;	// skew is now a bit count
	sub		coskew = 64, skew
	cmp.eq		p6, p0 = skew, r0	// p6: source aligned as well
(p6)	br.cond.sptk	.both_aligned ;;
	ld8		r[1] = [srcq], 8 ;;	// first aligned source word

	// Phase 2: source misaligned with respect to the destination.
	// r[1] is the current aligned source word (w0) and r[0] the
	// following one (w1); the rotation done by br.ctop turns this
	// r[0] into the r[1] of the next pass.
	.align	32
.shift_loop:
	ld8.s		r[0] = [srcq], 8	// speculative load of w1
	shr.u		chunk = r[1], skew ;;	// chunk = w0 >> skew
	czx1.r		zidx = chunk ;;		// look for a zero byte
	cmp.lt		p7, p0 = zidx, nvalid	// inside the bytes taken from w0?
(p7)	br.cond.dpnt	.copy_tail
	// w1 may only be validated once w0 is known to hold no
	// terminator.
	chk.s		r[0], .shift_recover
.shift_resume:
	shl		scratch = r[0], coskew	// scratch = w1 << coskew
	;;
	or		chunk = chunk, scratch ;; // merge both parts
	czx1.r		zidx = chunk ;;
	cmp.ne		p7, p0 = 8, zidx	// 8 means no zero byte
(p7)	br.cond.dpnt	.copy_tail
	st8		[dstp] = chunk, 8	// store the full word
	br.ctop.dptk	.shift_loop ;;

	// Phase 3: source and destination are both 8-byte aligned.
	// Pipelined loop: the load is issued under p[0] and the word is
	// checked, tested and stored under p[MEMLAT].
.both_aligned:
.word_loop:
(p[0])		ld8.s	r[0] = [srcp], 8
(p[MEMLAT])	chk.s	r[MEMLAT], .word_recover
.word_resume:
(p[MEMLAT])	mov	chunk = r[MEMLAT]	// .copy_tail reads chunk
(p[MEMLAT])	czx1.r	zidx = r[MEMLAT] ;;
(p[MEMLAT])	cmp.ne	p7, p0 = 8, zidx	// 8 means no zero byte
(p7)		br.cond.dpnt .copy_tail
(p[MEMLAT])	st8	[dstp] = r[MEMLAT], 8
		br.ctop.dptk .word_loop ;;

	// Phase 4: chunk holds the remaining bytes and zidx the index
	// of the zero byte.  Store bytes 0 .. zidx, lowest first.
.copy_tail:
	mov	ar.lc = zidx
.tail_loop:
	extr.u	ch = chunk, 0, 8	// ch = chunk & 0xff
	shr.u	chunk = chunk, 8
	;;
	st1	[dstp] = ch, 1
	br.cloop.dptk	.tail_loop ;;

.done:
	mov	ar.lc = lc_save		// put back the caller loop count
	mov	pr = pr_save, -1	// put back the caller predicates
	br.ret.sptk.many b0

	// Recovery for the speculative load in .shift_loop: srcq has
	// already been advanced by 8, so reload from srcq - 8.
.shift_recover:
	add	scratch = -8, srcq ;;
	ld8	r[0] = [scratch]
	br.cond.sptk .shift_resume

	// Recovery for the speculative load in .word_loop: the word
	// checked at stage MEMLAT was read (MEMLAT + 1) * 8 bytes below
	// the current value of srcp.
.word_recover:
	add	scratch = -(MEMLAT + 1) * 8, srcp ;;
	ld8	r[MEMLAT] = [scratch]
	br.cond.sptk .word_resume
END(strcpy)
libc_hidden_builtin_def (strcpy)