/* Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)	code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline-align the source pointer.
 * Experiments on the architectures tested (StrongARM and XScale) did not
 * show this to be worthwhile.  That might change on future hardware.
 */
//#define CALGN(code...)	code
#define CALGN(code...)

/*
 * Endian-independent macros for shifting bytes within registers.
 */
#ifndef __ARMEB__
#define PULL	lsr
#define PUSH	lsl
#else
#define PULL	lsl
#define PUSH	lsr
#endif

	.text
	.syntax unified

/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy, which
 * is normally a bit faster.  A backward copy is needed only when dest
 * falls inside the source region (src < dest < src + n); in every other
 * case a forward copy cannot clobber bytes it has yet to read.
 */

ENTRY(memmove)

	subs	ip, r0, r1		@ ip = dest - src
	cmphi	r2, ip			@ if dest > src, compare n with dest - src
#if !IS_IN (libc)
	bls	memcpy			@ taken unless src < dest < src + n
#else
	bls	HIDDEN_JUMPTARGET(memcpy)
#endif

	push	{r0, r4, lr}
	cfi_adjust_cfa_offset (12)
	cfi_rel_offset (r4, 4)
	cfi_rel_offset (lr, 8)

	cfi_remember_state

	add	r1, r1, r2		@ both pointers start past the end
	add	r0, r0, r2
	subs	r2, r2, #4
	blo	8f			@ fewer than 4 bytes: byte tail only
	ands	ip, r0, #3
	PLD(	pld	[r1, #-4]		)
	bne	9f			@ destination not word-aligned
	ands	ip, r1, #3
	bne	10f			@ source not word-aligned

1:	subs	r2, r2, #(28)
	push	{r5 - r8}
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	blo	5f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, ip		)  @ C is set here
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	pld	[r1, #-4]		)
2:	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #-32]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #-64]		)
	PLD(	pld	[r1, #-96]		)

3:	PLD(	pld	[r1, #-128]		)
4:	ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
	subs	r2, r2, #32
	stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
	bhs	3b

5:	ands	ip, r2, #28
	rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
	/* C is always clear here.  */
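	/* ip = 32 - (remaining whole words * 4), i.e. 4..28 when any
	   words are left.  Each ldr slot below is padded to
	   1 << ARM_BX_ALIGN_LOG2 bytes, so adding ip scaled to the slot
	   size onto the pc skips the leading slots and runs exactly the
	   loads needed for the 1-7 leftover words; the matching str
	   ladder further down stores the same registers.  With no words
	   left the addne is not taken and we branch straight to 7f.  */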
	addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	b	7f
#else
	beq	7f
	push	{r10}
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (r10, 0)
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 6.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(6f - (0b + PC_OFS))
	bx	r10
#endif

	.p2align ARM_BX_ALIGN_LOG2
6:	nop
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r3, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r4, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r5, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r6, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r7, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	r8, [r1, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	ldr	lr, [r1, #-4]!

#ifndef ARM_ALWAYS_BX
	add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	nop
#else
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 66.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(66f - (0b + PC_OFS))
	bx	r10
#endif

	.p2align ARM_BX_ALIGN_LOG2
66:	nop
	.p2align ARM_BX_ALIGN_LOG2
	str	r3, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r4, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r5, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r6, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r7, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	r8, [r0, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	str	lr, [r0, #-4]!

#ifdef ARM_ALWAYS_BX
	pop	{r10}
	cfi_adjust_cfa_offset (-4)
	cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:	pop	{r5 - r8}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)

8:	movs	r2, r2, lsl #31		@ copy the final r2 & 3 bytes:
	ldrbne	r3, [r1, #-1]!		@ NE = bit 0 set, CS = bit 1 set
	ldrbcs	r4, [r1, #-1]!
	ldrbcs	ip, [r1, #-1]
	strbne	r3, [r0, #-1]!
	strbcs	r4, [r0, #-1]!
	strbcs	ip, [r0, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
	pop	{r0, r4, lr}
	cfi_adjust_cfa_offset (-12)
	cfi_restore (r4)
	cfi_restore (lr)
	bx	lr
#else
	pop	{r0, r4, pc}
#endif

	cfi_restore_state

9:	cmp	ip, #2			@ copy ip = 1..3 bytes to word-align dest
	ldrbgt	r3, [r1, #-1]!
	ldrbge	r4, [r1, #-1]!
	ldrb	lr, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	strbge	r4, [r0, #-1]!
	subs	r2, r2, ip
	strb	lr, [r0, #-1]!
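	/* The alignment bytes were subtracted from r2 above; if that
	   went below zero, fewer than four bytes remain, so finish with
	   the byte tail at 8b.  Otherwise rejoin the word loop if the
	   source is now word-aligned too, or fall through to the
	   shifted copy at 10.  */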
	blo	8b
	ands	ip, r1, #3
	beq	1b

10:	bic	r1, r1, #3
	cmp	ip, #2
	ldr	r3, [r1, #0]
	beq	17f
	blt	18f


/*
 * Copy words backwards when the source is ip = \pull/8 bytes past a
 * word boundary: each destination word is assembled from two adjacent
 * aligned source words with a PUSH/PULL shift pair (the two macros
 * swap meaning on big-endian).
 */
	.macro backward_copy_shift push pull

	subs	r2, r2, #28
	blo	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:	push	{r5 - r8, r10}
	cfi_adjust_cfa_offset (20)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	cfi_rel_offset (r10, 16)

	PLD(	pld	[r1, #-4]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #-32]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #-64]		)
	PLD(	pld	[r1, #-96]		)

12:	PLD(	pld	[r1, #-128]		)
13:	ldmdb	r1!, {r7, r8, r10, ip}
	mov	lr, r3, PUSH #\push
	subs	r2, r2, #32
	ldmdb	r1!, {r3, r4, r5, r6}
	orr	lr, lr, ip, PULL #\pull
	mov	ip, ip, PUSH #\push
	orr	ip, ip, r10, PULL #\pull
	mov	r10, r10, PUSH #\push
	orr	r10, r10, r8, PULL #\pull
	mov	r8, r8, PUSH #\push
	orr	r8, r8, r7, PULL #\pull
	mov	r7, r7, PUSH #\push
	orr	r7, r7, r6, PULL #\pull
	mov	r6, r6, PUSH #\push
	orr	r6, r6, r5, PULL #\pull
	mov	r5, r5, PUSH #\push
	orr	r5, r5, r4, PULL #\pull
	mov	r4, r4, PUSH #\push
	orr	r4, r4, r3, PULL #\pull
	stmdb	r0!, {r4 - r8, r10, ip, lr}
	bhs	12b

	pop	{r5 - r8, r10}
	cfi_adjust_cfa_offset (-20)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)
	cfi_restore (r10)

14:	ands	ip, r2, #28
	beq	16f

15:	mov	lr, r3, PUSH #\push
	ldr	r3, [r1, #-4]!
	subs	ip, ip, #4
	orr	lr, lr, r3, PULL #\pull
	str	lr, [r0, #-4]!
	bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

16:	add	r1, r1, #(\pull / 8)	@ undo the bic: restore the byte position
	b	8b

	.endm


	backward_copy_shift	push=8	pull=24

17:	backward_copy_shift	push=16	pull=16

18:	backward_copy_shift	push=24	pull=8


END(memmove)
libc_hidden_builtin_def (memmove)
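/* For reference, the entry test (subs/cmphi/bls) behaves like this C
   sketch, assuming 32-bit modular pointer arithmetic (illustrative
   only, not part of the build):

	void *memmove (void *dest, const void *src, size_t n)
	{
	  if (!((uintptr_t) dest > (uintptr_t) src
		&& (uintptr_t) dest - (uintptr_t) src < n))
	    return memcpy (dest, src, n);
	  return copy_backwards (dest, src, n);
	}

   where copy_backwards is a placeholder name for the downward copy
   implemented above.  */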