/* Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above)
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...)    code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this to be a worthwhile thing to do.  That might be different in the
 * future.
 */
//#define CALGN(code...)        code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 */
#ifndef __ARMEB__
#define PULL            lsr
#define PUSH            lsl
#else
#define PULL            lsl
#define PUSH            lsr
#endif

		.text
		.syntax unified

/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy, which
 * is normally a bit faster.  Otherwise the copy is done going downwards.
 */

ENTRY(memmove)

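		/* ip = dest - src.  The cmphi runs only when dest > src
		   (unsigned); bls then falls through only when
		   src < dest < src + n, i.e. when a forward copy would
		   clobber not-yet-read source bytes.  Every other case is
		   safely handled by memcpy.  */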
		subs	ip, r0, r1
		cmphi	r2, ip
#if !IS_IN (libc)
		bls	memcpy
#else
		bls	HIDDEN_JUMPTARGET(memcpy)
#endif

		push	{r0, r4, lr}
		cfi_adjust_cfa_offset (12)
		cfi_rel_offset (r4, 4)
		cfi_rel_offset (lr, 8)

		cfi_remember_state

		add	r1, r1, r2
		add	r0, r0, r2
		subs	r2, r2, #4
		blo	8f
		ands	ip, r0, #3
	PLD(	pld	[r1, #-4]		)
		bne	9f
		ands	ip, r1, #3
		bne	10f

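		/* Both pointers now sit one past the end of their buffers
		   and are word aligned; copy downwards, 32 bytes at a time,
		   using r5-r8 as extra scratch registers.  */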
1:		subs	r2, r2, #(28)
		push	{r5 - r8}
		cfi_adjust_cfa_offset (16)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		blo	5f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, ip		)  @ C is set here
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	pld	[r1, #-4]		)
2:	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #-32]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #-64]		)
	PLD(	pld	[r1, #-96]		)

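		/* Main loop: move 32 bytes per iteration, preloading the
		   source well ahead of the current loads.  */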
3:	PLD(	pld	[r1, #-128]		)
4:		ldmdb	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
		subs	r2, r2, #32
		stmdb	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
		bhs	3b

5:		ands	ip, r2, #28
		rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
		/* C is always clear here.  */
		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		b	7f
#else
		beq	7f
		push	{r10}
		cfi_adjust_cfa_offset (4)
		cfi_rel_offset (r10, 0)
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 6.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(6f - (0b + PC_OFS))
		bx	r10
#endif
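		/* Duff's-device-style dispatch: every ldr below occupies
		   one 1 << ARM_BX_ALIGN_LOG2 slot, and the computed branch
		   above skips (32 - remaining)/4 slots, so exactly
		   remaining/4 of the loads execute.  */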
		.p2align ARM_BX_ALIGN_LOG2
6:		nop
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r3, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r4, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r5, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r6, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r7, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r8, [r1, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		ldr	lr, [r1, #-4]!

#ifndef ARM_ALWAYS_BX
		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		nop
#else
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 66.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(66f - (0b + PC_OFS))
		bx	r10
#endif
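		/* Matching store ladder: the same slot offset stores the
		   words just loaded above.  */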
		.p2align ARM_BX_ALIGN_LOG2
66:		nop
		.p2align ARM_BX_ALIGN_LOG2
		str	r3, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	r4, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	r5, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	r6, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	r7, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	r8, [r0, #-4]!
		.p2align ARM_BX_ALIGN_LOG2
		str	lr, [r0, #-4]!

#ifdef ARM_ALWAYS_BX
		pop	{r10}
		cfi_adjust_cfa_offset (-4)
		cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:		pop	{r5 - r8}
		cfi_adjust_cfa_offset (-16)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)

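		/* Tail: 0-3 bytes remain.  The lsl #31 shifts bit 0 of the
		   residual count into N (ne: one byte) and bit 1 into C
		   (cs: two more bytes).  */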
8:		movs	r2, r2, lsl #31
		ldrbne	r3, [r1, #-1]!
		ldrbcs	r4, [r1, #-1]!
		ldrbcs	ip, [r1, #-1]
		strbne	r3, [r0, #-1]!
		strbcs	r4, [r0, #-1]!
		strbcs	ip, [r0, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
		pop	{r0, r4, lr}
		cfi_adjust_cfa_offset (-12)
		cfi_restore (r4)
		cfi_restore (lr)
		bx	lr
#else
		pop	{r0, r4, pc}
#endif

		cfi_restore_state

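		/* Destination is not word aligned: ip = dest & 3, so copy
		   ip leading bytes (from the top, since we copy downwards)
		   to word align r0, then retest the source alignment.  */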
9:		cmp	ip, #2
		ldrbgt	r3, [r1, #-1]!
		ldrbge	r4, [r1, #-1]!
		ldrb	lr, [r1, #-1]!
		strbgt	r3, [r0, #-1]!
		strbge	r4, [r0, #-1]!
		subs	r2, r2, ip
		strb	lr, [r0, #-1]!
		blo	8b
		ands	ip, r1, #3
		beq	1b

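		/* Source and destination are mutually misaligned:
		   ip = src & 3.  Round src down to a word boundary, preload
		   the word holding the top source bytes, and pick a shift
		   loop: ip == 2 branches to 17, ip == 1 to 18, and ip == 3
		   falls through to the first instantiation below.  */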
10:		bic	r1, r1, #3
		cmp	ip, #2
		ldr	r3, [r1, #0]
		beq	17f
		blt	18f

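		/* Copy backwards from a misaligned source: each aligned
		   destination word is assembled from two adjacent source
		   words using the endian independent PUSH/PULL shifts,
		   with r3 carrying the partial word between iterations.  */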
		.macro	backward_copy_shift push pull

		subs	r2, r2, #28
		blo	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		push	{r5 - r8, r10}
		cfi_adjust_cfa_offset (20)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		cfi_rel_offset (r10, 16)

	PLD(	pld	[r1, #-4]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #-32]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #-64]		)
	PLD(	pld	[r1, #-96]		)

12:	PLD(	pld	[r1, #-128]		)
13:		ldmdb   r1!, {r7, r8, r10, ip}
		mov     lr, r3, PUSH #\push
		subs    r2, r2, #32
		ldmdb   r1!, {r3, r4, r5, r6}
		orr     lr, lr, ip, PULL #\pull
		mov     ip, ip, PUSH #\push
		orr     ip, ip, r10, PULL #\pull
		mov     r10, r10, PUSH #\push
		orr     r10, r10, r8, PULL #\pull
		mov     r8, r8, PUSH #\push
		orr     r8, r8, r7, PULL #\pull
		mov     r7, r7, PUSH #\push
		orr     r7, r7, r6, PULL #\pull
		mov     r6, r6, PUSH #\push
		orr     r6, r6, r5, PULL #\pull
		mov     r5, r5, PUSH #\push
		orr     r5, r5, r4, PULL #\pull
		mov     r4, r4, PUSH #\push
		orr     r4, r4, r3, PULL #\pull
		stmdb   r0!, {r4 - r8, r10, ip, lr}
		bhs	12b

		pop	{r5 - r8, r10}
		cfi_adjust_cfa_offset (-20)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)
		cfi_restore (r10)

14:		ands	ip, r2, #28
		beq	16f

15:		mov     lr, r3, PUSH #\push
		ldr	r3, [r1, #-4]!
		subs	ip, ip, #4
		orr	lr, lr, r3, PULL #\pull
		str	lr, [r0, #-4]!
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

16:		add	r1, r1, #(\pull / 8)
		b	8b

		.endm

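		/* One instantiation per source misalignment: 3 bytes
		   (push=8/pull=24, fall-through), 2 bytes (push=16/pull=16,
		   label 17) and 1 byte (push=24/pull=8, label 18).  */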
		backward_copy_shift	push=8	pull=24

17:		backward_copy_shift	push=16	pull=16

18:		backward_copy_shift	push=24	pull=8


END(memmove)
libc_hidden_builtin_def (memmove)