/* Highly optimized version for i586.
   Copyright (C) 1997-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Stack displacements of the arguments, valid once the prologue below has
   pushed %edi and %esi: 4 bytes of return address plus 8 bytes for the
   two saved registers put the first argument at 12(%esp).  The macros
   expand to plain sums and are used in this file only as displacements,
   e.g. DEST(%esp).  */
#define PARMS	4+8	/* space for 2 saved regs */
#define RTN	PARMS
#define DEST	RTN
#define SRC	DEST+4
#define LEN	SRC+4

        .text
#if defined PIC && IS_IN (libc)
/* __memcpy_chk: checked entry point, built only for PIC libc.

   Nothing has been pushed yet, so the three memcpy arguments sit at
   4/8/12(%esp) and the extra fourth argument at 16(%esp) (presumably the
   size of the destination object -- confirm against the C prototype).

   If that fourth argument is below the requested length (unsigned
   compare) control transfers to __chk_fail.  Otherwise there is no
   return or jump here: execution continues into the memcpy entry that
   follows, with the stack untouched so memcpy finds its arguments in the
   usual slots.  */
ENTRY (__memcpy_chk)
	movl	12(%esp), %eax		/* %eax = requested length */
	cmpl	%eax, 16(%esp)		/* flags from 16(%esp) - length */
	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* 4th arg < length */
END (__memcpy_chk)
#endif
/* void *memcpy (void *dest, const void *src, size_t len)

   Arguments are read from the stack through the DEST/SRC/LEN
   displacements after %edi and %esi have been saved.

   Register roles:
     %edi  write pointer (advanced by every copy step)
     %esi  read pointer  (advanced by every copy step)
     %ecx  byte count / loop counter
     %eax, %edx  scratch; %eax carries the return value at exit

   Return value: the original DEST, or, when built with USE_AS_MEMPCPY
   defined, %edi after the last byte has been copied.

   Strategy:
     len <= 32  one `rep movsb' for everything.
     len >  32  copy (-dest & 3) bytes so the destination is 4-byte
		aligned, run the unrolled 32-bytes-per-iteration loop
		while at least 32 bytes remain, then finish the
		remainder with `rep movsb'.  */
ENTRY (memcpy)

	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)

	movl	DEST(%esp), %edi
	cfi_rel_offset (edi, 4)		/* saved %edi lives at 4(%esp) */
	movl	SRC(%esp), %esi
	cfi_rel_offset (esi, 0)		/* saved %esi lives at 0(%esp) */
	movl	LEN(%esp), %ecx
	/* Copy of DEST: it is the return value on the short path and the
	   input of the alignment computation on the long path.  */
	movl	%edi, %eax

	/* We need this in any case.  */
	cld

	/* Cutoff for the big loop is a size of 32 bytes since otherwise
	   the loop will never be entered.  */
	cmpl	$32, %ecx
	jbe	L(1)

	/* %eax = (-dest) & 3, the number of bytes up to the next 4-byte
	   boundary of the destination.  After the exchange %ecx holds that
	   head count for `rep movsb' and %eax the bytes left afterwards.  */
	negl	%eax
	andl	$3, %eax
	subl	%eax, %ecx
	xchgl	%eax, %ecx

	rep; movsb

	/* Bias the counter by -32 so that a non-negative value means "at
	   least 32 bytes left".  The test is on the sign bit, so a count
	   with the top bit set after the subtraction also leaves here and
	   is copied by the final `rep movsb'.  */
	movl	%eax, %ecx
	subl	$32, %ecx
	js	L(2)

	/* Read ahead to make sure we write in the cache since the stupid
	   i586 designers haven't implemented read-on-write-miss.
	   The values loaded from the destination here and at L(3) are
	   discarded; %eax and %edx are overwritten by the source loads.  */
	movl	(%edi), %eax
L(3):	movl	28(%edi), %edx

	/* Now correct the loop counter.  Please note that in the following
	   code the flags are not changed anymore: only movl and leal
	   follow, so the `jns' at the bottom still sees the sign produced
	   by this subtraction.  */
	subl	$32, %ecx

	/* Move 32 bytes as eight dwords, two at a time through %eax/%edx.  */
	movl	(%esi), %eax
	movl	4(%esi), %edx
	movl	%eax, (%edi)
	movl	%edx, 4(%edi)
	movl	8(%esi), %eax
	movl	12(%esi), %edx
	movl	%eax, 8(%edi)
	movl	%edx, 12(%edi)
	movl	16(%esi), %eax
	movl	20(%esi), %edx
	movl	%eax, 16(%edi)
	movl	%edx, 20(%edi)
	movl	24(%esi), %eax
	movl	28(%esi), %edx
	movl	%eax, 24(%edi)
	movl	%edx, 28(%edi)

	/* Advance both pointers with leal rather than addl so the flags
	   from the counter update stay intact.  */
	leal	32(%esi), %esi
	leal	32(%edi), %edi

	jns	L(3)

	/* Correct extra loop counter modification.  %ecx becomes the number
	   of bytes still to copy.  */
L(2):	addl	$32, %ecx
#ifndef USE_AS_MEMPCPY
	/* %eax was used as scratch above; reload the return value.  */
	movl	DEST(%esp), %eax
#endif

	/* Tail, and the whole copy when len <= 32.  */
L(1):	rep; movsb

#ifdef USE_AS_MEMPCPY
	/* Return the advanced write pointer instead of DEST.  */
	movl	%edi, %eax
#endif

	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (memcpy)
#ifndef USE_AS_MEMPCPY
libc_hidden_builtin_def (memcpy)
#endif