/* memset/bzero -- set memory area to CH/0
   Highly optimized version for ix86, x>=5.
   Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Stack offsets of memset's arguments, relative to %esp *after* memset
   has pushed its one saved register (%edi):
       0(%esp)  saved %edi
       4(%esp)  return address
       8(%esp)  DEST  -- destination pointer
      12(%esp)  CHR   -- fill value (only its low byte is read)
      16(%esp)  LEN   -- byte count
   The macros expand to unparenthesised sums, so use them only directly
   as a displacement, e.g. DEST(%esp).  */
#define PARMS	4+4	/* return address + space for 1 saved reg */
#define RTN	PARMS
#define DEST	RTN
#define CHR	DEST+4
#define LEN	CHR+4

        .text
#if defined SHARED && IS_IN (libc)
/* __memset_chk: bounds-checked entry point placed directly in front of
   memset.

   On entry (nothing pushed yet) 12(%esp) is the third stack argument and
   16(%esp) the fourth.  The third is the byte count memset will use; the
   fourth is presumably the size of the destination object -- confirm
   against the C prototype.  If the fourth is below the third (unsigned
   compare), control goes to __chk_fail.  Otherwise there is no ret: the
   code falls through into memset, which immediately follows and finds
   its own three arguments in the same stack slots.

   Clobbers %eax and the flags; memset rebuilds %eax before using it.  */
ENTRY (__memset_chk)
	movl	12(%esp), %eax		/* %eax = requested length */
	cmpl	%eax, 16(%esp)		/* flags from 16(%esp) - length */
	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* unsigned: 4th arg < length */
END (__memset_chk)
#endif
/* void *memset (void *dest, int ch, size_t len)

   Store the low byte of CH into LEN bytes starting at DEST and return
   DEST in %eax.  The arguments are read from the stack through the
   DEST/CHR/LEN offsets, which are valid once %edi has been pushed.

   Register use:
     %edi  running destination pointer (saved and restored here)
     %eax  fill byte replicated into all four bytes
     %edx  bytes still to write (biased by -32 inside the unrolled loop)
     %ecx  scratch: rep counts and the discarded cache-line loads

   Strategy: short requests (< 36 bytes) go straight to rep stosl plus a
   rep stosb tail.  Longer ones first align %edi to 4 bytes, then run a
   loop storing 32 bytes per iteration, and finish through the same
   stosl/stosb tail.

   When built, __memset_chk sits directly in front of this entry and
   falls through into it.  */
ENTRY (memset)

	pushl	%edi
	cfi_adjust_cfa_offset (4)

	movl	DEST(%esp), %edi
	cfi_rel_offset (edi, 0)
	movl	LEN(%esp), %edx
	movb	CHR(%esp), %al	/* only the low byte of CH is used */
	movb	%al, %ah	/* %ax = fill byte twice */
	movl	%eax, %ecx	/* keep that 16-bit pattern in %cx */
	shll	$16, %eax	/* pattern to high half; stale upper bits drop out */
	movw	%cx, %ax	/* refill low half: %eax = 4 x fill byte */
	cld			/* string stores below advance %edi upwards */

/* If less than 36 bytes to write, skip tricky code (it wouldn't work).
   The compare is signed, so a length with its top bit set also takes
   this branch; the rep stosl path at L(2) copes with any count, whereas
   the unrolled loop's signed jge would not.  */
	cmpl	$36, %edx
	movl	%edx, %ecx	/* needed when branch is taken! (mov keeps flags) */
	jl	L(2)

/* First write 0-3 bytes to make the pointer 32-bit aligned.  */
	movl	%edi, %ecx	/* Copy ptr to ecx... */
	negl	%ecx		/* ...and negate that and... */
	andl	$3, %ecx	/* ...mask to get byte count.  */
	subl	%ecx, %edx	/* adjust global byte count */
	rep
	stosb

/* At least 33 bytes remain here (>= 36 minus at most 3), so after the
   bias below %edx >= 1 and the first loop iteration is always safe.  */
	subl	$32, %edx	/* offset count for unrolled loop */
	movl	(%edi), %ecx	/* Fetch destination cache line (value unused) */

/* NOTE(review): whether the 2 is a byte count or a power of two depends
   on the assembler target -- confirm the intended alignment.  */
	.align	2, 0x90		/* supply 0x90 for broken assemblers */
L(1):	movl	28(%edi), %ecx	/* allocate cache line for destination */
	subl	$32, %edx	/* decr loop count; sets the flags jge tests */
	movl	%eax, 0(%edi)	/* store words pairwise */
	movl	%eax, 4(%edi)
	movl	%eax, 8(%edi)
	movl	%eax, 12(%edi)
	movl	%eax, 16(%edi)
	movl	%eax, 20(%edi)
	movl	%eax, 24(%edi)
	movl	%eax, 28(%edi)
	leal	32(%edi), %edi	/* update destination pointer; lea, not add,
				   so the flags from subl survive */
	jge	L(1)		/* another full 32 bytes still to write */

/* Here %edx is in [-32, -1]; undoing the bias gives the 0-31 bytes left.  */
	leal	32(%edx), %ecx	/* reset offset count */

/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped).  */
L(2):	shrl	$2, %ecx	/* convert byte count to longword count */
	rep
	stosl

/* Finally write the last 0-3 bytes.  %edx may still carry the -32 bias,
   but 32 is a multiple of 4 so its low two bits are unaffected.  */
	movl	%edx, %ecx
	andl	$3, %ecx
	rep
	stosb

	/* Load result (only if used as memset).  */
	movl DEST(%esp), %eax	/* start address of destination is result */
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (memset)
libc_hidden_builtin_def (memset)