1/* strcat(dest, src) -- Append SRC on the end of DEST.
2   For Intel 80x86, x>=4.
3   Copyright (C) 1994-2022 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
/* Byte offsets from %esp at which strcat finds its stack arguments.
   They are used below as displacements, e.g. DEST(%esp), after the
   prologue has pushed one register.  The expansions are deliberately
   left unparenthesized so that they form a plain displacement.  */
23#define PARMS	4+4	/* return address + space for 1 saved reg */
/* NOTE(review): RTN adds nothing to PARMS in this file; it looks like a
   placeholder slot kept for symmetry with sibling sources -- confirm.  */
24#define RTN	PARMS
/* First argument: destination string.  */
25#define DEST	RTN
/* Second argument: source string, one 4-byte slot above DEST.  */
26#define SRC	DEST+4
27
28	.text
/* char *strcat (char *dest, const char *src)

   Both arguments are read from the stack (DEST(%esp), SRC(%esp)); the
   original DEST value is returned in %eax.

   Register usage:
     %edx  scan pointer into DEST while looking for its terminating NUL;
	   from L(2) on it holds (address of DEST's NUL - SRC), so that
	   (%ecx,%edx) addresses the destination byte matching (%ecx)
     %ecx  SRC read pointer
     %eax  word/byte currently being examined or copied
     %edi  scratch for the NUL-detection arithmetic (pushed on entry,
	   popped before returning)

   Outline:
     a. return at once if SRC is empty;
     b. step through DEST bytewise until %edx is 4-byte aligned, then
	scan it a word at a time (four words per loop round) for the NUL;
     c. copy SRC bytewise until %ecx is 4-byte aligned, then a word at a
	time (four words per round) up to and including its NUL.

   The word loops only load from 4-byte-aligned addresses.  */
29ENTRY (strcat)
30
31	pushl %edi		/* Save callee-saved register.  */
32	cfi_adjust_cfa_offset (4)
33
34	movl DEST(%esp), %edx	/* %edx = destination string */
35	movl SRC(%esp), %ecx	/* %ecx = source string */
36
37	testb $0xff, (%ecx)	/* Is source string empty? */
38	jz L(8)			/* yes => return */
39
40	/* Test the first bytes separately until destination is aligned.  */
41	testl $3, %edx		/* destination pointer aligned? */
42	jz L(1)			/* yes => begin scan loop */
43	testb $0xff, (%edx)	/* is end of string? */
44	jz L(2)			/* yes => start appending */
45	incl %edx		/* advance destination pointer */
46
47	testl $3, %edx		/* destination pointer aligned? */
48	jz L(1)			/* yes => begin scan loop */
49	testb $0xff, (%edx)	/* is end of string? */
50	jz L(2)			/* yes => start appending */
51	incl %edx		/* advance destination pointer */
52
53	testl $3, %edx		/* destination pointer aligned? */
54	jz L(1)			/* yes => begin scan loop */
55	testb $0xff, (%edx)	/* is end of string? */
56	jz L(2)			/* yes => start appending */
57	incl %edx		/* advance destination pointer */
58
59	/* Now we are aligned.  Begin scan loop.  */
60	jmp L(1)
61
	/* The code from here on uses %edi as scratch, so record for the
	   unwinder that the caller's value lives in the slot pushed in
	   the prologue.  */
62	cfi_rel_offset (edi, 0)
63	ALIGN(4)
64
65L(4):	addl $16,%edx		/* increment destination pointer for round */
66
67L(1):	movl (%edx), %eax	/* get word (= 4 bytes) in question */
68	movl $0xfefefeff, %edi	/* magic value */
69
70	/* If you compare this with the algorithm in memchr.S you will
71	   notice that here is an `xorl' statement missing.  But you must
72	   not forget that we are looking for C == 0 and `xorl $0, %eax'
73	   is a no-op.  */
74
75	addl %eax, %edi		/* add the magic value to the word.  We get
76				   carry bits reported for each byte which
77				   is *not* 0 */
78
79	/* According to the algorithm we had to reverse the effect of the
80	   XOR first and then test the overflow bits.  But because the
81	   following XOR would destroy the carry flag and it would (in a
82	   representation with more than 32 bits) not alter the last
83	   overflow, we can now test this condition.  If no carry is signaled
84	   no overflow must have occurred in the last byte => it was 0.	*/
85	jnc L(3)
86
87	/* We are only interested in carry bits that change due to the
88	   previous add, so remove original bits */
89	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
90
91	/* Now test for the other three overflow bits.  */
92	orl $0xfefefeff, %edi	/* set all non-carry bits */
93	incl %edi		/* add 1: if one carry bit was *not* set
94				   the addition will not result in 0.  */
95
96	/* If at least one byte of the word is NUL we don't get 0 in %edi.  */
97	jnz L(3)
98
99	movl 4(%edx), %eax	/* get second word of destination */
100	movl $0xfefefeff, %edi	/* magic value */
101	addl %eax, %edi		/* add the magic value to the word.  We get
102				   carry bits reported for each byte which
103				   is *not* 0 */
104	jnc L(5)		/* no carry => word has a NUL => stop scanning */
105	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
106	orl $0xfefefeff, %edi	/* set all non-carry bits */
107	incl %edi		/* add 1: if one carry bit was *not* set
108				   the addition will not result in 0.  */
109	jnz L(5)		/* one byte is NUL => stop scanning */
110
111	movl 8(%edx), %eax	/* get third word of destination */
112	movl $0xfefefeff, %edi	/* magic value */
113	addl %eax, %edi		/* add the magic value to the word.  We get
114				   carry bits reported for each byte which
115				   is *not* 0 */
116	jnc L(6)		/* no carry => word has a NUL => stop scanning */
117	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
118	orl $0xfefefeff, %edi	/* set all non-carry bits */
119	incl %edi		/* add 1: if one carry bit was *not* set
120				   the addition will not result in 0.  */
121	jnz L(6)		/* one byte is NUL => stop scanning */
122
123	movl 12(%edx), %eax	/* get fourth word of destination */
124	movl $0xfefefeff, %edi	/* magic value */
125	addl %eax, %edi		/* add the magic value to the word.  We get
126				   carry bits reported for each byte which
127				   is *not* 0 */
128	jnc L(7)		/* no carry => word has a NUL => stop scanning */
129	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
130	orl $0xfefefeff, %edi	/* set all non-carry bits */
131	incl %edi		/* add 1: if one carry bit was *not* set
132				   the addition will not result in 0.  */
133	jz L(4)			/* no byte is NUL => carry on scanning */
134
	/* Fall-through ladder: entering at L(7), L(6) or L(5) adds 12, 8 or
	   4 so that %edx points at the word in which the NUL was found.  */
135L(7):	addl $4, %edx		/* adjust destination pointer */
136L(6):	addl $4, %edx
137L(5):	addl $4, %edx
138
	/* %eax holds the destination word containing the NUL and %edx
	   points at that word; advance %edx to the NUL byte itself.  If
	   none of the first three bytes is NUL it must be the fourth.  */
139L(3):	testb %al, %al		/* is first byte NUL? */
140	jz L(2)			/* yes => start appending */
141	incl %edx		/* advance destination pointer */
142
143	testb %ah, %ah		/* is second byte NUL? */
144	jz L(2)			/* yes => start appending */
145	incl %edx		/* advance destination pointer */
146
147	testl $0xff0000, %eax	/* is third byte NUL? */
148	jz L(2)			/* yes => start appending */
149	incl %edx		/* advance destination pointer */
150
	/* %edx points at the NUL terminating the destination.  Turn it
	   into the distance from the source pointer, so that (%ecx,%edx)
	   addresses the destination and only %ecx has to be advanced.  */
151L(2):	subl %ecx, %edx		/* %edx = end of destination - source */
152
153	/* Now we have to align the source pointer.  */
154	testl $3, %ecx		/* pointer correctly aligned? */
155	jz L(29)		/* yes => start copy loop */
156	movb (%ecx), %al	/* get first byte */
157	movb %al, (%ecx,%edx)	/* and store it */
158	andb %al, %al		/* is byte NUL? */
159	jz L(8)			/* yes => return */
160	incl %ecx		/* increment pointer */
161
162	testl $3, %ecx		/* pointer correctly aligned? */
163	jz L(29)		/* yes => start copy loop */
164	movb (%ecx), %al	/* get next byte */
165	movb %al, (%ecx,%edx)	/* and store it */
166	andb %al, %al		/* is byte NUL? */
167	jz L(8)			/* yes => return */
168	incl %ecx		/* increment pointer */
169
170	testl $3, %ecx		/* pointer correctly aligned? */
171	jz L(29)		/* yes => start copy loop */
172	movb (%ecx), %al	/* get next byte */
173	movb %al, (%ecx,%edx)	/* and store it */
174	andb %al, %al		/* is byte NUL? */
175	jz L(8)			/* yes => return */
176	incl %ecx		/* increment pointer */
177
178	/* Now we are aligned.  */
179	jmp L(29)		/* start copy loop */
180
181	ALIGN(4)
182
183L(28):	movl %eax, 12(%ecx,%edx)/* store fourth word of the round */
184	addl $16, %ecx		/* adjust pointer for full round */
185
186L(29):	movl (%ecx), %eax	/* get word from source */
187	movl $0xfefefeff, %edi	/* magic value */
188	addl %eax, %edi		/* add the magic value to the word.  We get
189				   carry bits reported for each byte which
190				   is *not* 0 */
191	jnc L(9)		/* no carry => word has a NUL => stop copying */
192	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
193	orl $0xfefefeff, %edi	/* set all non-carry bits */
194	incl %edi		/* add 1: if one carry bit was *not* set
195				   the addition will not result in 0.  */
196	jnz L(9)		/* one byte is NUL => stop copying */
197	movl %eax, (%ecx,%edx)	/* store word to destination */
198
199	movl 4(%ecx), %eax	/* get word from source */
200	movl $0xfefefeff, %edi	/* magic value */
201	addl %eax, %edi		/* add the magic value to the word.  We get
202				   carry bits reported for each byte which
203				   is *not* 0 */
204	jnc L(91)		/* no carry => word has a NUL => stop copying */
205	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
206	orl $0xfefefeff, %edi	/* set all non-carry bits */
207	incl %edi		/* add 1: if one carry bit was *not* set
208				   the addition will not result in 0.  */
209	jnz L(91)		/* one byte is NUL => stop copying */
210	movl %eax, 4(%ecx,%edx)	/* store word to destination */
211
212	movl 8(%ecx), %eax	/* get word from source */
213	movl $0xfefefeff, %edi	/* magic value */
214	addl %eax, %edi		/* add the magic value to the word.  We get
215				   carry bits reported for each byte which
216				   is *not* 0 */
217	jnc L(92)		/* no carry => word has a NUL => stop copying */
218	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
219	orl $0xfefefeff, %edi	/* set all non-carry bits */
220	incl %edi		/* add 1: if one carry bit was *not* set
221				   the addition will not result in 0.  */
222	jnz L(92)		/* one byte is NUL => stop copying */
223	movl %eax, 8(%ecx,%edx)	/* store word to destination */
224
225	movl 12(%ecx), %eax	/* get word from source */
226	movl $0xfefefeff, %edi	/* magic value */
227	addl %eax, %edi		/* add the magic value to the word.  We get
228				   carry bits reported for each byte which
229				   is *not* 0 */
230	jnc L(93)		/* no carry => word has a NUL => stop copying */
231	xorl %eax, %edi		/* ((word^charmask)+magic)^(word^charmask) */
232	orl $0xfefefeff, %edi	/* set all non-carry bits */
233	incl %edi		/* add 1: if one carry bit was *not* set
234				   the addition will not result in 0.  */
235	jz L(28)		/* no byte is NUL => carry on copying */
236
	/* Fall-through ladder: entering at L(93), L(92) or L(91) adds 12, 8
	   or 4 so that %ecx points at the source word holding the NUL.  */
237L(93):	addl $4, %ecx		/* adjust pointer */
238L(92):	addl $4, %ecx
239L(91):	addl $4, %ecx
240
	/* %eax holds the last source word (the one containing the NUL),
	   which has not been stored yet.  Store it bytewise, stopping
	   right after the NUL has been written.  */
241L(9):	movb %al, (%ecx,%edx)	/* store first byte of last word */
242	orb %al, %al		/* is it NUL? */
243	jz L(8)			/* yes => return */
244
245	movb %ah, 1(%ecx,%edx)	/* store second byte of last word */
246	orb %ah, %ah		/* is it NUL? */
247	jz L(8)			/* yes => return */
248
249	shrl $16, %eax		/* make upper bytes accessible */
250	movb %al, 2(%ecx,%edx)	/* store third byte of last word */
251	orb %al, %al		/* is it NUL? */
252	jz L(8)			/* yes => return */
253
254	movb %ah, 3(%ecx,%edx)	/* store fourth byte of last word; the first
				   three were not NUL, so this one is */
255
256L(8):	movl DEST(%esp), %eax	/* start address of destination is result */
257	popl %edi		/* restore saved register */
258	cfi_adjust_cfa_offset (-4)
259	cfi_restore (edi)
260
261	ret
262END (strcat)
263libc_hidden_builtin_def (strcat)
264