1/* strcpy with SSE2 and unaligned load
2   Copyright (C) 2011-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19
20#if IS_IN (libc)
21
22# include <sysdep.h>
23
24
/* Unwind bookkeeping for a 4-byte push of REG: the CFA offset grows by 4
   and REG is recorded as saved at offset 0 from the new stack top.  */
25# define CFI_PUSH(REG)                  \
26	cfi_adjust_cfa_offset (4);     \
27	cfi_rel_offset (REG, 0)
28
/* Unwind bookkeeping for the matching pop: the CFA offset shrinks by 4
   and REG is marked as restored.  */
29# define CFI_POP(REG)                   \
30	cfi_adjust_cfa_offset (-4);    \
31	cfi_restore (REG)
32
/* Push/pop REG and emit the corresponding unwind annotation.  */
33# define PUSH(REG) pushl REG; CFI_PUSH (REG)
34# define POP(REG) popl REG; CFI_POP (REG)
35
/* Symbol name of the routine, unless STRCPY is already defined.  */
36# ifndef STRCPY
37#  define STRCPY  __strcpy_sse2
38# endif
39
/* %esp-relative offsets of the three arguments read at entry; they are
   4 bytes apart, starting at PARMS.  */
40# define STR1  PARMS
41# define STR2  STR1+4
42# define LEN  STR2+4
43
/* Length-limited variant.  ENTRANCE pushes %ebx, %esi and %edi (12 bytes),
   so together with the 4-byte return address the first argument is at
   16(%esp).  RETURN pops the three registers in reverse order and
   returns; the CFI_PUSHes after the `ret' put the unwind state back to
   "three registers saved" for the code that textually follows.  */
44# ifdef USE_AS_STRNCPY
45#  define PARMS  16
46#  define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
47#  define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret;          \
48	CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
49
50# ifdef PIC
/* PIC build: a table entry is the distance from the table base B to the
   target I, so the table holds no absolute addresses.  */
51#  define JMPTBL(I, B)	I - B
52
53/* Load an entry from a jump table into ECX and branch to it.  TABLE is a
54	jump table with relative offsets.
55	INDEX is a register that contains the index into the jump table.
56	SCALE is the scale of INDEX.  ECX is overwritten.  */
57
58#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)            \
59	/* We first load PC into ECX.  */                       \
60	SETUP_PIC_REG(cx);                                      \
61	/* Get the address of the jump table.  */               \
62	addl	$(TABLE - .), %ecx;                             \
63	/* Get the entry and convert the relative offset to the \
64	absolute	address.  */                            \
65	addl	(%ecx,INDEX,SCALE), %ecx;                       \
66	/* We loaded the jump table and adjusted ECX. Go.  */  \
67	_CET_NOTRACK jmp *%ecx
68# else
/* Non-PIC build: a table entry is the absolute address of the target.  */
69#  define JMPTBL(I, B)	I
70
71/* Branch to an entry in a jump table.  TABLE is a jump table with
72	absolute offsets.  INDEX is a register that contains the index into
73	the jump table.  SCALE is the scale of INDEX.  */
74
75#  define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
76	_CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
77# endif
78
79.text
/* Entry.  Register roles established by the loads below:
     %edi = destination, %esi = source, %ebx = byte count,
     %eax = return value (the destination unless USE_AS_STPCPY),
     %ecx = offset of the source inside its 16-byte line,
     %edx = bit mask of NUL bytes produced by pcmpeqb/pmovmskb.
   This part inspects the first two aligned 16-byte chunks of the source
   and leaves for a tail routine if the NUL or the end of the count is
   inside them.  */
80ENTRY (STRCPY)
81	ENTRANCE
82	mov	STR1(%esp), %edi
83	mov	STR2(%esp), %esi
84	movl	LEN(%esp), %ebx
	/* A zero count copies nothing.  */
85	test	%ebx, %ebx
86	jz	L(ExitZero)
87
88	mov	%esi, %ecx
89# ifndef USE_AS_STPCPY
90	mov	%edi, %eax      /* save result */
91# endif
	/* %ecx == 0 means the source is already 16-byte aligned.  */
92	and	$15, %ecx
93	jz	L(SourceStringAlignmentZero)
94
	/* Round the source down to a 16-byte boundary and test that whole
	   aligned chunk for NUL.  %ebx is rebased to the rounded address
	   (+ %ecx) and the shift discards the mask bits of the %ecx bytes
	   that precede the real start of the string.  */
95	and	$-16, %esi
96	pxor	%xmm0, %xmm0
97	pxor	%xmm1, %xmm1
98
99	pcmpeqb	(%esi), %xmm1
100	add	%ecx, %ebx
101	pmovmskb %xmm1, %edx
102	shr	%cl, %edx
	/* Rebased count small enough to end within this chunk: let the
	   length-aware tail decide how much to copy.  */
103# ifdef USE_AS_STPCPY
104	cmp	$16, %ebx
105	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
106# else
107	cmp	$17, %ebx
108	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
109# endif
	/* NUL inside the first chunk, count not yet exhausted.  */
110	test	%edx, %edx
111	jnz	L(CopyFrom1To16BytesTail)
112
	/* Same two checks for the second aligned chunk.  %xmm0 is still
	   all-zero here, so it can serve as the comparand.  */
113	pcmpeqb	16(%esi), %xmm0
114	pmovmskb %xmm0, %edx
115# ifdef USE_AS_STPCPY
116	cmp	$32, %ebx
117	jbe	L(CopyFrom1To32BytesCase2OrCase3)
118# else
119	cmp	$33, %ebx
120	jbe	L(CopyFrom1To32BytesCase2OrCase3)
121# endif
122	test	%edx, %edx
123	jnz	L(CopyFrom1To32Bytes)
124
	/* No NUL in the first two chunks: copy the first 16 bytes from the
	   real (unaligned) source start.  */
125	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
126	movdqu	%xmm1, (%edi)
127
	/* Bias %edi by the same amount %esi was rounded down, so one common
	   offset register can index both from here on.  */
128	sub	%ecx, %edi
129
130/* If source address alignment != destination address alignment */
131	.p2align 4
/* Unrolled 16-byte steps.  %esi is 16-byte aligned and %ecx is a running
   offset applied to both %esi and %edi.  Each step loads the next aligned
   chunk, stores the chunk loaded one step earlier (unaligned store),
   tests the new chunk for NUL and charges the count in %ebx.  A step
   leaves through L(CopyFrom1To16BytesCase2OrCase3) when the count is
   used up, or through L(CopyFrom1To16BytesUnalignedXmmN) when the chunk
   held in %xmmN contains the NUL.  */
132L(Unalign16Both):
133	mov	$16, %ecx
134	movdqa	(%esi, %ecx), %xmm1
135	movaps	16(%esi, %ecx), %xmm2
136	movdqu	%xmm1, (%edi, %ecx)
137	pcmpeqb	%xmm2, %xmm0
138	pmovmskb %xmm0, %edx
139	add	$16, %ecx
140	sub	$48, %ebx
141	jbe	L(CopyFrom1To16BytesCase2OrCase3)
142	test	%edx, %edx
143	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
144
	/* Past this point %edx was zero, so %xmm0 (the compare result) is
	   all-zero again and can be reused as the comparand.  */
145	movaps	16(%esi, %ecx), %xmm3
146	movdqu	%xmm2, (%edi, %ecx)
147	pcmpeqb	%xmm3, %xmm0
148	pmovmskb %xmm0, %edx
149	add	$16, %ecx
150	sub	$16, %ebx
151	jbe	L(CopyFrom1To16BytesCase2OrCase3)
152	test	%edx, %edx
153	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
154
155	movaps	16(%esi, %ecx), %xmm4
156	movdqu	%xmm3, (%edi, %ecx)
157	pcmpeqb	%xmm4, %xmm0
158	pmovmskb %xmm0, %edx
159	add	$16, %ecx
160	sub	$16, %ebx
161	jbe	L(CopyFrom1To16BytesCase2OrCase3)
162	test	%edx, %edx
163	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
164
165	movaps	16(%esi, %ecx), %xmm1
166	movdqu	%xmm4, (%edi, %ecx)
167	pcmpeqb	%xmm1, %xmm0
168	pmovmskb %xmm0, %edx
169	add	$16, %ecx
170	sub	$16, %ebx
171	jbe	L(CopyFrom1To16BytesCase2OrCase3)
172	test	%edx, %edx
173	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
174
175	movaps	16(%esi, %ecx), %xmm2
176	movdqu	%xmm1, (%edi, %ecx)
177	pcmpeqb	%xmm2, %xmm0
178	pmovmskb %xmm0, %edx
179	add	$16, %ecx
180	sub	$16, %ebx
181	jbe	L(CopyFrom1To16BytesCase2OrCase3)
182	test	%edx, %edx
183	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
184
185	movaps	16(%esi, %ecx), %xmm3
186	movdqu	%xmm2, (%edi, %ecx)
187	pcmpeqb	%xmm3, %xmm0
188	pmovmskb %xmm0, %edx
189	add	$16, %ecx
190	sub	$16, %ebx
191	jbe	L(CopyFrom1To16BytesCase2OrCase3)
192	test	%edx, %edx
193	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
194
	/* Store the last chunk, then prepare the 64-byte loop: %esi becomes
	   the next source position rounded down to a 64-byte boundary,
	   %edx = old %esi - new %esi, and %edi and %ebx are shifted by that
	   same distance so they stay in step with %esi.  Bytes between the
	   rounded address and the current position get copied again by the
	   loop.  NOTE(review): the +128 appears to undo the count bias
	   built up by the subtractions above -- confirm.  */
195	movdqu	%xmm3, (%edi, %ecx)
196	mov	%esi, %edx
197	lea	16(%esi, %ecx), %esi
198	and	$-0x40, %esi
199	sub	%esi, %edx
200	sub	%edx, %edi
201	lea	128(%ebx, %edx), %ebx
202
/* Main loop: 64 source bytes (four aligned chunks at %esi) per pass.
   Untouched copies of the chunks are kept in %xmm4, %xmm5, %xmm6 and
   %xmm7.  pminub folds the four chunks into %xmm3, so one compare against
   the zero register %xmm0 tells whether any of the 64 bytes is NUL.
   Leaves for L(UnalignedLeaveCase2OrCase3) when the count in %ebx is used
   up and for L(Unaligned64Leave) when a NUL is seen.  */
203L(Unaligned64Loop):
204	movaps	(%esi), %xmm2
205	movaps	%xmm2, %xmm4
206	movaps	16(%esi), %xmm5
207	movaps	32(%esi), %xmm3
208	movaps	%xmm3, %xmm6
209	movaps	48(%esi), %xmm7
210	pminub	%xmm5, %xmm2
211	pminub	%xmm7, %xmm3
212	pminub	%xmm2, %xmm3
213	pcmpeqb	%xmm0, %xmm3
214	pmovmskb %xmm3, %edx
215	sub	$64, %ebx
216	jbe	L(UnalignedLeaveCase2OrCase3)
217	test	%edx, %edx
218	jnz	L(Unaligned64Leave)
/* Steady state: the stores of the previous 64 bytes (through negative
   offsets from the already advanced %edi) are interleaved with the loads
   of the next 64 bytes.  */
219L(Unaligned64Loop_start):
220	add	$64, %edi
221	add	$64, %esi
222	movdqu	%xmm4, -64(%edi)
223	movaps	(%esi), %xmm2
224	movdqa	%xmm2, %xmm4
225	movdqu	%xmm5, -48(%edi)
226	movaps	16(%esi), %xmm5
227	pminub	%xmm5, %xmm2
228	movaps	32(%esi), %xmm3
229	movdqu	%xmm6, -32(%edi)
230	movaps	%xmm3, %xmm6
231	movdqu	%xmm7, -16(%edi)
232	movaps	48(%esi), %xmm7
233	pminub	%xmm7, %xmm3
234	pminub	%xmm2, %xmm3
235	pcmpeqb	%xmm0, %xmm3
236	pmovmskb %xmm3, %edx
237	sub	$64, %ebx
238	jbe	L(UnalignedLeaveCase2OrCase3)
239	test	%edx, %edx
240	jz	L(Unaligned64Loop_start)
/* A NUL is somewhere in the 64 bytes held in %xmm4..%xmm7 and the count
   is not exhausted.  Find the chunk that holds it, two chunks at a
   time.  */
241L(Unaligned64Leave):
242	pxor	%xmm1, %xmm1
243
244	pcmpeqb	%xmm4, %xmm0
245	pcmpeqb	%xmm5, %xmm1
246	pmovmskb %xmm0, %edx
247	pmovmskb %xmm1, %ecx
248	test	%edx, %edx
249	jnz	L(CopyFrom1To16BytesUnaligned_0)
250	test	%ecx, %ecx
251	jnz	L(CopyFrom1To16BytesUnaligned_16)
252
	/* Both masks were zero, so %xmm0 and %xmm1 are all-zero again and
	   can be reused as comparands.  */
253	pcmpeqb	%xmm6, %xmm0
254	pcmpeqb	%xmm7, %xmm1
255	pmovmskb %xmm0, %edx
256	pmovmskb %xmm1, %ecx
257	test	%edx, %edx
258	jnz	L(CopyFrom1To16BytesUnaligned_32)
259
	/* The NUL is in the fourth chunk, at index %edx within it.  Store
	   all 64 bytes, leave %edi at the byte after the NUL and update
	   %ebx for the zero fill.  NOTE(review): the +15 matches the count
	   bias left by the loop's `sub $64' -- confirm.  */
260	bsf	%ecx, %edx
261	movdqu	%xmm4, (%edi)
262	movdqu	%xmm5, 16(%edi)
263	movdqu	%xmm6, 32(%edi)
264# ifdef USE_AS_STPCPY
	/* stpcpy-style result: address of the NUL in the destination.  */
265	lea	48(%edi, %edx), %eax
266# endif
267	movdqu	%xmm7, 48(%edi)
268	add	$15, %ebx
269	sub	%edx, %ebx
270	lea	49(%edi, %edx), %edi
271	jmp	L(StrncpyFillTailWithZero)
272
273/* If source address alignment == destination address alignment */
274
/* Entered when the source is 16-byte aligned (%ecx == 0).  Same two-chunk
   inspection as the unaligned entry path, but without the rounding and
   mask shift.  */
275L(SourceStringAlignmentZero):
276	pxor	%xmm0, %xmm0
277	movdqa	(%esi), %xmm1
278	pcmpeqb	%xmm1, %xmm0
279	pmovmskb %xmm0, %edx
	/* Count small enough to end within the first chunk.  */
280# ifdef USE_AS_STPCPY
281	cmp	$16, %ebx
282	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
283# else
284	cmp	$17, %ebx
285	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
286# endif
	/* NUL inside the first chunk.  */
287	test	%edx, %edx
288	jnz	L(CopyFrom1To16BytesTail1)
289
	/* First chunk is NUL-free: store it while testing the second one.
	   The "...Bytes1" targets below account for these 16 bytes already
	   being written.  */
290	pcmpeqb	16(%esi), %xmm0
291	movdqu	%xmm1, (%edi)
292	pmovmskb %xmm0, %edx
293# ifdef USE_AS_STPCPY
294	cmp	$32, %ebx
295	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
296# else
297	cmp	$33, %ebx
298	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
299# endif
300	test	%edx, %edx
301	jnz	L(CopyFrom1To32Bytes1)
302
	/* Neither chunk ends the copy: continue with the unrolled steps.  */
303	jmp	L(Unalign16Both)
304
305/*-----------------End of main part---------------------------*/
306
307/* Case1 */
/* Unaligned source, NUL in the first chunk, count not exhausted.  Undo the
   entry-path rebasing (%ebx back to the real count, %esi back to the real
   source start), turn the mask into the NUL index and dispatch on it.  */
308	.p2align 4
309L(CopyFrom1To16BytesTail):
310	sub	%ecx, %ebx
311	add	%ecx, %esi
312	bsf	%edx, %edx
313	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
314
/* Aligned source, NUL in the second chunk.  The first 16 bytes were
   stored before the jump here, so step both pointers and the count past
   them and fall into the one-chunk case.  */
315	.p2align 4
316L(CopyFrom1To32Bytes1):
317	add	$16, %esi
318	add	$16, %edi
319	sub	$16, %ebx
/* Aligned source, NUL in the chunk at %esi: mask -> NUL index, then
   dispatch on the index.  */
320L(CopyFrom1To16BytesTail1):
321	bsf	%edx, %edx
322	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
323
/* Unaligned source, NUL in the second aligned chunk, nothing stored yet.
   Undo the entry-path rebasing of %ebx and %esi, and convert the NUL
   position to an index from the real source start:
   index = bsf (mask) + 16 - %ecx.  */
324	.p2align 4
325L(CopyFrom1To32Bytes):
326	sub	%ecx, %ebx
327	bsf	%edx, %edx
328	add	%ecx, %esi
329	add	$16, %edx
330	sub	%ecx, %edx
331	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
332
/* 64-byte loop exit: the NUL is in the first chunk (%xmm4), index taken
   from the mask in %edx.  Store that chunk, leave %edi at the byte after
   the NUL and update %ebx for the zero fill.  NOTE(review): the +63
   matches the count bias left by the loop -- confirm.  */
333	.p2align 4
334L(CopyFrom1To16BytesUnaligned_0):
335	bsf	%edx, %edx
336# ifdef USE_AS_STPCPY
	/* stpcpy-style result: address of the NUL in the destination.  */
337	lea	(%edi, %edx), %eax
338# endif
339	movdqu	%xmm4, (%edi)
340	add	$63, %ebx
341	sub	%edx, %ebx
342	lea	1(%edi, %edx), %edi
343	jmp	L(StrncpyFillTailWithZero)
344
/* 64-byte loop exit: the NUL is in the second chunk (%xmm5), mask in
   %ecx.  Store the first two chunks, leave %edi at the byte after the NUL
   and update %ebx for the zero fill.  NOTE(review): the +47 matches the
   count bias left by the loop -- confirm.  */
345	.p2align 4
346L(CopyFrom1To16BytesUnaligned_16):
347	bsf	%ecx, %edx
348	movdqu	%xmm4, (%edi)
349# ifdef USE_AS_STPCPY
	/* stpcpy-style result: address of the NUL in the destination.  */
350	lea	16(%edi, %edx), %eax
351# endif
352	movdqu	%xmm5, 16(%edi)
353	add	$47, %ebx
354	sub	%edx, %ebx
355	lea	17(%edi, %edx), %edi
356	jmp	L(StrncpyFillTailWithZero)
357
/* 64-byte loop exit: the NUL is in the third chunk (%xmm6), mask in
   %edx.  Store the first three chunks, leave %edi at the byte after the
   NUL and update %ebx for the zero fill.  NOTE(review): the +31 matches
   the count bias left by the loop -- confirm.  */
358	.p2align 4
359L(CopyFrom1To16BytesUnaligned_32):
360	bsf	%edx, %edx
361	movdqu	%xmm4, (%edi)
362	movdqu	%xmm5, 16(%edi)
363# ifdef USE_AS_STPCPY
	/* stpcpy-style result: address of the NUL in the destination.  */
364	lea	32(%edi, %edx), %eax
365# endif
366	movdqu	%xmm6, 32(%edi)
367	add	$31, %ebx
368	sub	%edx, %ebx
369	lea	33(%edi, %edx), %edi
370	jmp	L(StrncpyFillTailWithZero)
371
/* Five stubs with the same shape: the chunk held in %xmmN contains the
   NUL (mask in %edx).  Store that chunk at offset %ecx from %edi and
   continue in L(CopyFrom1To16BytesXmmExit), which locates the NUL and
   starts the zero fill.  */
372	.p2align 4
373L(CopyFrom1To16BytesUnalignedXmm6):
374	movdqu	%xmm6, (%edi, %ecx)
375	jmp	L(CopyFrom1To16BytesXmmExit)
376
377	.p2align 4
378L(CopyFrom1To16BytesUnalignedXmm5):
379	movdqu	%xmm5, (%edi, %ecx)
380	jmp	L(CopyFrom1To16BytesXmmExit)
381
382	.p2align 4
383L(CopyFrom1To16BytesUnalignedXmm4):
384	movdqu	%xmm4, (%edi, %ecx)
385	jmp	L(CopyFrom1To16BytesXmmExit)
386
387	.p2align 4
388L(CopyFrom1To16BytesUnalignedXmm3):
389	movdqu	%xmm3, (%edi, %ecx)
390	jmp	L(CopyFrom1To16BytesXmmExit)
391
392	.p2align 4
393L(CopyFrom1To16BytesUnalignedXmm1):
394	movdqu	%xmm1, (%edi, %ecx)
395	jmp	L(CopyFrom1To16BytesXmmExit)
396
/* Common dispatch for the Case2 stubs: %edx already holds the NUL index
   relative to %esi; jump to the L(ExitN) routine selected by it.  */
397	.p2align 4
398L(CopyFrom1To16BytesExit):
399	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
400
401/* Case2 */
402
/* Case 2 in the unrolled steps or the 64-byte leave path: the count ran
   out in a chunk that also contains a NUL.  Restore the count (+16) and
   apply the running offset %ecx to both pointers.  If the NUL index is
   below the remaining count the string ends first (L(ExitTable) path);
   otherwise exactly %ebx bytes are copied (L(ExitStrncpyTable)).  */
403	.p2align 4
404L(CopyFrom1To16BytesCase2):
405	add	$16, %ebx
406	add	%ecx, %edi
407	add	%ecx, %esi
408	bsf	%edx, %edx
409	cmp	%ebx, %edx
410	jb	L(CopyFrom1To16BytesExit)
411	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
412
/* Case 2 for the entry path's second chunk (unaligned source): undo the
   rebasing of %ebx and %esi, convert the NUL position to an index from
   the real source start (bsf + 16 - %ecx), then pick the string-ends-first
   or count-ends-first exit.  */
413	.p2align 4
414L(CopyFrom1To32BytesCase2):
415	sub	%ecx, %ebx
416	add	%ecx, %esi
417	bsf	%edx, %edx
418	add	$16, %edx
419	sub	%ecx, %edx
420	cmp	%ebx, %edx
421	jb	L(CopyFrom1To16BytesExit)
422	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
423
/* Case 2 for the entry path's first chunk (unaligned source): undo the
   rebasing of %ebx and %esi; the mask was already shifted, so bsf gives
   the NUL index from the real source start.  */
424L(CopyFrom1To16BytesTailCase2):
425	sub	%ecx, %ebx
426	add	%ecx, %esi
427	bsf	%edx, %edx
428	cmp	%ebx, %edx
429	jb	L(CopyFrom1To16BytesExit)
430	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
431
/* Case 2 for the aligned-source entry path: no rebasing to undo.  NUL
   index below the count -> string ends first; otherwise copy exactly %ebx
   bytes.  */
432L(CopyFrom1To16BytesTail1Case2):
433	bsf	%edx, %edx
434	cmp	%ebx, %edx
435	jb	L(CopyFrom1To16BytesExit)
436	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
437
438/* Case2 or Case3,  Case3 */
439
/* The count ran out in the current chunk.  Case 2: the chunk also holds a
   NUL (%edx != 0).  Case 3: it does not, so exactly the remaining bytes
   are copied with no terminator.  */
440	.p2align 4
441L(CopyFrom1To16BytesCase2OrCase3):
442	test	%edx, %edx
443	jnz	L(CopyFrom1To16BytesCase2)
/* Case 3: restore the count (+16), apply the running offset %ecx to both
   pointers and dispatch on the number of bytes left.  */
444L(CopyFrom1To16BytesCase3):
445	add	$16, %ebx
446	add	%ecx, %edi
447	add	%ecx, %esi
448	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
449
/* Entry path, second chunk, unaligned source: the count ends within the
   first two chunks.  With a NUL present go to Case 2; otherwise undo the
   rebasing of %ebx and %esi and copy exactly %ebx bytes.  */
450	.p2align 4
451L(CopyFrom1To32BytesCase2OrCase3):
452	test	%edx, %edx
453	jnz	L(CopyFrom1To32BytesCase2)
454	sub	%ecx, %ebx
455	add	%ecx, %esi
456	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
457
/* Entry path, first chunk, unaligned source: the count ends within the
   first chunk.  With a NUL present go to Case 2; otherwise undo the
   rebasing of %ebx and %esi and copy exactly %ebx bytes.  */
458	.p2align 4
459L(CopyFrom1To16BytesTailCase2OrCase3):
460	test	%edx, %edx
461	jnz	L(CopyFrom1To16BytesTailCase2)
462	sub	%ecx, %ebx
463	add	%ecx, %esi
464	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
465
/* Aligned source, count ends within the second chunk.  The first 16 bytes
   were stored before the jump here, so step both pointers and the count
   past them and fall into the one-chunk case.  */
466	.p2align 4
467L(CopyFrom1To32Bytes1Case2OrCase3):
468	add	$16, %edi
469	add	$16, %esi
470	sub	$16, %ebx
/* Aligned source, count ends within the chunk at %esi.  With a NUL
   present go to Case 2; otherwise copy exactly %ebx bytes.  */
471L(CopyFrom1To16BytesTail1Case2OrCase3):
472	test	%edx, %edx
473	jnz	L(CopyFrom1To16BytesTail1Case2)
474	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
475
/* Nothing left to copy: just return (stpcpy-style result is %edi).
   Presumably entry 0 of L(ExitStrncpyTable); that table is not visible
   here.  */
476	.p2align 4
477L(Exit0):
478# ifdef USE_AS_STPCPY
479	mov	%edi, %eax
480# endif
481	RETURN
482
/* L(ExitTable) entry 0: the NUL is byte 0.  %edx holds the NUL index
   (below 32), so %dh is zero and serves as the NUL to store.  Then
   zero-fill the rest of the count; jnz acts on the flags from sub, which
   lea leaves untouched.  */
483	.p2align 4
484L(Exit1):
485	movb	%dh, (%edi)
486# ifdef USE_AS_STPCPY
487	lea	(%edi), %eax
488# endif
489	sub	$1, %ebx
490	lea	1(%edi), %edi
491	jnz	L(StrncpyFillTailWithZero)
492	RETURN
493
/* NUL at index 1: copy 2 bytes (NUL included) with one 16-bit move, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
494	.p2align 4
495L(Exit2):
496	movw	(%esi), %dx
497	movw	%dx, (%edi)
498# ifdef USE_AS_STPCPY
499	lea	1(%edi), %eax
500# endif
501	sub	$2, %ebx
502	lea	2(%edi), %edi
503	jnz	L(StrncpyFillTailWithZero)
504	RETURN
505
/* NUL at index 2: copy 2 bytes and store the NUL from %dh (%edx holds the
   index 2, so %dh is zero).  Then zero-fill the rest of the count; jnz
   acts on the flags from sub, lea does not modify them.  */
506	.p2align 4
507L(Exit3):
508	movw	(%esi), %cx
509	movw	%cx, (%edi)
510	movb	%dh, 2(%edi)
511# ifdef USE_AS_STPCPY
512	lea	2(%edi), %eax
513# endif
514	sub	$3, %ebx
515	lea	3(%edi), %edi
516	jnz	L(StrncpyFillTailWithZero)
517	RETURN
518
/* NUL at index 3: copy 4 bytes (NUL included) with one 32-bit move, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
519	.p2align 4
520L(Exit4):
521	movl	(%esi), %edx
522	movl	%edx, (%edi)
523# ifdef USE_AS_STPCPY
524	lea	3(%edi), %eax
525# endif
526	sub	$4, %ebx
527	lea	4(%edi), %edi
528	jnz	L(StrncpyFillTailWithZero)
529	RETURN
530
/* NUL at index 4: copy 4 bytes and store the NUL from %dh (%edx holds the
   index 4, so %dh is zero).  Then zero-fill the rest of the count; jnz
   acts on the flags from sub, lea does not modify them.  */
531	.p2align 4
532L(Exit5):
533	movl	(%esi), %ecx
534	movb	%dh, 4(%edi)
535	movl	%ecx, (%edi)
536# ifdef USE_AS_STPCPY
537	lea	4(%edi), %eax
538# endif
539	sub	$5, %ebx
540	lea	5(%edi), %edi
541	jnz	L(StrncpyFillTailWithZero)
542	RETURN
543
/* NUL at index 5: copy 6 bytes (NUL included) as 4 + 2, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
544	.p2align 4
545L(Exit6):
546	movl	(%esi), %ecx
547	movw	4(%esi), %dx
548	movl	%ecx, (%edi)
549	movw	%dx, 4(%edi)
550# ifdef USE_AS_STPCPY
551	lea	5(%edi), %eax
552# endif
553	sub	$6, %ebx
554	lea	6(%edi), %edi
555	jnz	L(StrncpyFillTailWithZero)
556	RETURN
557
/* NUL at index 6: copy 7 bytes (NUL included) with two 32-bit moves that
   overlap by one byte (offsets 0 and 3), then zero-fill the rest of the
   count.  jnz acts on the flags from sub; lea does not modify them.  */
558	.p2align 4
559L(Exit7):
560	movl	(%esi), %ecx
561	movl	3(%esi), %edx
562	movl	%ecx, (%edi)
563	movl	%edx, 3(%edi)
564# ifdef USE_AS_STPCPY
565	lea	6(%edi), %eax
566# endif
567	sub	$7, %ebx
568	lea	7(%edi), %edi
569	jnz	L(StrncpyFillTailWithZero)
570	RETURN
571
/* NUL at index 7: copy 8 bytes (NUL included) with one 64-bit move, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
572	.p2align 4
573L(Exit8):
574	movlpd	(%esi), %xmm0
575	movlpd	%xmm0, (%edi)
576# ifdef USE_AS_STPCPY
577	lea	7(%edi), %eax
578# endif
579	sub	$8, %ebx
580	lea	8(%edi), %edi
581	jnz	L(StrncpyFillTailWithZero)
582	RETURN
583
/* NUL at index 8: copy 8 bytes and store the NUL from %dh (%edx holds the
   index 8, so %dh is zero).  Then zero-fill the rest of the count; jnz
   acts on the flags from sub, lea does not modify them.  */
584	.p2align 4
585L(Exit9):
586	movlpd	(%esi), %xmm0
587	movb	%dh, 8(%edi)
588	movlpd	%xmm0, (%edi)
589# ifdef USE_AS_STPCPY
590	lea	8(%edi), %eax
591# endif
592	sub	$9, %ebx
593	lea	9(%edi), %edi
594	jnz	L(StrncpyFillTailWithZero)
595	RETURN
596
/* NUL at index 9: copy 10 bytes (NUL included) as 8 + 2, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
597	.p2align 4
598L(Exit10):
599	movlpd	(%esi), %xmm0
600	movw	8(%esi), %dx
601	movlpd	%xmm0, (%edi)
602	movw	%dx, 8(%edi)
603# ifdef USE_AS_STPCPY
604	lea	9(%edi), %eax
605# endif
606	sub	$10, %ebx
607	lea	10(%edi), %edi
608	jnz	L(StrncpyFillTailWithZero)
609	RETURN
610
/* NUL at index 10: copy 11 bytes (NUL included) with an 8-byte and a
   4-byte move that overlap by one byte (offsets 0 and 7), then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
611	.p2align 4
612L(Exit11):
613	movlpd	(%esi), %xmm0
614	movl	7(%esi), %edx
615	movlpd	%xmm0, (%edi)
616	movl	%edx, 7(%edi)
617# ifdef USE_AS_STPCPY
618	lea	10(%edi), %eax
619# endif
620	sub	$11, %ebx
621	lea	11(%edi), %edi
622	jnz	L(StrncpyFillTailWithZero)
623	RETURN
624
/* NUL at index 11: copy 12 bytes (NUL included) as 8 + 4, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
625	.p2align 4
626L(Exit12):
627	movlpd	(%esi), %xmm0
628	movl	8(%esi), %edx
629	movlpd	%xmm0, (%edi)
630	movl	%edx, 8(%edi)
631# ifdef USE_AS_STPCPY
632	lea	11(%edi), %eax
633# endif
634	sub	$12, %ebx
635	lea	12(%edi), %edi
636	jnz	L(StrncpyFillTailWithZero)
637	RETURN
638
/* NUL at index 12: copy 13 bytes (NUL included) with two overlapping
   8-byte moves (offsets 0 and 5), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
639	.p2align 4
640L(Exit13):
641	movlpd	(%esi), %xmm0
642	movlpd	5(%esi), %xmm1
643	movlpd	%xmm0, (%edi)
644	movlpd	%xmm1, 5(%edi)
645# ifdef USE_AS_STPCPY
646	lea	12(%edi), %eax
647# endif
648	sub	$13, %ebx
649	lea	13(%edi), %edi
650	jnz	L(StrncpyFillTailWithZero)
651	RETURN
652
/* NUL at index 13: copy 14 bytes (NUL included) with two overlapping
   8-byte moves (offsets 0 and 6), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
653	.p2align 4
654L(Exit14):
655	movlpd	(%esi), %xmm0
656	movlpd	6(%esi), %xmm1
657	movlpd	%xmm0, (%edi)
658	movlpd	%xmm1, 6(%edi)
659# ifdef USE_AS_STPCPY
660	lea	13(%edi), %eax
661# endif
662	sub	$14, %ebx
663	lea	14(%edi), %edi
664	jnz	L(StrncpyFillTailWithZero)
665	RETURN
666
/* NUL at index 14: copy 15 bytes (NUL included) with two overlapping
   8-byte moves (offsets 0 and 7), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
667	.p2align 4
668L(Exit15):
669	movlpd	(%esi), %xmm0
670	movlpd	7(%esi), %xmm1
671	movlpd	%xmm0, (%edi)
672	movlpd	%xmm1, 7(%edi)
673# ifdef USE_AS_STPCPY
674	lea	14(%edi), %eax
675# endif
676	sub	$15, %ebx
677	lea	15(%edi), %edi
678	jnz	L(StrncpyFillTailWithZero)
679	RETURN
680
/* NUL at index 15: copy 16 bytes (NUL included) with one unaligned
   16-byte move, then zero-fill the rest of the count.  jnz acts on the
   flags from sub; lea does not modify them.  */
681	.p2align 4
682L(Exit16):
683	movdqu	(%esi), %xmm0
684	movdqu	%xmm0, (%edi)
685# ifdef USE_AS_STPCPY
686	lea	15(%edi), %eax
687# endif
688	sub	$16, %ebx
689	lea	16(%edi), %edi
690	jnz	L(StrncpyFillTailWithZero)
691	RETURN
692
/* NUL at index 16: copy 16 bytes and store the NUL from %dh (%edx holds
   the index 16, so %dh is zero).  Then zero-fill the rest of the count;
   jnz acts on the flags from sub, lea does not modify them.  */
693	.p2align 4
694L(Exit17):
695	movdqu	(%esi), %xmm0
696	movdqu	%xmm0, (%edi)
697	movb	%dh, 16(%edi)
698# ifdef USE_AS_STPCPY
699	lea	16(%edi), %eax
700# endif
701	sub	$17, %ebx
702	lea	17(%edi), %edi
703	jnz	L(StrncpyFillTailWithZero)
704	RETURN
705
/* NUL at index 17: copy 18 bytes (NUL included) as 16 + 2, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
706	.p2align 4
707L(Exit18):
708	movdqu	(%esi), %xmm0
709	movw	16(%esi), %cx
710	movdqu	%xmm0, (%edi)
711	movw	%cx, 16(%edi)
712# ifdef USE_AS_STPCPY
713	lea	17(%edi), %eax
714# endif
715	sub	$18, %ebx
716	lea	18(%edi), %edi
717	jnz	L(StrncpyFillTailWithZero)
718	RETURN
719
/* NUL at index 18: copy 19 bytes (NUL included) with a 16-byte and a
   4-byte move that overlap by one byte (offsets 0 and 15), then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
720	.p2align 4
721L(Exit19):
722	movdqu	(%esi), %xmm0
723	movl	15(%esi), %ecx
724	movdqu	%xmm0, (%edi)
725	movl	%ecx, 15(%edi)
726# ifdef USE_AS_STPCPY
727	lea	18(%edi), %eax
728# endif
729	sub	$19, %ebx
730	lea	19(%edi), %edi
731	jnz	L(StrncpyFillTailWithZero)
732	RETURN
733
/* NUL at index 19: copy 20 bytes (NUL included) as 16 + 4, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
734	.p2align 4
735L(Exit20):
736	movdqu	(%esi), %xmm0
737	movl	16(%esi), %ecx
738	movdqu	%xmm0, (%edi)
739	movl	%ecx, 16(%edi)
740# ifdef USE_AS_STPCPY
741	lea	19(%edi), %eax
742# endif
743	sub	$20, %ebx
744	lea	20(%edi), %edi
745	jnz	L(StrncpyFillTailWithZero)
746	RETURN
747
/* NUL at index 20: copy 20 bytes as 16 + 4 and store the NUL from %dh
   (%edx holds the index 20, so %dh is zero).  Then zero-fill the rest of
   the count; jnz acts on the flags from sub, lea does not modify
   them.  */
748	.p2align 4
749L(Exit21):
750	movdqu	(%esi), %xmm0
751	movl	16(%esi), %ecx
752	movdqu	%xmm0, (%edi)
753	movl	%ecx, 16(%edi)
754	movb	%dh, 20(%edi)
755# ifdef USE_AS_STPCPY
756	lea	20(%edi), %eax
757# endif
758	sub	$21, %ebx
759	lea	21(%edi), %edi
760	jnz	L(StrncpyFillTailWithZero)
761	RETURN
762
/* NUL at index 21: copy 22 bytes (NUL included) with a 16-byte and an
   8-byte move that overlap (offsets 0 and 14), then zero-fill the rest of
   the count.  jnz acts on the flags from sub; lea does not modify
   them.  */
763	.p2align 4
764L(Exit22):
765	movdqu	(%esi), %xmm0
766	movlpd	14(%esi), %xmm3
767	movdqu	%xmm0, (%edi)
768	movlpd	%xmm3, 14(%edi)
769# ifdef USE_AS_STPCPY
770	lea	21(%edi), %eax
771# endif
772	sub	$22, %ebx
773	lea	22(%edi), %edi
774	jnz	L(StrncpyFillTailWithZero)
775	RETURN
776
/* NUL at index 22: copy 23 bytes (NUL included) with a 16-byte and an
   8-byte move that overlap by one byte (offsets 0 and 15), then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
777	.p2align 4
778L(Exit23):
779	movdqu	(%esi), %xmm0
780	movlpd	15(%esi), %xmm3
781	movdqu	%xmm0, (%edi)
782	movlpd	%xmm3, 15(%edi)
783# ifdef USE_AS_STPCPY
784	lea	22(%edi), %eax
785# endif
786	sub	$23, %ebx
787	lea	23(%edi), %edi
788	jnz	L(StrncpyFillTailWithZero)
789	RETURN
790
/* NUL at index 23: copy 24 bytes (NUL included) as 16 + 8, then zero-fill
   the rest of the count.  jnz acts on the flags from sub; lea does not
   modify them.  */
791	.p2align 4
792L(Exit24):
793	movdqu	(%esi), %xmm0
794	movlpd	16(%esi), %xmm2
795	movdqu	%xmm0, (%edi)
796	movlpd	%xmm2, 16(%edi)
797# ifdef USE_AS_STPCPY
798	lea	23(%edi), %eax
799# endif
800	sub	$24, %ebx
801	lea	24(%edi), %edi
802	jnz	L(StrncpyFillTailWithZero)
803	RETURN
804
/* NUL at index 24: copy 24 bytes as 16 + 8 and store the NUL from %dh
   (%edx holds the index 24, so %dh is zero).  Then zero-fill the rest of
   the count; jnz acts on the flags from sub, lea does not modify
   them.  */
805	.p2align 4
806L(Exit25):
807	movdqu	(%esi), %xmm0
808	movlpd	16(%esi), %xmm2
809	movdqu	%xmm0, (%edi)
810	movlpd	%xmm2, 16(%edi)
811	movb	%dh, 24(%edi)
812# ifdef USE_AS_STPCPY
813	lea	24(%edi), %eax
814# endif
815	sub	$25, %ebx
816	lea	25(%edi), %edi
817	jnz	L(StrncpyFillTailWithZero)
818	RETURN
819
/* NUL at index 25: copy 26 bytes (NUL included) as 16 + 8 + 2, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
820	.p2align 4
821L(Exit26):
822	movdqu	(%esi), %xmm0
823	movlpd	16(%esi), %xmm2
824	movw	24(%esi), %cx
825	movdqu	%xmm0, (%edi)
826	movlpd	%xmm2, 16(%edi)
827	movw	%cx, 24(%edi)
828# ifdef USE_AS_STPCPY
829	lea	25(%edi), %eax
830# endif
831	sub	$26, %ebx
832	lea	26(%edi), %edi
833	jnz	L(StrncpyFillTailWithZero)
834	RETURN
835
/* NUL at index 26: copy 27 bytes (NUL included) as 16 + 8 plus a 4-byte
   move at offset 23 that overlaps the previous one by one byte, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
836	.p2align 4
837L(Exit27):
838	movdqu	(%esi), %xmm0
839	movlpd	16(%esi), %xmm2
840	movl	23(%esi), %ecx
841	movdqu	%xmm0, (%edi)
842	movlpd	%xmm2, 16(%edi)
843	movl	%ecx, 23(%edi)
844# ifdef USE_AS_STPCPY
845	lea	26(%edi), %eax
846# endif
847	sub	$27, %ebx
848	lea	27(%edi), %edi
849	jnz	L(StrncpyFillTailWithZero)
850	RETURN
851
/* NUL at index 27: copy 28 bytes (NUL included) as 16 + 8 + 4, then
   zero-fill the rest of the count.  jnz acts on the flags from sub; lea
   does not modify them.  */
852	.p2align 4
853L(Exit28):
854	movdqu	(%esi), %xmm0
855	movlpd	16(%esi), %xmm2
856	movl	24(%esi), %ecx
857	movdqu	%xmm0, (%edi)
858	movlpd	%xmm2, 16(%edi)
859	movl	%ecx, 24(%edi)
860# ifdef USE_AS_STPCPY
861	lea	27(%edi), %eax
862# endif
863	sub	$28, %ebx
864	lea	28(%edi), %edi
865	jnz	L(StrncpyFillTailWithZero)
866	RETURN
867
/* NUL at index 28: copy 29 bytes (NUL included) with two overlapping
   16-byte moves (offsets 0 and 13), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
868	.p2align 4
869L(Exit29):
870	movdqu	(%esi), %xmm0
871	movdqu	13(%esi), %xmm2
872	movdqu	%xmm0, (%edi)
873	movdqu	%xmm2, 13(%edi)
874# ifdef USE_AS_STPCPY
875	lea	28(%edi), %eax
876# endif
877	sub	$29, %ebx
878	lea	29(%edi), %edi
879	jnz	L(StrncpyFillTailWithZero)
880	RETURN
881
/* NUL at index 29: copy 30 bytes (NUL included) with two overlapping
   16-byte moves (offsets 0 and 14), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
882	.p2align 4
883L(Exit30):
884	movdqu	(%esi), %xmm0
885	movdqu	14(%esi), %xmm2
886	movdqu	%xmm0, (%edi)
887	movdqu	%xmm2, 14(%edi)
888# ifdef USE_AS_STPCPY
889	lea	29(%edi), %eax
890# endif
891	sub	$30, %ebx
892	lea	30(%edi), %edi
893	jnz	L(StrncpyFillTailWithZero)
894	RETURN
895
896
/* NUL at index 30: copy 31 bytes (NUL included) with two overlapping
   16-byte moves (offsets 0 and 15), then zero-fill the rest of the count.
   jnz acts on the flags from sub; lea does not modify them.  */
897	.p2align 4
898L(Exit31):
899	movdqu	(%esi), %xmm0
900	movdqu	15(%esi), %xmm2
901	movdqu	%xmm0, (%edi)
902	movdqu	%xmm2, 15(%edi)
903# ifdef USE_AS_STPCPY
904	lea	30(%edi), %eax
905# endif
906	sub	$31, %ebx
907	lea	31(%edi), %edi
908	jnz	L(StrncpyFillTailWithZero)
909	RETURN
910
/* Copy 32 bytes as 16 + 16, the last one being the NUL (stpcpy-style
   result is %edi + 31), then zero-fill the rest of the count.  Presumably
   L(ExitTable) entry 31; the visible part of the table stops at
   L(Exit31).  jnz acts on the flags from sub; lea does not modify
   them.  */
911	.p2align 4
912L(Exit32):
913	movdqu	(%esi), %xmm0
914	movdqu	16(%esi), %xmm2
915	movdqu	%xmm0, (%edi)
916	movdqu	%xmm2, 16(%edi)
917# ifdef USE_AS_STPCPY
918	lea	31(%edi), %eax
919# endif
920	sub	$32, %ebx
921	lea	32(%edi), %edi
922	jnz	L(StrncpyFillTailWithZero)
923	RETURN
924
/* Count-limited exit: copy exactly 1 byte and return.  No NUL is added
   and no padding is written.  Presumably entry 1 of L(ExitStrncpyTable)
   (table not visible here).  */
925	.p2align 4
926L(StrncpyExit1):
927	movb	(%esi), %dl
928	movb	%dl, (%edi)
929# ifdef USE_AS_STPCPY
930	lea	1(%edi), %eax
931# endif
932	RETURN
933
/* Count-limited exit: copy exactly 2 bytes and return; no NUL is added
   and no padding is written.  */
934	.p2align 4
935L(StrncpyExit2):
936	movw	(%esi), %dx
937	movw	%dx, (%edi)
938# ifdef USE_AS_STPCPY
939	lea	2(%edi), %eax
940# endif
941	RETURN
942
/* Count-limited exit: copy exactly 3 bytes (2 + 1) and return; no NUL is
   added and no padding is written.  */
943	.p2align 4
944L(StrncpyExit3):
945	movw	(%esi), %cx
946	movb	2(%esi), %dl
947	movw	%cx, (%edi)
948	movb	%dl, 2(%edi)
949# ifdef USE_AS_STPCPY
950	lea	3(%edi), %eax
951# endif
952	RETURN
953
/* Count-limited exit: copy exactly 4 bytes and return; no NUL is added
   and no padding is written.  */
954	.p2align 4
955L(StrncpyExit4):
956	movl	(%esi), %edx
957	movl	%edx, (%edi)
958# ifdef USE_AS_STPCPY
959	lea	4(%edi), %eax
960# endif
961	RETURN
962
/* Count-limited exit: copy exactly 5 bytes (4 + 1) and return; no NUL is
   added and no padding is written.  */
963	.p2align 4
964L(StrncpyExit5):
965	movl	(%esi), %ecx
966	movb	4(%esi), %dl
967	movl	%ecx, (%edi)
968	movb	%dl, 4(%edi)
969# ifdef USE_AS_STPCPY
970	lea	5(%edi), %eax
971# endif
972	RETURN
973
/* Count-limited exit: copy exactly 6 bytes (4 + 2) and return; no NUL is
   added and no padding is written.  */
974	.p2align 4
975L(StrncpyExit6):
976	movl	(%esi), %ecx
977	movw	4(%esi), %dx
978	movl	%ecx, (%edi)
979	movw	%dx, 4(%edi)
980# ifdef USE_AS_STPCPY
981	lea	6(%edi), %eax
982# endif
983	RETURN
984
/* Count-limited exit: copy exactly 7 bytes with two 4-byte moves that
   overlap by one byte (offsets 0 and 3) and return; no NUL is added and
   no padding is written.  */
985	.p2align 4
986L(StrncpyExit7):
987	movl	(%esi), %ecx
988	movl	3(%esi), %edx
989	movl	%ecx, (%edi)
990	movl	%edx, 3(%edi)
991# ifdef USE_AS_STPCPY
992	lea	7(%edi), %eax
993# endif
994	RETURN
995
/* Count-limited exit: copy exactly 8 bytes and return; no NUL is added
   and no padding is written.  */
996	.p2align 4
997L(StrncpyExit8):
998	movlpd	(%esi), %xmm0
999	movlpd	%xmm0, (%edi)
1000# ifdef USE_AS_STPCPY
1001	lea	8(%edi), %eax
1002# endif
1003	RETURN
1004
/* Count-limited exit: copy exactly 9 bytes (8 + 1) and return; no NUL is
   added and no padding is written.  */
1005	.p2align 4
1006L(StrncpyExit9):
1007	movlpd	(%esi), %xmm0
1008	movb	8(%esi), %dl
1009	movlpd	%xmm0, (%edi)
1010	movb	%dl, 8(%edi)
1011# ifdef USE_AS_STPCPY
1012	lea	9(%edi), %eax
1013# endif
1014	RETURN
1015
/* Count-limited exit: copy exactly 10 bytes (8 + 2) and return; no NUL is
   added and no padding is written.  */
1016	.p2align 4
1017L(StrncpyExit10):
1018	movlpd	(%esi), %xmm0
1019	movw	8(%esi), %dx
1020	movlpd	%xmm0, (%edi)
1021	movw	%dx, 8(%edi)
1022# ifdef USE_AS_STPCPY
1023	lea	10(%edi), %eax
1024# endif
1025	RETURN
1026
/* Count-limited exit: copy exactly 11 bytes with an 8-byte and a 4-byte
   move that overlap by one byte (offsets 0 and 7) and return; no NUL is
   added and no padding is written.  */
1027	.p2align 4
1028L(StrncpyExit11):
1029	movlpd	(%esi), %xmm0
1030	movl	7(%esi), %edx
1031	movlpd	%xmm0, (%edi)
1032	movl	%edx, 7(%edi)
1033# ifdef USE_AS_STPCPY
1034	lea	11(%edi), %eax
1035# endif
1036	RETURN
1037
/* Count-limited exit: copy exactly 12 bytes (8 + 4) and return; no NUL is
   added and no padding is written.  */
1038	.p2align 4
1039L(StrncpyExit12):
1040	movlpd	(%esi), %xmm0
1041	movl	8(%esi), %edx
1042	movlpd	%xmm0, (%edi)
1043	movl	%edx, 8(%edi)
1044# ifdef USE_AS_STPCPY
1045	lea	12(%edi), %eax
1046# endif
1047	RETURN
1048
/* Count-limited exit: copy exactly 13 bytes with two overlapping 8-byte
   moves (offsets 0 and 5) and return; no NUL is added and no padding is
   written.  */
1049	.p2align 4
1050L(StrncpyExit13):
1051	movlpd	(%esi), %xmm0
1052	movlpd	5(%esi), %xmm1
1053	movlpd	%xmm0, (%edi)
1054	movlpd	%xmm1, 5(%edi)
1055# ifdef USE_AS_STPCPY
1056	lea	13(%edi), %eax
1057# endif
1058	RETURN
1059
/* Count-limited exit: copy exactly 14 bytes with two overlapping 8-byte
   moves (offsets 0 and 6) and return; no NUL is added and no padding is
   written.  */
1060	.p2align 4
1061L(StrncpyExit14):
1062	movlpd	(%esi), %xmm0
1063	movlpd	6(%esi), %xmm1
1064	movlpd	%xmm0, (%edi)
1065	movlpd	%xmm1, 6(%edi)
1066# ifdef USE_AS_STPCPY
1067	lea	14(%edi), %eax
1068# endif
1069	RETURN
1070
/* Count-limited exit: copy exactly 15 bytes with two overlapping 8-byte
   moves (offsets 0 and 7) and return; no NUL is added and no padding is
   written.  */
1071	.p2align 4
1072L(StrncpyExit15):
1073	movlpd	(%esi), %xmm0
1074	movlpd	7(%esi), %xmm1
1075	movlpd	%xmm0, (%edi)
1076	movlpd	%xmm1, 7(%edi)
1077# ifdef USE_AS_STPCPY
1078	lea	15(%edi), %eax
1079# endif
1080	RETURN
1081
/* Count-limited exit: copy exactly 16 bytes with one unaligned 16-byte
   move and return; no NUL is added and no padding is written.  */
1082	.p2align 4
1083L(StrncpyExit16):
1084	movdqu	(%esi), %xmm0
1085	movdqu	%xmm0, (%edi)
1086# ifdef USE_AS_STPCPY
1087	lea	16(%edi), %eax
1088# endif
1089	RETURN
1090
/* Count-limited exit: copy exactly 17 bytes (16 + 1) and return; no NUL
   is added and no padding is written.  */
1091	.p2align 4
1092L(StrncpyExit17):
1093	movdqu	(%esi), %xmm0
1094	movb	16(%esi), %cl
1095	movdqu	%xmm0, (%edi)
1096	movb	%cl, 16(%edi)
1097# ifdef USE_AS_STPCPY
1098	lea	17(%edi), %eax
1099# endif
1100	RETURN
1101
/* Count-limited exit: copy exactly 18 bytes (16 + 2) and return; no NUL
   is added and no padding is written.  */
1102	.p2align 4
1103L(StrncpyExit18):
1104	movdqu	(%esi), %xmm0
1105	movw	16(%esi), %cx
1106	movdqu	%xmm0, (%edi)
1107	movw	%cx, 16(%edi)
1108# ifdef USE_AS_STPCPY
1109	lea	18(%edi), %eax
1110# endif
1111	RETURN
1112
/* Count-limited exit: copy exactly 19 bytes with a 16-byte and a 4-byte
   move that overlap by one byte (offsets 0 and 15) and return; no NUL is
   added and no padding is written.  */
1113	.p2align 4
1114L(StrncpyExit19):
1115	movdqu	(%esi), %xmm0
1116	movl	15(%esi), %ecx
1117	movdqu	%xmm0, (%edi)
1118	movl	%ecx, 15(%edi)
1119# ifdef USE_AS_STPCPY
1120	lea	19(%edi), %eax
1121# endif
1122	RETURN
1123
/* Count-limited exit: copy exactly 20 bytes (16 + 4) and return; no NUL
   is added and no padding is written.  */
1124	.p2align 4
1125L(StrncpyExit20):
1126	movdqu	(%esi), %xmm0
1127	movl	16(%esi), %ecx
1128	movdqu	%xmm0, (%edi)
1129	movl	%ecx, 16(%edi)
1130# ifdef USE_AS_STPCPY
1131	lea	20(%edi), %eax
1132# endif
1133	RETURN
1134
/* Count-limited exit: copy exactly 21 bytes (16 + 4 + 1) and return; no
   NUL is added and no padding is written.  */
1135	.p2align 4
1136L(StrncpyExit21):
1137	movdqu	(%esi), %xmm0
1138	movl	16(%esi), %ecx
1139	movb	20(%esi), %dl
1140	movdqu	%xmm0, (%edi)
1141	movl	%ecx, 16(%edi)
1142	movb	%dl, 20(%edi)
1143# ifdef USE_AS_STPCPY
1144	lea	21(%edi), %eax
1145# endif
1146	RETURN
1147
/* Count-limited exit: copy exactly 22 bytes with a 16-byte and an 8-byte
   move that overlap (offsets 0 and 14) and return; no NUL is added and no
   padding is written.  */
1148	.p2align 4
1149L(StrncpyExit22):
1150	movdqu	(%esi), %xmm0
1151	movlpd	14(%esi), %xmm3
1152	movdqu	%xmm0, (%edi)
1153	movlpd	%xmm3, 14(%edi)
1154# ifdef USE_AS_STPCPY
1155	lea	22(%edi), %eax
1156# endif
1157	RETURN
1158
/* Count-limited exit: copy exactly 23 bytes with a 16-byte and an 8-byte
   move that overlap by one byte (offsets 0 and 15) and return; no NUL is
   added and no padding is written.  */
1159	.p2align 4
1160L(StrncpyExit23):
1161	movdqu	(%esi), %xmm0
1162	movlpd	15(%esi), %xmm3
1163	movdqu	%xmm0, (%edi)
1164	movlpd	%xmm3, 15(%edi)
1165# ifdef USE_AS_STPCPY
1166	lea	23(%edi), %eax
1167# endif
1168	RETURN
1169
/* Count-limited exit: copy exactly 24 bytes (16 + 8) and return; no NUL
   is added and no padding is written.  */
1170	.p2align 4
1171L(StrncpyExit24):
1172	movdqu	(%esi), %xmm0
1173	movlpd	16(%esi), %xmm2
1174	movdqu	%xmm0, (%edi)
1175	movlpd	%xmm2, 16(%edi)
1176# ifdef USE_AS_STPCPY
1177	lea	24(%edi), %eax
1178# endif
1179	RETURN
1180
/* Count-limited exit: copy exactly 25 bytes (16 + 8 + 1) and return; no
   NUL is added and no padding is written.  */
1181	.p2align 4
1182L(StrncpyExit25):
1183	movdqu	(%esi), %xmm0
1184	movlpd	16(%esi), %xmm2
1185	movb	24(%esi), %cl
1186	movdqu	%xmm0, (%edi)
1187	movlpd	%xmm2, 16(%edi)
1188	movb	%cl, 24(%edi)
1189# ifdef USE_AS_STPCPY
1190	lea	25(%edi), %eax
1191# endif
1192	RETURN
1193
/* Count-limited exit: copy exactly 26 bytes (16 + 8 + 2) and return; no
   NUL is added and no padding is written.  */
1194	.p2align 4
1195L(StrncpyExit26):
1196	movdqu	(%esi), %xmm0
1197	movlpd	16(%esi), %xmm2
1198	movw	24(%esi), %cx
1199	movdqu	%xmm0, (%edi)
1200	movlpd	%xmm2, 16(%edi)
1201	movw	%cx, 24(%edi)
1202# ifdef USE_AS_STPCPY
1203	lea	26(%edi), %eax
1204# endif
1205	RETURN
1206
/* Count-limited exit: copy exactly 27 bytes as 16 + 8 plus a 4-byte move
   at offset 23 that overlaps the previous one by one byte, then return;
   no NUL is added and no padding is written.  */
1207	.p2align 4
1208L(StrncpyExit27):
1209	movdqu	(%esi), %xmm0
1210	movlpd	16(%esi), %xmm2
1211	movl	23(%esi), %ecx
1212	movdqu	%xmm0, (%edi)
1213	movlpd	%xmm2, 16(%edi)
1214	movl	%ecx, 23(%edi)
1215# ifdef USE_AS_STPCPY
1216	lea	27(%edi), %eax
1217# endif
1218	RETURN
1219
/* Count-limited exit: copy exactly 28 bytes (16 + 8 + 4) and return; no
   NUL is added and no padding is written.  */
1220	.p2align 4
1221L(StrncpyExit28):
1222	movdqu	(%esi), %xmm0
1223	movlpd	16(%esi), %xmm2
1224	movl	24(%esi), %ecx
1225	movdqu	%xmm0, (%edi)
1226	movlpd	%xmm2, 16(%edi)
1227	movl	%ecx, 24(%edi)
1228# ifdef USE_AS_STPCPY
1229	lea	28(%edi), %eax
1230# endif
1231	RETURN
1232
/* Count-limited exit: copy exactly 29 bytes with two overlapping 16-byte
   moves (offsets 0 and 13) and return; no NUL is added and no padding is
   written.  */
1233	.p2align 4
1234L(StrncpyExit29):
1235	movdqu	(%esi), %xmm0
1236	movdqu	13(%esi), %xmm2
1237	movdqu	%xmm0, (%edi)
1238	movdqu	%xmm2, 13(%edi)
1239# ifdef USE_AS_STPCPY
1240	lea	29(%edi), %eax
1241# endif
1242	RETURN
1243
/* Count-limited exit: copy exactly 30 bytes with two overlapping 16-byte
   moves (offsets 0 and 14) and return; no NUL is added and no padding is
   written.  */
1244	.p2align 4
1245L(StrncpyExit30):
1246	movdqu	(%esi), %xmm0
1247	movdqu	14(%esi), %xmm2
1248	movdqu	%xmm0, (%edi)
1249	movdqu	%xmm2, 14(%edi)
1250# ifdef USE_AS_STPCPY
1251	lea	30(%edi), %eax
1252# endif
1253	RETURN
1254
/* Count-limited exit: copy exactly 31 bytes with two overlapping 16-byte
   moves (offsets 0 and 15) and return; no NUL is added and no padding is
   written.  */
1255	.p2align 4
1256L(StrncpyExit31):
1257	movdqu	(%esi), %xmm0
1258	movdqu	15(%esi), %xmm2
1259	movdqu	%xmm0, (%edi)
1260	movdqu	%xmm2, 15(%edi)
1261# ifdef USE_AS_STPCPY
1262	lea	31(%edi), %eax
1263# endif
1264	RETURN
1265
/* Count-limited exit: copy exactly 32 bytes (16 + 16) and return; no NUL
   is added and no padding is written.  */
1266	.p2align 4
1267L(StrncpyExit32):
1268	movdqu	(%esi), %xmm0
1269	movdqu	16(%esi), %xmm2
1270	movdqu	%xmm0, (%edi)
1271	movdqu	%xmm2, 16(%edi)
1272# ifdef USE_AS_STPCPY
1273	lea	32(%edi), %eax
1274# endif
1275	RETURN
1276
/* Count-limited exit: copy exactly 33 bytes (16 + 16 + 1) and return; no
   NUL is added and no padding is written.  Unlike the other StrncpyExit
   stubs this one has no USE_AS_STPCPY update of %eax.
   NOTE(review): presumably never reached in the stpcpy build, whose entry
   checks compare against 32 rather than 33 -- confirm.  */
1277	.p2align 4
1278L(StrncpyExit33):
1279	movdqu	(%esi), %xmm0
1280	movdqu	16(%esi), %xmm2
1281	movb	32(%esi), %cl
1282	movdqu	%xmm0, (%edi)
1283	movdqu	%xmm2, 16(%edi)
1284	movb	%cl, 32(%edi)
1285	RETURN
1286
/* Zero-fill tails.  L(FillN) writes the last N zero bytes at %edi and
   returns.  They are reached from L(StrncpyFillTailWithZero), which has
   cleared %edx and %xmm0, so %dl/%dx/%edx and %xmm0 all supply zeros.
   Presumably entry N of L(FillTable) (table not visible here).
   L(Fill3), L(Fill7) and L(Fill15) use one wider store that starts one
   byte before %edi.  NOTE(review): this relies on that preceding byte
   already being zero (the NUL or earlier padding) -- confirm.  */
1287	.p2align 4
1288L(Fill0):
1289	RETURN
1290
1291	.p2align 4
1292L(Fill1):
1293	movb	%dl, (%edi)
1294	RETURN
1295
1296	.p2align 4
1297L(Fill2):
1298	movw	%dx, (%edi)
1299	RETURN
1300
1301	.p2align 4
1302L(Fill3):
1303	movl	%edx, -1(%edi)
1304	RETURN
1305
1306	.p2align 4
1307L(Fill4):
1308	movl	%edx, (%edi)
1309	RETURN
1310
1311	.p2align 4
1312L(Fill5):
1313	movl	%edx, (%edi)
1314	movb	%dl, 4(%edi)
1315	RETURN
1316
1317	.p2align 4
1318L(Fill6):
1319	movl	%edx, (%edi)
1320	movw	%dx, 4(%edi)
1321	RETURN
1322
1323	.p2align 4
1324L(Fill7):
1325	movlpd	%xmm0, -1(%edi)
1326	RETURN
1327
1328	.p2align 4
1329L(Fill8):
1330	movlpd	%xmm0, (%edi)
1331	RETURN
1332
1333	.p2align 4
1334L(Fill9):
1335	movlpd	%xmm0, (%edi)
1336	movb	%dl, 8(%edi)
1337	RETURN
1338
1339	.p2align 4
1340L(Fill10):
1341	movlpd	%xmm0, (%edi)
1342	movw	%dx, 8(%edi)
1343	RETURN
1344
1345	.p2align 4
/* 8 + 4 bytes overlapping by one byte (offsets 0 and 7).  */
1346L(Fill11):
1347	movlpd	%xmm0, (%edi)
1348	movl	%edx, 7(%edi)
1349	RETURN
1350
1351	.p2align 4
1352L(Fill12):
1353	movlpd	%xmm0, (%edi)
1354	movl	%edx, 8(%edi)
1355	RETURN
1356
1357	.p2align 4
/* Two overlapping 8-byte stores (offsets 0 and 5).  */
1358L(Fill13):
1359	movlpd	%xmm0, (%edi)
1360	movlpd	%xmm0, 5(%edi)
1361	RETURN
1362
1363	.p2align 4
/* Two overlapping 8-byte stores (offsets 0 and 6).  */
1364L(Fill14):
1365	movlpd	%xmm0, (%edi)
1366	movlpd	%xmm0, 6(%edi)
1367	RETURN
1368
1369	.p2align 4
1370L(Fill15):
1371	movdqu	%xmm0, -1(%edi)
1372	RETURN
1373
1374	.p2align 4
1375L(Fill16):
1376	movdqu	%xmm0, (%edi)
1377	RETURN
1378
/* The chunk held in %xmm2 contains the NUL: store it at offset %ecx from
   %edi and fall through into the common exit below.  */
1379	.p2align 4
1380L(CopyFrom1To16BytesUnalignedXmm2):
1381	movdqu	%xmm2, (%edi, %ecx)
1382
/* Common tail of the L(CopyFrom1To16BytesUnalignedXmmN) stubs.  The chunk
   with the NUL has been stored; %edx is its NUL mask.  Turn the mask into
   an index, fold the running offset %ecx into %edi, leave %edi at the
   byte after the NUL and update %ebx for the zero fill, then fall through
   into L(StrncpyFillTailWithZero).  NOTE(review): the +15 matches the
   count bias left by the caller's last subtraction -- confirm.  */
1383	.p2align 4
1384L(CopyFrom1To16BytesXmmExit):
1385	bsf	%edx, %edx
1386	add	$15, %ebx
1387	add	%ecx, %edi
1388# ifdef USE_AS_STPCPY
	/* stpcpy-style result: address of the NUL in the destination.  */
1389	lea	(%edi, %edx), %eax
1390# endif
1391	sub	%edx, %ebx
1392	lea	1(%edi, %edx), %edi
1393
/* Write %ebx zero bytes starting at %edi, then return through one of the
   L(FillN) stubs.  %xmm0 and %edx are cleared here because those stubs
   use them as the source of zeros.  %esi is reused as scratch; the source
   pointer is no longer needed.  */
1394	.p2align 4
1395L(StrncpyFillTailWithZero):
1396	pxor	%xmm0, %xmm0
1397	xor	%edx, %edx
	/* 16 bytes or fewer: a single L(FillN) stub does it all.  */
1398	sub	$16, %ebx
1399	jbe	L(StrncpyFillExit)
1400
	/* One unaligned 16-byte store, then round %edi down to a 16-byte
	   boundary so the remaining stores can be aligned.  The %esi bytes
	   that %edi steps back are added to the count; they get zeroed a
	   second time.  */
1401	movdqu	%xmm0, (%edi)
1402	add	$16, %edi
1403
1404	mov	%edi, %esi
1405	and	$0xf, %esi
1406	sub	%esi, %edi
1407	add	%esi, %ebx
1408	sub	$64, %ebx
1409	jb	L(StrncpyFillLess64)
1410
/* 64 bytes of aligned zero stores per pass while at least 64 remain.  */
1411L(StrncpyFillLoopMovdqa):
1412	movdqa	%xmm0, (%edi)
1413	movdqa	%xmm0, 16(%edi)
1414	movdqa	%xmm0, 32(%edi)
1415	movdqa	%xmm0, 48(%edi)
1416	add	$64, %edi
1417	sub	$64, %ebx
1418	jae	L(StrncpyFillLoopMovdqa)
1419
/* Fewer than 64 bytes left (%ebx = remaining - 64, negative).  Peel off
   32 and then 16 bytes where they fit; what is left (0..15) is the
   L(FillTable) index.  */
1420L(StrncpyFillLess64):
1421	add	$32, %ebx
1422	jl	L(StrncpyFillLess32)
1423	movdqa	%xmm0, (%edi)
1424	movdqa	%xmm0, 16(%edi)
1425	add	$32, %edi
1426	sub	$16, %ebx
1427	jl	L(StrncpyFillExit)
1428	movdqa	%xmm0, (%edi)
1429	add	$16, %edi
1430	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1431
/* Fewer than 32 bytes left (%ebx = remaining - 32, negative).  */
1432L(StrncpyFillLess32):
1433	add	$16, %ebx
1434	jl	L(StrncpyFillExit)
1435	movdqa	%xmm0, (%edi)
1436	add	$16, %edi
1437	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1438
/* %ebx holds (remaining - 16); restore the real remainder and dispatch on
   it.  */
1439L(StrncpyFillExit):
1440	add	$16, %ebx
1441	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1442
/* The 64-byte loop used up the count within the block held in
   %xmm4..%xmm7.  If the block also contains a NUL (%edx != 0) the NUL and
   the limit must be compared (Case 2); otherwise copy exactly what the
   count allows (Case 3).  */
1443	.p2align 4
1444L(UnalignedLeaveCase2OrCase3):
1445	test	%edx, %edx
1446	jnz	L(Unaligned64LeaveCase2)
/* Case 3.  %ecx = (%ebx + 64) rounded down to a multiple of 16; it becomes
   the pointer offset applied by L(CopyFrom1To16BytesCase3).  Whole chunks
   are stored one at a time while the count covers them; the sub-16
   remainder is left to L(CopyFrom1To16BytesCase3).  If all four chunks
   are covered the copy is complete.  */
1447L(Unaligned64LeaveCase3):
1448	lea	64(%ebx), %ecx
1449	and	$-16, %ecx
1450	add	$48, %ebx
1451	jl	L(CopyFrom1To16BytesCase3)
1452	movdqu	%xmm4, (%edi)
1453	sub	$16, %ebx
1454	jb	L(CopyFrom1To16BytesCase3)
1455	movdqu	%xmm5, 16(%edi)
1456	sub	$16, %ebx
1457	jb	L(CopyFrom1To16BytesCase3)
1458	movdqu	%xmm6, 32(%edi)
1459	sub	$16, %ebx
1460	jb	L(CopyFrom1To16BytesCase3)
1461	movdqu	%xmm7, 48(%edi)
1462# ifdef USE_AS_STPCPY
1463	lea	64(%edi), %eax
1464# endif
1465	RETURN
1466
/* Case 2 for the 64-byte loop: the count ran out within the block held in
   %xmm4..%xmm7 and the block contains a NUL.  Walk the four chunks with
   %ecx as the chunk offset (0, 16, 32).  For each chunk, leave through
   L(CopyFrom1To16BytesCase2OrCase3) if the count ends in it, or through
   L(CopyFrom1To16BytesUnalignedXmmN) if it holds the NUL.  %xmm0 is
   all-zero on entry and again after every compare that found nothing.  */
1467	.p2align 4
1468L(Unaligned64LeaveCase2):
1469	xor	%ecx, %ecx
1470	pcmpeqb	%xmm4, %xmm0
1471	pmovmskb %xmm0, %edx
1472	add	$48, %ebx
1473	jle	L(CopyFrom1To16BytesCase2OrCase3)
1474	test	%edx, %edx
1475	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
1476
1477	pcmpeqb	%xmm5, %xmm0
1478	pmovmskb %xmm0, %edx
1479	movdqu	%xmm4, (%edi)
1480	add	$16, %ecx
1481	sub	$16, %ebx
1482	jbe	L(CopyFrom1To16BytesCase2OrCase3)
1483	test	%edx, %edx
1484	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
1485
1486	pcmpeqb	%xmm6, %xmm0
1487	pmovmskb %xmm0, %edx
1488	movdqu	%xmm5, 16(%edi)
1489	add	$16, %ecx
1490	sub	$16, %ebx
1491	jbe	L(CopyFrom1To16BytesCase2OrCase3)
1492	test	%edx, %edx
1493	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
1494
	/* Only the fourth chunk is left; both the NUL and the end of the
	   count are in it.  Point %edi/%esi at that chunk (%ecx is 32 here)
	   and choose between the string-ends-first and count-ends-first
	   exits.  */
1495	pcmpeqb	%xmm7, %xmm0
1496	pmovmskb %xmm0, %edx
1497	movdqu	%xmm6, 32(%edi)
1498	lea	16(%edi, %ecx), %edi
1499	lea	16(%esi, %ecx), %esi
1500	bsf	%edx, %edx
1501	cmp	%ebx, %edx
1502	jb	L(CopyFrom1To16BytesExit)
1503	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
1504
1505	.p2align 4
/* Return %edi in %eax and leave through RETURN.
   NOTE(review): presumably the n == 0 path with %edi still holding the
   destination pointer; the branch to this label is outside this part
   of the file -- confirm.  */
1506L(ExitZero):
1507	movl	%edi, %eax
1508	RETURN
1509
1510END (STRCPY)
1511
1512	.p2align 4
1513	.section .rodata
/* Jump table with 32 four-byte entries built by JMPTBL: slot k
   (0-based) refers to L(Exit<k+1>), i.e. L(Exit1) .. L(Exit32).  In
   PIC builds JMPTBL stores each target as an offset from the table
   base.
   NOTE(review): presumably indexed by the 0-based position of the
   terminating NUL; the users are outside this part of the file --
   confirm.  */
1514L(ExitTable):
1515	.int	JMPTBL(L(Exit1), L(ExitTable))
1516	.int	JMPTBL(L(Exit2), L(ExitTable))
1517	.int	JMPTBL(L(Exit3), L(ExitTable))
1518	.int	JMPTBL(L(Exit4), L(ExitTable))
1519	.int	JMPTBL(L(Exit5), L(ExitTable))
1520	.int	JMPTBL(L(Exit6), L(ExitTable))
1521	.int	JMPTBL(L(Exit7), L(ExitTable))
1522	.int	JMPTBL(L(Exit8), L(ExitTable))
1523	.int	JMPTBL(L(Exit9), L(ExitTable))
1524	.int	JMPTBL(L(Exit10), L(ExitTable))
1525	.int	JMPTBL(L(Exit11), L(ExitTable))
1526	.int	JMPTBL(L(Exit12), L(ExitTable))
1527	.int	JMPTBL(L(Exit13), L(ExitTable))
1528	.int	JMPTBL(L(Exit14), L(ExitTable))
1529	.int	JMPTBL(L(Exit15), L(ExitTable))
1530	.int	JMPTBL(L(Exit16), L(ExitTable))
1531	.int	JMPTBL(L(Exit17), L(ExitTable))
1532	.int	JMPTBL(L(Exit18), L(ExitTable))
1533	.int	JMPTBL(L(Exit19), L(ExitTable))
1534	.int	JMPTBL(L(Exit20), L(ExitTable))
1535	.int	JMPTBL(L(Exit21), L(ExitTable))
1536	.int	JMPTBL(L(Exit22), L(ExitTable))
1537	.int    JMPTBL(L(Exit23), L(ExitTable))
1538	.int	JMPTBL(L(Exit24), L(ExitTable))
1539	.int	JMPTBL(L(Exit25), L(ExitTable))
1540	.int	JMPTBL(L(Exit26), L(ExitTable))
1541	.int	JMPTBL(L(Exit27), L(ExitTable))
1542	.int	JMPTBL(L(Exit28), L(ExitTable))
1543	.int	JMPTBL(L(Exit29), L(ExitTable))
1544	.int	JMPTBL(L(Exit30), L(ExitTable))
1545	.int	JMPTBL(L(Exit31), L(ExitTable))
1546	.int	JMPTBL(L(Exit32), L(ExitTable))
1547
/* Jump table with 34 four-byte entries built by JMPTBL: slot 0 refers
   to L(Exit0) and slot k (1..33) to L(StrncpyExit<k>).  It is used by
   BRANCH_TO_JMPTBL_ENTRY with %ebx as the index and a scale of 4.
   NOTE(review): presumably slot k is the exit that copies exactly k
   bytes when the length limit is hit; the targets are outside this
   part of the file -- confirm.  */
1548L(ExitStrncpyTable):
1549	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
1550	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1551	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1552	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1553	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1554	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1555	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1556	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1557	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1558	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1559	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1560	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1561	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1562	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1563	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1564	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1565	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1566	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1567	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1568	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1569	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1570	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1571	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1572	.int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1573	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1574	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1575	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1576	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1577	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1578	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1579	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1580	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1581	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1582	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1582	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1583
1584	.p2align 4
/* Jump table with 17 four-byte entries built by JMPTBL: slot k refers
   to L(Fill<k>), k = 0..16.  The strncpy fill code dispatches through
   it with %ebx as the index and a scale of 4.
   NOTE(review): presumably L(Fill<k>) writes the final k pad bytes;
   the targets are outside this part of the file -- confirm.  */
1585L(FillTable):
1586	.int	JMPTBL(L(Fill0), L(FillTable))
1587	.int	JMPTBL(L(Fill1), L(FillTable))
1588	.int	JMPTBL(L(Fill2), L(FillTable))
1589	.int	JMPTBL(L(Fill3), L(FillTable))
1590	.int	JMPTBL(L(Fill4), L(FillTable))
1591	.int	JMPTBL(L(Fill5), L(FillTable))
1592	.int	JMPTBL(L(Fill6), L(FillTable))
1593	.int	JMPTBL(L(Fill7), L(FillTable))
1594	.int	JMPTBL(L(Fill8), L(FillTable))
1595	.int	JMPTBL(L(Fill9), L(FillTable))
1596	.int	JMPTBL(L(Fill10), L(FillTable))
1597	.int	JMPTBL(L(Fill11), L(FillTable))
1598	.int	JMPTBL(L(Fill12), L(FillTable))
1599	.int	JMPTBL(L(Fill13), L(FillTable))
1600	.int	JMPTBL(L(Fill14), L(FillTable))
1601	.int	JMPTBL(L(Fill15), L(FillTable))
1602	.int	JMPTBL(L(Fill16), L(FillTable))
1603# else
1604#  define PARMS  4
1605#  define ENTRANCE
1606#  define RETURN  POP (%edi); ret; CFI_PUSH (%edi)
1607#  define RETURN1  ret
1608
1609	.text
/* STRCPY (dst, src) for the build without USE_AS_STRNCPY (strcpy, or
   stpcpy when USE_AS_STPCPY is defined).  Both arguments are read from
   the stack: dst = STR1(%esp), src = STR2(%esp), with PARMS = 4.

   Register use in this part:
     %edx   destination cursor (stores go through it)
     %ecx   source cursor (loads go through it)
     %edi   copy of the original dst, used as the return value by the
	    non-stpcpy exits
     %ebx   byte offset that L(CopyFrom1To16Bytes) adds to %edx/%ecx
     %xmm0  zero vector / result of the NUL compare
     %eax   16-bit mask of NUL positions in the chunk just tested

   The code falls through from the end of this part into
   L(CopyFrom1To16Bytes).  */
1610ENTRY (STRCPY)
1611	ENTRANCE
1612	mov	STR1(%esp), %edx
1613	mov	STR2(%esp), %ecx
1614
	/* Test the first 16 source bytes one at a time.  A NUL at index k
	   goes to L(ExitTail<k+1>), which copies k+1 bytes and returns
	   without touching the stack (nothing has been pushed yet).  */
1615	cmpb	$0, (%ecx)
1616	jz	L(ExitTail1)
1617	cmpb	$0, 1(%ecx)
1618	jz	L(ExitTail2)
1619	cmpb	$0, 2(%ecx)
1620	jz	L(ExitTail3)
1621	cmpb	$0, 3(%ecx)
1622	jz	L(ExitTail4)
1623	cmpb	$0, 4(%ecx)
1624	jz	L(ExitTail5)
1625	cmpb	$0, 5(%ecx)
1626	jz	L(ExitTail6)
1627	cmpb	$0, 6(%ecx)
1628	jz	L(ExitTail7)
1629	cmpb	$0, 7(%ecx)
1630	jz	L(ExitTail8)
1631	cmpb	$0, 8(%ecx)
1632	jz	L(ExitTail9)
1633	cmpb	$0, 9(%ecx)
1634	jz	L(ExitTail10)
1635	cmpb	$0, 10(%ecx)
1636	jz	L(ExitTail11)
1637	cmpb	$0, 11(%ecx)
1638	jz	L(ExitTail12)
1639	cmpb	$0, 12(%ecx)
1640	jz	L(ExitTail13)
1641	cmpb	$0, 13(%ecx)
1642	jz	L(ExitTail14)
1643	cmpb	$0, 14(%ecx)
1644	jz	L(ExitTail15)
1645	cmpb	$0, 15(%ecx)
1646	jz	L(ExitTail16)
1647
	/* The string is at least 16 characters long.  %edi and %ebx are
	   used below, so save them (with matching CFI).  */
1648	PUSH	(%edi)
1649	PUSH	(%ebx)
1650
	/* %edi = original dst.  %ebx = first 16-byte boundary above src.
	   Copy the first 16 bytes with unaligned moves, then test the
	   aligned chunk at %ebx for a NUL.  %ebx is then turned into the
	   distance from src to that chunk, which is what
	   L(CopyFrom1To16Bytes) expects.  */
1651	mov	%edx, %edi
1652	lea	16(%ecx), %ebx
1653	and	$-16, %ebx
1654	pxor	%xmm0, %xmm0
1655	movdqu	(%ecx), %xmm1
1656	movdqu	%xmm1, (%edx)
1657	pcmpeqb	(%ebx), %xmm0
1658	pmovmskb %xmm0, %eax
1659	sub	%ecx, %ebx
1660	test	%eax, %eax
1661	jnz	L(CopyFrom1To16Bytes)
1662
	/* Rebase: %ecx moves up to that 16-byte boundary and %edx is
	   advanced by the same number of bytes, so both cursors refer to
	   the same string position.  %ebx restarts at 0.  */
1663	mov	%ecx, %eax
1664	lea	16(%ecx), %ecx
1665	and	$-16, %ecx
1666	sub	%ecx, %eax
1667	sub	%eax, %edx
1668	xor	%ebx, %ebx
1669
	/* Six unrolled 16-byte steps.  Each one loads the next aligned
	   source chunk, stores the chunk already known to be NUL-free,
	   tests the new chunk and adds 16 to %ebx.  %xmm0 stays all-zero
	   for as long as no NUL has been seen, so it can be reused as the
	   compare operand without being cleared again.  */
1670	.p2align 4
1671	movdqa	(%ecx), %xmm1
1672	movaps	16(%ecx), %xmm2
1673	movdqu	%xmm1, (%edx)
1674	pcmpeqb	%xmm2, %xmm0
1675	pmovmskb %xmm0, %eax
1676	add	$16, %ebx
1677	test	%eax, %eax
1678	jnz	L(CopyFrom1To16Bytes)
1679
1680	movaps	16(%ecx, %ebx), %xmm3
1681	movdqu	%xmm2, (%edx, %ebx)
1682	pcmpeqb	%xmm3, %xmm0
1683	pmovmskb %xmm0, %eax
1684	add	$16, %ebx
1685	test	%eax, %eax
1686	jnz	L(CopyFrom1To16Bytes)
1687
1688	movaps	16(%ecx, %ebx), %xmm4
1689	movdqu	%xmm3, (%edx, %ebx)
1690	pcmpeqb	%xmm4, %xmm0
1691	pmovmskb %xmm0, %eax
1692	add	$16, %ebx
1693	test	%eax, %eax
1694	jnz	L(CopyFrom1To16Bytes)
1695
1696	movaps	16(%ecx, %ebx), %xmm1
1697	movdqu	%xmm4, (%edx, %ebx)
1698	pcmpeqb	%xmm1, %xmm0
1699	pmovmskb %xmm0, %eax
1700	add	$16, %ebx
1701	test	%eax, %eax
1702	jnz	L(CopyFrom1To16Bytes)
1703
1704	movaps	16(%ecx, %ebx), %xmm2
1705	movdqu	%xmm1, (%edx, %ebx)
1706	pcmpeqb	%xmm2, %xmm0
1707	pmovmskb %xmm0, %eax
1708	add	$16, %ebx
1709	test	%eax, %eax
1710	jnz	L(CopyFrom1To16Bytes)
1711
1712	movaps	16(%ecx, %ebx), %xmm3
1713	movdqu	%xmm2, (%edx, %ebx)
1714	pcmpeqb	%xmm3, %xmm0
1715	pmovmskb %xmm0, %eax
1716	add	$16, %ebx
1717	test	%eax, %eax
1718	jnz	L(CopyFrom1To16Bytes)
1719
	/* Store the last tested chunk, then round the source cursor down
	   to a 64-byte boundary for the main loop and move %edx by the
	   same amount.  Rounding down means some bytes may be copied a
	   second time.  %ebx is left at 0x60 (six times 16).  */
1720	movdqu	%xmm3, (%edx, %ebx)
1721	mov	%ecx, %eax
1722	lea	16(%ecx, %ebx), %ecx
1723	and	$-0x40, %ecx
1724	sub	%ecx, %eax
1725	sub	%eax, %edx
1726
	/* First pass of the 64-byte loop: load four aligned chunks into
	   %xmm4..%xmm7 and fold them with pminub (byte-wise unsigned
	   minimum), so a lane of the result is zero exactly when one of
	   the four chunks has a NUL in that lane.  %ecx and %edx are
	   advanced by 64 before the test.  */
1727L(Aligned64Loop):
1728	movaps	(%ecx), %xmm2
1729	movaps	%xmm2, %xmm4
1730	movaps	16(%ecx), %xmm5
1731	movaps	32(%ecx), %xmm3
1732	movaps	%xmm3, %xmm6
1733	movaps	48(%ecx), %xmm7
1734	pminub	%xmm5, %xmm2
1735	add	$64, %ecx
1736	pminub	%xmm7, %xmm3
1737	add	$64, %edx
1738	pminub	%xmm2, %xmm3
1739	pcmpeqb	%xmm0, %xmm3
1740	pmovmskb %xmm3, %eax
1741	test	%eax, %eax
1742	jnz	L(Aligned64Leave)
	/* Steady state: the stores of the previous NUL-free 64 bytes (at
	   -64(%edx)..-16(%edx)) are interleaved with the loads and the
	   pminub folding of the next 64 bytes.  */
1743L(Aligned64Loop_start):
1744	movdqu	%xmm4, -64(%edx)
1745	movaps	(%ecx), %xmm2
1746	movdqa	%xmm2, %xmm4
1747	movdqu	%xmm5, -48(%edx)
1748	movaps	16(%ecx), %xmm5
1749	pminub	%xmm5, %xmm2
1750	movaps	32(%ecx), %xmm3
1751	movdqu	%xmm6, -32(%edx)
1752	movaps	%xmm3, %xmm6
1753	movdqu	%xmm7, -16(%edx)
1754	movaps	48(%ecx), %xmm7
1755	pminub	%xmm7, %xmm3
1756	pminub	%xmm2, %xmm3
1757	pcmpeqb	%xmm3, %xmm0
1758	pmovmskb %xmm0, %eax
1759	add	$64, %edx
1760	add	$64, %ecx
1761	test	%eax, %eax
1762	jz	L(Aligned64Loop_start)
	/* A NUL lies somewhere in %xmm4..%xmm7.  %ebx goes from 0x60 to
	   -0x40, which cancels the 64 already added to %ecx/%edx, so
	   cursor + %ebx is the start of this 64-byte block.  %xmm0 is
	   cleared again because the loop compare may have written a
	   non-zero mask into it.  */
1763L(Aligned64Leave):
1764	sub	$0xa0, %ebx
1765	pxor	%xmm0, %xmm0
1766	pcmpeqb	%xmm4, %xmm0
1767	pmovmskb %xmm0, %eax
1768	test	%eax, %eax
1769	jnz	L(CopyFrom1To16Bytes)
1770
	/* Test each following chunk, storing the one before it.  %ebx is
	   bumped with lea so that the flags set by test survive until
	   the jnz.  */
1771	pcmpeqb	%xmm5, %xmm0
1772	pmovmskb %xmm0, %eax
1773	movdqu	%xmm4, -64(%edx)
1774	test	%eax, %eax
1775	lea	16(%ebx), %ebx
1776	jnz	L(CopyFrom1To16Bytes)
1777
1778	pcmpeqb	%xmm6, %xmm0
1779	pmovmskb %xmm0, %eax
1780	movdqu	%xmm5, -48(%edx)
1781	test	%eax, %eax
1782	lea	16(%ebx), %ebx
1783	jnz	L(CopyFrom1To16Bytes)
1784
	/* The NUL must be in %xmm7: store %xmm6, build the mask for
	   %xmm7 and fall through into L(CopyFrom1To16Bytes).  */
1785	movdqu	%xmm6, -32(%edx)
1786	pcmpeqb	%xmm7, %xmm0
1787	pmovmskb %xmm0, %eax
1788	lea	16(%ebx), %ebx
1789
1790/*-----------------End of main part---------------------------*/
1791
1792	.p2align 4
/* Final copy of 1..16 bytes.  On entry %eax holds the 16-bit NUL mask
   of the chunk at %ecx + %ebx (source) / %edx + %ebx (destination).
   Both cursors are moved onto that chunk, %ebx is restored, and the
   lowest set mask bit selects how many bytes (terminator included)
   must still be copied: bit k set first means L(Exit<k+1>).  */
1793L(CopyFrom1To16Bytes):
1794	add	%ebx, %edx
1795	add	%ebx, %ecx
1796
1797	POP	(%ebx)
	/* No NUL among the low eight bytes: decode %ah instead.  */
1798	test	%al, %al
1799	jz	L(ExitHigh)
1800	test	$0x01, %al
1801	jnz	L(Exit1)
1802	test	$0x02, %al
1803	jnz	L(Exit2)
1804	test	$0x04, %al
1805	jnz	L(Exit3)
1806	test	$0x08, %al
1807	jnz	L(Exit4)
1808	test	$0x10, %al
1809	jnz	L(Exit5)
1810	test	$0x20, %al
1811	jnz	L(Exit6)
1812	test	$0x40, %al
1813	jnz	L(Exit7)
	/* Only bit 7 is left: the NUL is the eighth byte, so copy eight
	   bytes as two 4-byte moves.  stpcpy returns the address of the
	   NUL (7(%edx)), strcpy the original dst kept in %edi.  */
1814	/* Exit 8 */
1815	movl	(%ecx), %eax
1816	movl	%eax, (%edx)
1817	movl	4(%ecx), %eax
1818	movl	%eax, 4(%edx)
1819# ifdef USE_AS_STPCPY
1820	lea	7(%edx), %eax
1821# else
1822	movl	%edi, %eax
1823# endif
1824	RETURN
1825
1826	.p2align 4
/* The NUL is in the upper eight bytes of the chunk: decode the mask
   bits in %ah.  Bit k of %ah set first means L(Exit<k+9>).  */
1827L(ExitHigh):
1828	test	$0x01, %ah
1829	jnz	L(Exit9)
1830	test	$0x02, %ah
1831	jnz	L(Exit10)
1832	test	$0x04, %ah
1833	jnz	L(Exit11)
1834	test	$0x08, %ah
1835	jnz	L(Exit12)
1836	test	$0x10, %ah
1837	jnz	L(Exit13)
1838	test	$0x20, %ah
1839	jnz	L(Exit14)
1840	test	$0x40, %ah
1841	jnz	L(Exit15)
	/* Only bit 7 of %ah is left: the NUL is the sixteenth byte, so
	   copy the whole chunk as two 8-byte moves through %xmm0.  stpcpy
	   returns the address of the NUL (15(%edx)), strcpy the original
	   dst kept in %edi.  */
1842	/* Exit 16 */
1843	movlpd	(%ecx), %xmm0
1844	movlpd	%xmm0, (%edx)
1845	movlpd	8(%ecx), %xmm0
1846	movlpd	%xmm0, 8(%edx)
1847# ifdef USE_AS_STPCPY
1848	lea	15(%edx), %eax
1849# else
1850	movl	%edi, %eax
1851# endif
1852	RETURN
1853
1854	.p2align 4
/* Copy 1 byte (the terminating NUL itself) from (%ecx) to (%edx).
   Returns %edx, the address of that NUL, for stpcpy and the original
   dst saved in %edi otherwise.  RETURN pops %edi before ret.  */
1855L(Exit1):
1856	movb	(%ecx), %al
1857	movb	%al, (%edx)
1858# ifdef USE_AS_STPCPY
1859	lea	(%edx), %eax
1860# else
1861	movl	%edi, %eax
1862# endif
1863	RETURN
1864
1865	.p2align 4
/* Copy 2 bytes (one character plus the NUL) with a single 16-bit
   move.  Returns 1(%edx), the address of the NUL, for stpcpy and the
   original dst saved in %edi otherwise.  */
1866L(Exit2):
1867	movw	(%ecx), %ax
1868	movw	%ax, (%edx)
1869# ifdef USE_AS_STPCPY
1870	lea	1(%edx), %eax
1871# else
1872	movl	%edi, %eax
1873# endif
1874	RETURN
1875
1876	.p2align 4
/* Copy 3 bytes (NUL included) as a 16-bit move followed by a byte
   move.  Returns 2(%edx), the address of the NUL, for stpcpy and the
   original dst saved in %edi otherwise.  */
1877L(Exit3):
1878	movw	(%ecx), %ax
1879	movw	%ax, (%edx)
1880	movb	2(%ecx), %al
1881	movb	%al, 2(%edx)
1882# ifdef USE_AS_STPCPY
1883	lea	2(%edx), %eax
1884# else
1885	movl	%edi, %eax
1886# endif
1887	RETURN
1888
1889	.p2align 4
/* Copy 4 bytes (NUL included) with a single 32-bit move.  Returns
   3(%edx), the address of the NUL, for stpcpy and the original dst
   saved in %edi otherwise.  */
1890L(Exit4):
1891	movl	(%ecx), %eax
1892	movl	%eax, (%edx)
1893# ifdef USE_AS_STPCPY
1894	lea	3(%edx), %eax
1895# else
1896	movl	%edi, %eax
1897# endif
1898	RETURN
1899
1900	.p2align 4
/* Copy 5 bytes (NUL included) as a 32-bit move followed by a byte
   move.  Returns 4(%edx), the address of the NUL, for stpcpy and the
   original dst saved in %edi otherwise.  */
1901L(Exit5):
1902	movl	(%ecx), %eax
1903	movl	%eax, (%edx)
1904	movb	4(%ecx), %al
1905	movb	%al, 4(%edx)
1906# ifdef USE_AS_STPCPY
1907	lea	4(%edx), %eax
1908# else
1909	movl	%edi, %eax
1910# endif
1911	RETURN
1912
1913	.p2align 4
/* Copy 6 bytes (NUL included) as a 32-bit move followed by a 16-bit
   move.  Returns 5(%edx), the address of the NUL, for stpcpy and the
   original dst saved in %edi otherwise.  */
1914L(Exit6):
1915	movl	(%ecx), %eax
1916	movl	%eax, (%edx)
1917	movw	4(%ecx), %ax
1918	movw	%ax, 4(%edx)
1919# ifdef USE_AS_STPCPY
1920	lea	5(%edx), %eax
1921# else
1922	movl	%edi, %eax
1923# endif
1924	RETURN
1925
1926	.p2align 4
/* Copy 7 bytes (NUL included) as two 32-bit moves at offsets 0 and 3;
   they overlap by one byte, which is harmless.  Returns 6(%edx), the
   address of the NUL, for stpcpy and the original dst saved in %edi
   otherwise.  */
1927L(Exit7):
1928	movl	(%ecx), %eax
1929	movl	%eax, (%edx)
1930	movl	3(%ecx), %eax
1931	movl	%eax, 3(%edx)
1932# ifdef USE_AS_STPCPY
1933	lea	6(%edx), %eax
1934# else
1935	movl	%edi, %eax
1936# endif
1937	RETURN
1938
1939	.p2align 4
/* Copy 9 bytes (NUL included) as two 32-bit moves plus a byte move.
   Returns 8(%edx), the address of the NUL, for stpcpy and the original
   dst saved in %edi otherwise.  */
1940L(Exit9):
1941	movl	(%ecx), %eax
1942	movl	%eax, (%edx)
1943	movl	4(%ecx), %eax
1944	movl	%eax, 4(%edx)
1945	movb	8(%ecx), %al
1946	movb	%al, 8(%edx)
1947# ifdef USE_AS_STPCPY
1948	lea	8(%edx), %eax
1949# else
1950	movl	%edi, %eax
1951# endif
1952	RETURN
1953
1954	.p2align 4
/* Copy 10 bytes (NUL included) as two 32-bit moves plus a 16-bit
   move.  Returns 9(%edx), the address of the NUL, for stpcpy and the
   original dst saved in %edi otherwise.  */
1955L(Exit10):
1956	movl	(%ecx), %eax
1957	movl	%eax, (%edx)
1958	movl	4(%ecx), %eax
1959	movl	%eax, 4(%edx)
1960	movw	8(%ecx), %ax
1961	movw	%ax, 8(%edx)
1962# ifdef USE_AS_STPCPY
1963	lea	9(%edx), %eax
1964# else
1965	movl	%edi, %eax
1966# endif
1967	RETURN
1968
1969	.p2align 4
/* Copy 11 bytes (NUL included) as three 32-bit moves at offsets 0, 4
   and 7; the last one overlaps the second by one byte.  Returns
   10(%edx), the address of the NUL, for stpcpy and the original dst
   saved in %edi otherwise.  */
1970L(Exit11):
1971	movl	(%ecx), %eax
1972	movl	%eax, (%edx)
1973	movl	4(%ecx), %eax
1974	movl	%eax, 4(%edx)
1975	movl	7(%ecx), %eax
1976	movl	%eax, 7(%edx)
1977# ifdef USE_AS_STPCPY
1978	lea	10(%edx), %eax
1979# else
1980	movl	%edi, %eax
1981# endif
1982	RETURN
1983
1984	.p2align 4
/* Copy 12 bytes (NUL included) as three 32-bit moves.  Returns
   11(%edx), the address of the NUL, for stpcpy and the original dst
   saved in %edi otherwise.  */
1985L(Exit12):
1986	movl	(%ecx), %eax
1987	movl	%eax, (%edx)
1988	movl	4(%ecx), %eax
1989	movl	%eax, 4(%edx)
1990	movl	8(%ecx), %eax
1991	movl	%eax, 8(%edx)
1992# ifdef USE_AS_STPCPY
1993	lea	11(%edx), %eax
1994# else
1995	movl	%edi, %eax
1996# endif
1997	RETURN
1998
1999	.p2align 4
/* Copy 13 bytes (NUL included) as two 8-byte movlpd moves at offsets
   0 and 5; they overlap by three bytes.  Returns 12(%edx), the address
   of the NUL, for stpcpy and the original dst saved in %edi
   otherwise.  */
2000L(Exit13):
2001	movlpd	(%ecx), %xmm0
2002	movlpd	%xmm0, (%edx)
2003	movlpd	5(%ecx), %xmm0
2004	movlpd	%xmm0, 5(%edx)
2005# ifdef USE_AS_STPCPY
2006	lea	12(%edx), %eax
2007# else
2008	movl	%edi, %eax
2009# endif
2010	RETURN
2011
2012	.p2align 4
/* Copy 14 bytes (NUL included) as two 8-byte movlpd moves at offsets
   0 and 6; they overlap by two bytes.  Returns 13(%edx), the address
   of the NUL, for stpcpy and the original dst saved in %edi
   otherwise.  */
2013L(Exit14):
2014	movlpd	(%ecx), %xmm0
2015	movlpd	%xmm0, (%edx)
2016	movlpd	6(%ecx), %xmm0
2017	movlpd	%xmm0, 6(%edx)
2018# ifdef USE_AS_STPCPY
2019	lea	13(%edx), %eax
2020# else
2021	movl	%edi, %eax
2022# endif
2023	RETURN
2024
2025	.p2align 4
/* Copy 15 bytes (NUL included) as two 8-byte movlpd moves at offsets
   0 and 7; they overlap by one byte.  Returns 14(%edx), the address of
   the NUL, for stpcpy and the original dst saved in %edi otherwise.  */
2026L(Exit15):
2027	movlpd	(%ecx), %xmm0
2028	movlpd	%xmm0, (%edx)
2029	movlpd	7(%ecx), %xmm0
2030	movlpd	%xmm0, 7(%edx)
2031# ifdef USE_AS_STPCPY
2032	lea	14(%edx), %eax
2033# else
2034	movl	%edi, %eax
2035# endif
2036	RETURN
2037
2038CFI_POP (%edi)
2039
2040	.p2align 4
/* Empty source string, reached from the entry probes before anything
   is pushed: copy the NUL and return with a plain ret (RETURN1).
   %edx still holds dst, which is also the address of the copied NUL,
   so the same value is returned in every build.  */
2041L(ExitTail1):
2042	movb	(%ecx), %al
2043	movb	%al, (%edx)
2044	movl	%edx, %eax
2045	RETURN1
2046
2047	.p2align 4
/* Short-string exit taken before anything is pushed: copy 2 bytes
   (NUL included) with a single 16-bit move.  Returns 1(%edx), the
   address of the NUL, for stpcpy and dst (%edx) otherwise.  */
2048L(ExitTail2):
2049	movw	(%ecx), %ax
2050	movw	%ax, (%edx)
2051# ifdef USE_AS_STPCPY
2052	lea	1(%edx), %eax
2053# else
2054	movl	%edx, %eax
2055# endif
2056	RETURN1
2057
2058	.p2align 4
/* Short-string exit taken before anything is pushed: copy 3 bytes
   (NUL included) as a 16-bit move plus a byte move.  Returns 2(%edx),
   the address of the NUL, for stpcpy and dst (%edx) otherwise.  */
2059L(ExitTail3):
2060	movw	(%ecx), %ax
2061	movw	%ax, (%edx)
2062	movb	2(%ecx), %al
2063	movb	%al, 2(%edx)
2064# ifdef USE_AS_STPCPY
2065	lea	2(%edx), %eax
2066# else
2067	movl	%edx, %eax
2068# endif
2069	RETURN1
2070
2071	.p2align 4
/* Short-string exit taken before anything is pushed: copy 4 bytes
   (NUL included) with a single 32-bit move.  Returns 3(%edx), the
   address of the NUL, for stpcpy and dst (%edx) otherwise.  */
2072L(ExitTail4):
2073	movl	(%ecx), %eax
2074	movl	%eax, (%edx)
2075# ifdef USE_AS_STPCPY
2076	lea	3(%edx), %eax
2077# else
2078	movl	%edx, %eax
2079# endif
2080	RETURN1
2081
2082	.p2align 4
/* Short-string exit taken before anything is pushed: copy 5 bytes
   (NUL included) as a 32-bit move plus a byte move.  Returns 4(%edx),
   the address of the NUL, for stpcpy and dst (%edx) otherwise.  */
2083L(ExitTail5):
2084	movl	(%ecx), %eax
2085	movl	%eax, (%edx)
2086	movb	4(%ecx), %al
2087	movb	%al, 4(%edx)
2088# ifdef USE_AS_STPCPY
2089	lea	4(%edx), %eax
2090# else
2091	movl	%edx, %eax
2092# endif
2093	RETURN1
2094
2095	.p2align 4
/* Short-string exit taken before anything is pushed: copy 6 bytes
   (NUL included) as a 32-bit move plus a 16-bit move.  Returns
   5(%edx), the address of the NUL, for stpcpy and dst (%edx)
   otherwise.  */
2096L(ExitTail6):
2097	movl	(%ecx), %eax
2098	movl	%eax, (%edx)
2099	movw	4(%ecx), %ax
2100	movw	%ax, 4(%edx)
2101# ifdef USE_AS_STPCPY
2102	lea	5(%edx), %eax
2103# else
2104	movl	%edx, %eax
2105# endif
2106	RETURN1
2107
2108	.p2align 4
/* Short-string exit taken before anything is pushed: copy 7 bytes
   (NUL included) as two 32-bit moves at offsets 0 and 3, overlapping
   by one byte.  Returns 6(%edx), the address of the NUL, for stpcpy
   and dst (%edx) otherwise.  */
2109L(ExitTail7):
2110	movl	(%ecx), %eax
2111	movl	%eax, (%edx)
2112	movl	3(%ecx), %eax
2113	movl	%eax, 3(%edx)
2114# ifdef USE_AS_STPCPY
2115	lea	6(%edx), %eax
2116# else
2117	movl	%edx, %eax
2118# endif
2119	RETURN1
2120
2121	.p2align 4
/* Short-string exit taken before anything is pushed: copy 8 bytes
   (NUL included) as two 32-bit moves.  Returns 7(%edx), the address of
   the NUL, for stpcpy and dst (%edx) otherwise.  */
2122L(ExitTail8):
2123	movl	(%ecx), %eax
2124	movl	%eax, (%edx)
2125	movl	4(%ecx), %eax
2126	movl	%eax, 4(%edx)
2127# ifdef USE_AS_STPCPY
2128	lea	7(%edx), %eax
2129# else
2130	movl	%edx, %eax
2131# endif
2132	RETURN1
2133
2134	.p2align 4
/* Short-string exit taken before anything is pushed: copy 9 bytes
   (NUL included) as two 32-bit moves plus a byte move.  Returns
   8(%edx), the address of the NUL, for stpcpy and dst (%edx)
   otherwise.  */
2135L(ExitTail9):
2136	movl	(%ecx), %eax
2137	movl	%eax, (%edx)
2138	movl	4(%ecx), %eax
2139	movl	%eax, 4(%edx)
2140	movb	8(%ecx), %al
2141	movb	%al, 8(%edx)
2142# ifdef USE_AS_STPCPY
2143	lea	8(%edx), %eax
2144# else
2145	movl	%edx, %eax
2146# endif
2147	RETURN1
2148
2149	.p2align 4
/* Short-string exit taken before anything is pushed: copy 10 bytes
   (NUL included) as two 32-bit moves plus a 16-bit move.  Returns
   9(%edx), the address of the NUL, for stpcpy and dst (%edx)
   otherwise.  */
2150L(ExitTail10):
2151	movl	(%ecx), %eax
2152	movl	%eax, (%edx)
2153	movl	4(%ecx), %eax
2154	movl	%eax, 4(%edx)
2155	movw	8(%ecx), %ax
2156	movw	%ax, 8(%edx)
2157# ifdef USE_AS_STPCPY
2158	lea	9(%edx), %eax
2159# else
2160	movl	%edx, %eax
2161# endif
2162	RETURN1
2163
2164	.p2align 4
/* Short-string exit taken before anything is pushed: copy 11 bytes
   (NUL included) as three 32-bit moves at offsets 0, 4 and 7, the last
   overlapping by one byte.  Returns 10(%edx), the address of the NUL,
   for stpcpy and dst (%edx) otherwise.  */
2165L(ExitTail11):
2166	movl	(%ecx), %eax
2167	movl	%eax, (%edx)
2168	movl	4(%ecx), %eax
2169	movl	%eax, 4(%edx)
2170	movl	7(%ecx), %eax
2171	movl	%eax, 7(%edx)
2172# ifdef USE_AS_STPCPY
2173	lea	10(%edx), %eax
2174# else
2175	movl	%edx, %eax
2176# endif
2177	RETURN1
2178
2179	.p2align 4
/* Short-string exit taken before anything is pushed: copy 12 bytes
   (NUL included) as three 32-bit moves.  Returns 11(%edx), the address
   of the NUL, for stpcpy and dst (%edx) otherwise.  */
2180L(ExitTail12):
2181	movl	(%ecx), %eax
2182	movl	%eax, (%edx)
2183	movl	4(%ecx), %eax
2184	movl	%eax, 4(%edx)
2185	movl	8(%ecx), %eax
2186	movl	%eax, 8(%edx)
2187# ifdef USE_AS_STPCPY
2188	lea	11(%edx), %eax
2189# else
2190	movl	%edx, %eax
2191# endif
2192	RETURN1
2193
2194	.p2align 4
/* Short-string exit taken before anything is pushed: copy 13 bytes
   (NUL included) as two 8-byte movlpd moves at offsets 0 and 5,
   overlapping by three bytes.  Returns 12(%edx), the address of the
   NUL, for stpcpy and dst (%edx) otherwise.  */
2195L(ExitTail13):
2196	movlpd	(%ecx), %xmm0
2197	movlpd	%xmm0, (%edx)
2198	movlpd	5(%ecx), %xmm0
2199	movlpd	%xmm0, 5(%edx)
2200# ifdef USE_AS_STPCPY
2201	lea	12(%edx), %eax
2202# else
2203	movl	%edx, %eax
2204# endif
2205	RETURN1
2206
2207	.p2align 4
/* Short-string exit taken before anything is pushed: copy 14 bytes
   (NUL included) as two 8-byte movlpd moves at offsets 0 and 6,
   overlapping by two bytes.  Returns 13(%edx), the address of the NUL,
   for stpcpy and dst (%edx) otherwise.  */
2208L(ExitTail14):
2209	movlpd	(%ecx), %xmm0
2210	movlpd	%xmm0, (%edx)
2211	movlpd	6(%ecx), %xmm0
2212	movlpd	%xmm0, 6(%edx)
2213# ifdef USE_AS_STPCPY
2214	lea	13(%edx), %eax
2215# else
2216	movl	%edx, %eax
2217# endif
2218	RETURN1
2219
2220	.p2align 4
/* Short-string exit taken before anything is pushed: copy 15 bytes
   (NUL included) as two 8-byte movlpd moves at offsets 0 and 7,
   overlapping by one byte.  Returns 14(%edx), the address of the NUL,
   for stpcpy and dst (%edx) otherwise.  */
2221L(ExitTail15):
2222	movlpd	(%ecx), %xmm0
2223	movlpd	%xmm0, (%edx)
2224	movlpd	7(%ecx), %xmm0
2225	movlpd	%xmm0, 7(%edx)
2226# ifdef USE_AS_STPCPY
2227	lea	14(%edx), %eax
2228# else
2229	movl	%edx, %eax
2230# endif
2231	RETURN1
2232
2233	.p2align 4
/* Short-string exit taken before anything is pushed: copy 16 bytes
   (NUL included) as two 8-byte movlpd moves.  Returns 15(%edx), the
   address of the NUL, for stpcpy and dst (%edx) otherwise.  */
2234L(ExitTail16):
2235	movlpd	(%ecx), %xmm0
2236	movlpd	%xmm0, (%edx)
2237	movlpd	8(%ecx), %xmm0
2238	movlpd	%xmm0, 8(%edx)
2239# ifdef USE_AS_STPCPY
2240	lea	15(%edx), %eax
2241# else
2242	movl	%edx, %eax
2243# endif
2244	RETURN1
2245
2246END (STRCPY)
2247# endif
2248
2249#endif
2250