1/* strcpy with SSSE3
2   Copyright (C) 2011-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19
20#if IS_IN (libc)
21
22# ifndef USE_AS_STRCAT
23#  include <sysdep.h>
24
/* Unwind (CFI) bookkeeping that accompanies a 4-byte push of REG: the CFA
   offset is adjusted by 4 and REG is recorded with cfi_rel_offset at
   offset 0.  */
25#  define CFI_PUSH(REG)	\
26	cfi_adjust_cfa_offset (4);	\
27	cfi_rel_offset (REG, 0)
28
/* Inverse of CFI_PUSH: the CFA offset is adjusted by -4 and REG is marked
   as restored.  */
29#  define CFI_POP(REG)	\
30	cfi_adjust_cfa_offset (-4);	\
31	cfi_restore (REG)
32
/* Push/pop a 32-bit register together with the matching CFI update.  */
33#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)
34#  define POP(REG)	popl REG; CFI_POP (REG)
35
/* Default symbol name; STRCPY may be defined beforehand to override it.  */
36#  ifndef STRCPY
37#   define STRCPY  __strcpy_ssse3
38#  endif
39
/* PARMS is the %esp-relative offset of the first argument once ENTRANCE
   has run: 4 bytes of return address, plus 4 more when %ebx is pushed for
   the strncpy variant.  RETURN undoes ENTRANCE and returns; RETURN1
   additionally pops %edi first.
   NOTE(review): the CFI_PUSH directives placed after "ret" presumably
   re-establish the unwind state for the code that textually follows the
   return -- confirm against sysdep.h.  */
40#  ifdef USE_AS_STRNCPY
41#   define PARMS  8
42#   define ENTRANCE PUSH (%ebx)
43#   define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);
44#   define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
45#  else
46#   define PARMS  4
47#   define ENTRANCE
48#   define RETURN  ret
49#   define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
50#  endif
51
/* Load the return value into %eax.  With USE_AS_STPCPY both macros
   compute %edx + n; otherwise SAVE_RESULT copies %edi and
   SAVE_RESULT_TAIL copies %edx (n is ignored).  */
52#  ifdef USE_AS_STPCPY
53#   define SAVE_RESULT(n)  lea	n(%edx), %eax
54#   define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax
55#  else
56#   define SAVE_RESULT(n)  movl	%edi, %eax
57#   define SAVE_RESULT_TAIL(n)  movl	%edx, %eax
58#  endif
59
/* %esp-relative offsets of the arguments.  The entry code loads STR1 into
   %edx (destination), STR2 into %ecx (source) and, for strncpy, LEN into
   %ebx.  */
60#  define STR1  PARMS
61#  define STR2  STR1+4
62#  define LEN  STR2+4
63
64/* In this code the following instructions are used for copying:
65	movb	- 1 byte
66	movw	- 2 bytes
67	movl	- 4 bytes
68	movlpd	- 8 bytes
69	movaps	- 16 bytes - requires 16-byte alignment
70	of	source and destination addresses.
71*/
72
73.text
/* Entry point.  Register roles established in this section:
	%edx  - destination pointer (argument STR1)
	%ecx  - source pointer (argument STR2)
	%ebx  - length argument, then remaining-length counter
		(USE_AS_STRNCPY only)
	%edi  - copy of the original destination (used by SAVE_RESULT)
	%xmm0 - all-zero vector used as the NUL comparand
   Everything from ENTRY down to "mov %edx, %edi" is compiled out when
   USE_AS_STRCAT is defined.  */
74ENTRY (STRCPY)
75	ENTRANCE
76	mov	STR1(%esp), %edx
77	mov	STR2(%esp), %ecx
78#  ifdef USE_AS_STRNCPY
/* Lengths of 8 or less branch to L(StrncpyExit8Bytes).  */
79	movl	LEN(%esp), %ebx
80	cmp	$8, %ebx
81	jbe	L(StrncpyExit8Bytes)
82#  endif
/* Test source bytes 0..7 one at a time; a NUL at index k branches to
   L(ExitTail<k+1>).  */
83	cmpb	$0, (%ecx)
84	jz	L(ExitTail1)
85	cmpb	$0, 1(%ecx)
86	jz	L(ExitTail2)
87	cmpb	$0, 2(%ecx)
88	jz	L(ExitTail3)
89	cmpb	$0, 3(%ecx)
90	jz	L(ExitTail4)
91	cmpb	$0, 4(%ecx)
92	jz	L(ExitTail5)
93	cmpb	$0, 5(%ecx)
94	jz	L(ExitTail6)
95	cmpb	$0, 6(%ecx)
96	jz	L(ExitTail7)
97	cmpb	$0, 7(%ecx)
98	jz	L(ExitTail8)
99#  ifdef USE_AS_STRNCPY
/* No NUL in bytes 0..7; lengths 9..15 branch to L(StrncpyExit15Bytes).  */
100	cmp	$16, %ebx
101	jb	L(StrncpyExit15Bytes)
102#  endif
/* Same byte-wise test for source bytes 8..14.  */
103	cmpb	$0, 8(%ecx)
104	jz	L(ExitTail9)
105	cmpb	$0, 9(%ecx)
106	jz	L(ExitTail10)
107	cmpb	$0, 10(%ecx)
108	jz	L(ExitTail11)
109	cmpb	$0, 11(%ecx)
110	jz	L(ExitTail12)
111	cmpb	$0, 12(%ecx)
112	jz	L(ExitTail13)
113	cmpb	$0, 13(%ecx)
114	jz	L(ExitTail14)
115	cmpb	$0, 14(%ecx)
116	jz	L(ExitTail15)
117#  ifdef USE_AS_STRNCPY
/* A length of exactly 16 takes the 16-byte exit without looking at
   byte 15.  */
118	cmp	$16, %ebx
119	je	L(ExitTail16)
120#  endif
121	cmpb	$0, 15(%ecx)
122	jz	L(ExitTail16)
123
/* From here on: no NUL in source bytes 0..15 and, for strncpy, len > 16.
   Keep the original destination in %edi.  */
124	PUSH	(%edi)
125	mov	%edx, %edi
126# endif
127	PUSH	(%esi)
128# ifdef USE_AS_STRNCPY
/* %ebx = %ebx - 16 + (%ecx & 15).  */
129	mov	%ecx, %esi
130	sub	$16, %ebx
131	and	$0xf, %esi
132
133/* add 16 bytes ecx_offset to ebx */
134
135	add	%esi, %ebx
136# endif
/* %esi = first 16-byte aligned address strictly above the source
   pointer.  */
137	lea	16(%ecx), %esi
138	and	$-16, %esi
139	pxor	%xmm0, %xmm0
/* Copy source bytes 0..15 with two 8-byte moves, interleaved with the NUL
   scan of the aligned 16-byte block at (%esi).  */
140	movlpd	(%ecx), %xmm1
141	movlpd	%xmm1, (%edx)
142
143	pcmpeqb	(%esi), %xmm0
144	movlpd	8(%ecx), %xmm1
145	movlpd	%xmm1, 8(%edx)
146
/* %eax = NUL byte mask of the aligned block; %esi = its distance from the
   source pointer (1..16).  */
147	pmovmskb %xmm0, %eax
148	sub	%ecx, %esi
149
150# ifdef USE_AS_STRNCPY
151	sub	$16, %ebx
152	jbe	L(CopyFrom1To16BytesCase2OrCase3)
153# endif
154	test	%eax, %eax
155	jnz	L(CopyFrom1To16Bytes)
156
/* Round the destination up to the next 16-byte boundary.  %eax becomes
   old %edx - new %edx, i.e. minus the number of destination bytes skipped
   (1..16); those bytes were already stored above.  */
157	mov	%edx, %eax
158	lea	16(%edx), %edx
159	and	$-16, %edx
160	sub	%edx, %eax
161
162# ifdef USE_AS_STRNCPY
/* %esi = (distance to the scanned source block) - (destination bytes
   skipped).  If that is <= 0 (bit 31 of %esi - 1 set) the counter is left
   alone, otherwise 16 is added back to %ebx.
   NOTE(review): presumably this compensates for the already-counted
   aligned block being scanned a second time below -- confirm.  */
163	add	%eax, %esi
164	lea	-1(%esi), %esi
165	and	$1<<31, %esi
166	test	%esi, %esi
167	jnz	L(ContinueCopy)
168	lea	16(%ebx), %ebx
169
170L(ContinueCopy):
171# endif
/* Advance the source by the same number of bytes as the destination;
   %eax = source misalignment (%ecx & 15) relative to the now-aligned
   destination.  %esi is cleared with mov rather than xor because mov
   leaves the flags from the "and" intact for the jz below.  */
172	sub	%eax, %ecx
173	mov	%ecx, %eax
174	and	$0xf, %eax
175	mov	$0, %esi
176
177/* case: ecx_offset == edx_offset */
178
179	jz	L(Align16Both)
180
/* Dispatch on the misalignment 1..15 to the matching L(Shl<n>) routine.  */
181	cmp	$8, %eax
182	jae	L(ShlHigh8)
183	cmp	$1, %eax
184	je	L(Shl1)
185	cmp	$2, %eax
186	je	L(Shl2)
187	cmp	$3, %eax
188	je	L(Shl3)
189	cmp	$4, %eax
190	je	L(Shl4)
191	cmp	$5, %eax
192	je	L(Shl5)
193	cmp	$6, %eax
194	je	L(Shl6)
195	jmp	L(Shl7)
196
197L(ShlHigh8):
/* The flags are still those of "cmp $8, %eax" (jae does not change
   them), so this je selects the misalignment == 8 case.  */
198	je	L(Shl8)
199	cmp	$9, %eax
200	je	L(Shl9)
201	cmp	$10, %eax
202	je	L(Shl10)
203	cmp	$11, %eax
204	je	L(Shl11)
205	cmp	$12, %eax
206	je	L(Shl12)
207	cmp	$13, %eax
208	je	L(Shl13)
209	cmp	$14, %eax
210	je	L(Shl14)
211	jmp	L(Shl15)
212
/* Align16Both: reached from the dispatch code when %ecx & 15 == 0 with
   %edx already rounded to a 16-byte boundary, so movaps can be used on
   both sides.  %esi (zero on entry) is the running byte offset.  Each
   step loads the next 16 source bytes, stores the previously tested block
   and tests the new block for NUL.  On a hit it branches to
   L(CopyFrom1To16Bytes) with %eax = NUL byte mask and %esi = offset of
   the block that has not been stored yet.  %xmm0 stays all-zero as long
   as no NUL is found, so it is reused as the comparand.
   NOTE(review): the first block at (%ecx) is stored without a NUL test
   here; presumably it was already scanned by the entry code -- confirm.  */
213L(Align16Both):
214	movaps	(%ecx), %xmm1
215	movaps	16(%ecx), %xmm2
216	movaps	%xmm1, (%edx)
217	pcmpeqb	%xmm2, %xmm0
218	pmovmskb %xmm0, %eax
219	lea	16(%esi), %esi
220# ifdef USE_AS_STRNCPY
/* Count 16 bytes; leave when the counter reaches zero or underflows.  */
221	sub	$16, %ebx
222	jbe	L(CopyFrom1To16BytesCase2OrCase3)
223# endif
224	test	%eax, %eax
225	jnz	L(CopyFrom1To16Bytes)
226
227	movaps	16(%ecx, %esi), %xmm3
228	movaps	%xmm2, (%edx, %esi)
229	pcmpeqb	%xmm3, %xmm0
230	pmovmskb %xmm0, %eax
231	lea	16(%esi), %esi
232# ifdef USE_AS_STRNCPY
233	sub	$16, %ebx
234	jbe	L(CopyFrom1To16BytesCase2OrCase3)
235# endif
236	test	%eax, %eax
237	jnz	L(CopyFrom1To16Bytes)
238
239	movaps	16(%ecx, %esi), %xmm4
240	movaps	%xmm3, (%edx, %esi)
241	pcmpeqb	%xmm4, %xmm0
242	pmovmskb %xmm0, %eax
243	lea	16(%esi), %esi
244# ifdef USE_AS_STRNCPY
245	sub	$16, %ebx
246	jbe	L(CopyFrom1To16BytesCase2OrCase3)
247# endif
248	test	%eax, %eax
249	jnz	L(CopyFrom1To16Bytes)
250
251	movaps	16(%ecx, %esi), %xmm1
252	movaps	%xmm4, (%edx, %esi)
253	pcmpeqb	%xmm1, %xmm0
254	pmovmskb %xmm0, %eax
255	lea	16(%esi), %esi
256# ifdef USE_AS_STRNCPY
257	sub	$16, %ebx
258	jbe	L(CopyFrom1To16BytesCase2OrCase3)
259# endif
260	test	%eax, %eax
261	jnz	L(CopyFrom1To16Bytes)
262
263	movaps	16(%ecx, %esi), %xmm2
264	movaps	%xmm1, (%edx, %esi)
265	pcmpeqb	%xmm2, %xmm0
266	pmovmskb %xmm0, %eax
267	lea	16(%esi), %esi
268# ifdef USE_AS_STRNCPY
269	sub	$16, %ebx
270	jbe	L(CopyFrom1To16BytesCase2OrCase3)
271# endif
272	test	%eax, %eax
273	jnz	L(CopyFrom1To16Bytes)
274
275	movaps	16(%ecx, %esi), %xmm3
276	movaps	%xmm2, (%edx, %esi)
277	pcmpeqb	%xmm3, %xmm0
278	pmovmskb %xmm0, %eax
279	lea	16(%esi), %esi
280# ifdef USE_AS_STRNCPY
281	sub	$16, %ebx
282	jbe	L(CopyFrom1To16BytesCase2OrCase3)
283# endif
284	test	%eax, %eax
285	jnz	L(CopyFrom1To16Bytes)
286
/* Six blocks tested without a NUL (%esi is 96 here).  Store the last
   tested block, then prepare the 64-byte loop: %ecx becomes
   (%ecx + %esi + 16) rounded down to a 64-byte boundary, %edx is moved by
   the same delta, and the strncpy counter becomes
   %ebx + 112 + (old %ecx - new %ecx).  The rounding can make the loop
   re-copy bytes that were already stored.  %esi = -64 is the offset of
   the first block of a 64-byte group relative to the pointers after the
   loop has advanced them.  */
287	movaps	%xmm3, (%edx, %esi)
288	mov	%ecx, %eax
289	lea	16(%ecx, %esi), %ecx
290	and	$-0x40, %ecx
291	sub	%ecx, %eax
292	sub	%eax, %edx
293# ifdef USE_AS_STRNCPY
294	lea	112(%ebx, %eax), %ebx
295# endif
296	mov	$-0x40, %esi
297
/* Aligned64Loop: copy 64 bytes per iteration with aligned loads and
   stores.  The four 16-byte blocks are kept intact in %xmm4..%xmm7, while
   pminub folds them into %xmm3 so that one pcmpeqb/pmovmskb against the
   zero vector %xmm0 detects a NUL anywhere in the 64 bytes.  %ecx and
   %edx are advanced before the test, which is why the stores use the
   offsets -64..-16.  Nothing is stored when a NUL is found or, for
   strncpy, when the counter in %ebx reaches zero or underflows.  */
298L(Aligned64Loop):
299	movaps	(%ecx), %xmm2
300	movaps	32(%ecx), %xmm3
301	movaps	%xmm2, %xmm4
302	movaps	16(%ecx), %xmm5
303	movaps	%xmm3, %xmm6
304	movaps	48(%ecx), %xmm7
305	pminub	%xmm5, %xmm2
306	pminub	%xmm7, %xmm3
307	pminub	%xmm2, %xmm3
308	lea	64(%edx), %edx
309	pcmpeqb	%xmm0, %xmm3
310	lea	64(%ecx), %ecx
311	pmovmskb %xmm3, %eax
312# ifdef USE_AS_STRNCPY
313	sub	$64, %ebx
314	jbe	L(StrncpyLeaveCase2OrCase3)
315# endif
316	test	%eax, %eax
317	jnz	L(Aligned64Leave)
318	movaps	%xmm4, -64(%edx)
319	movaps	%xmm5, -48(%edx)
320	movaps	%xmm6, -32(%edx)
321	movaps	%xmm7, -16(%edx)
322	jmp	L(Aligned64Loop)
323
/* Aligned64Leave: the 64-byte loop found a NUL somewhere in
   %xmm4..%xmm7, which hold the bytes at -64..-1 relative to the already
   advanced %ecx/%edx.  Test the four blocks in order, storing each block
   found clean, and branch to L(CopyFrom1To16Bytes) with %eax = NUL byte
   mask and %esi = offset of the block holding the NUL.  %esi starts from
   the -64 set up before the loop and is bumped by 16 per block; the
   "lea" between "test" and "jnz" does not disturb the flags.  For strncpy
   the counter gets 48 added first and then 16 subtracted per block
   passed.  */
324L(Aligned64Leave):
325# ifdef USE_AS_STRNCPY
326	lea	48(%ebx), %ebx
327# endif
328	pcmpeqb	%xmm4, %xmm0
329	pmovmskb %xmm0, %eax
330	test	%eax, %eax
331	jnz	L(CopyFrom1To16Bytes)
332
333	pcmpeqb	%xmm5, %xmm0
334# ifdef USE_AS_STRNCPY
335	lea	-16(%ebx), %ebx
336# endif
337	pmovmskb %xmm0, %eax
338	movaps	%xmm4, -64(%edx)
339	test	%eax, %eax
340	lea	16(%esi), %esi
341	jnz	L(CopyFrom1To16Bytes)
342
343	pcmpeqb	%xmm6, %xmm0
344# ifdef USE_AS_STRNCPY
345	lea	-16(%ebx), %ebx
346# endif
347	pmovmskb %xmm0, %eax
348	movaps	%xmm5, -48(%edx)
349	test	%eax, %eax
350	lea	16(%esi), %esi
351	jnz	L(CopyFrom1To16Bytes)
352
/* The first three blocks are clean, so the NUL is in %xmm7: no test is
   needed before the jump.  */
353	movaps	%xmm6, -32(%edx)
354	pcmpeqb	%xmm7, %xmm0
355# ifdef USE_AS_STRNCPY
356	lea	-16(%ebx), %ebx
357# endif
358	pmovmskb %xmm0, %eax
359	lea	16(%esi), %esi
360	jmp	L(CopyFrom1To16Bytes)
361
362	.p2align 4
/* Shl1: destination 16-byte aligned, source 1 byte past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $1", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
363L(Shl1):
364	movaps	-1(%ecx), %xmm1
365	movaps	15(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -1(%ecx) and 15(%ecx).  */
366L(Shl1Start):
367	pcmpeqb	%xmm2, %xmm0
368	pmovmskb %xmm0, %eax
369	movaps	%xmm2, %xmm3
370# ifdef USE_AS_STRNCPY
371	sub	$16, %ebx
372	jbe	L(StrncpyExit1Case2OrCase3)
373# endif
374	test	%eax, %eax
375	jnz	L(Shl1LoopExit)
376
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
377	palignr	$1, %xmm1, %xmm2
378	movaps	%xmm3, %xmm1
379	movaps	%xmm2, (%edx)
380	movaps	31(%ecx), %xmm2
381
382	pcmpeqb	%xmm2, %xmm0
383	lea	16(%edx), %edx
384	pmovmskb %xmm0, %eax
385	lea	16(%ecx), %ecx
386	movaps	%xmm2, %xmm3
387# ifdef USE_AS_STRNCPY
388	sub	$16, %ebx
389	jbe	L(StrncpyExit1Case2OrCase3)
390# endif
391	test	%eax, %eax
392	jnz	L(Shl1LoopExit)
393
394	palignr	$1, %xmm1, %xmm2
395	movaps	%xmm2, (%edx)
396	movaps	31(%ecx), %xmm2
397	movaps	%xmm3, %xmm1
398
399	pcmpeqb	%xmm2, %xmm0
400	lea	16(%edx), %edx
401	pmovmskb %xmm0, %eax
402	lea	16(%ecx), %ecx
403	movaps	%xmm2, %xmm3
404# ifdef USE_AS_STRNCPY
405	sub	$16, %ebx
406	jbe	L(StrncpyExit1Case2OrCase3)
407# endif
408	test	%eax, %eax
409	jnz	L(Shl1LoopExit)
410
411	palignr	$1, %xmm1, %xmm2
412	movaps	%xmm2, (%edx)
413	movaps	31(%ecx), %xmm2
414
415	pcmpeqb	%xmm2, %xmm0
416	lea	16(%edx), %edx
417	pmovmskb %xmm0, %eax
418	lea	16(%ecx), %ecx
419# ifdef USE_AS_STRNCPY
420	sub	$16, %ebx
421	jbe	L(StrncpyExit1Case2OrCase3)
422# endif
423	test	%eax, %eax
424	jnz	L(Shl1LoopExit)
425
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -15
   so that 15(%ecx) is the 64-byte aligned address.  */
426	palignr	$1, %xmm3, %xmm2
427	movaps	%xmm2, (%edx)
428	lea	31(%ecx), %ecx
429	lea	16(%edx), %edx
430
431	mov	%ecx, %eax
432	and	$-0x40, %ecx
433	sub	%ecx, %eax
434	lea	-15(%ecx), %ecx
435	sub	%eax, %edx
436# ifdef USE_AS_STRNCPY
437	add	%eax, %ebx
438# endif
439	movaps	-1(%ecx), %xmm1
440
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl1Start), which
   handles the group 16 bytes at a time.  */
441L(Shl1LoopStart):
442	movaps	15(%ecx), %xmm2
443	movaps	31(%ecx), %xmm3
444	movaps	%xmm3, %xmm6
445	movaps	47(%ecx), %xmm4
446	movaps	%xmm4, %xmm7
447	movaps	63(%ecx), %xmm5
448	pminub	%xmm2, %xmm6
449	pminub	%xmm5, %xmm7
450	pminub	%xmm6, %xmm7
451	pcmpeqb	%xmm0, %xmm7
452	pmovmskb %xmm7, %eax
453	movaps	%xmm5, %xmm7
454	palignr	$1, %xmm4, %xmm5
455	test	%eax, %eax
456	palignr	$1, %xmm3, %xmm4
457	jnz	L(Shl1Start)
458# ifdef USE_AS_STRNCPY
459	sub	$64, %ebx
460	jbe	L(StrncpyLeave1)
461# endif
462	palignr	$1, %xmm2, %xmm3
463	lea	64(%ecx), %ecx
464	palignr	$1, %xmm1, %xmm2
465	movaps	%xmm7, %xmm1
466	movaps	%xmm5, 48(%edx)
467	movaps	%xmm4, 32(%edx)
468	movaps	%xmm3, 16(%edx)
469	movaps	%xmm2, (%edx)
470	lea	64(%edx), %edx
471	jmp	L(Shl1LoopStart)
472
/* The NUL is in the aligned block at 15(%ecx).  Copy the 15 bytes in
   front of it with two overlapping 8-byte moves, then jump to
   L(CopyFrom1To16Bytes) with %esi = 15 (offset of that block) and %eax
   still holding the NUL byte mask.  */
473L(Shl1LoopExit):
474	movlpd	(%ecx), %xmm0
475	movlpd	%xmm0, (%edx)
476	movlpd	7(%ecx), %xmm0
477	movlpd	%xmm0, 7(%edx)
478	mov	$15, %esi
479	jmp	L(CopyFrom1To16Bytes)
480
481	.p2align 4
/* Shl2: destination 16-byte aligned, source 2 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $2", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
482L(Shl2):
483	movaps	-2(%ecx), %xmm1
484	movaps	14(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -2(%ecx) and 14(%ecx).  */
485L(Shl2Start):
486	pcmpeqb	%xmm2, %xmm0
487	pmovmskb %xmm0, %eax
488	movaps	%xmm2, %xmm3
489# ifdef USE_AS_STRNCPY
490	sub	$16, %ebx
491	jbe	L(StrncpyExit2Case2OrCase3)
492# endif
493	test	%eax, %eax
494	jnz	L(Shl2LoopExit)
495
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
496	palignr	$2, %xmm1, %xmm2
497	movaps	%xmm3, %xmm1
498	movaps	%xmm2, (%edx)
499	movaps	30(%ecx), %xmm2
500
501	pcmpeqb	%xmm2, %xmm0
502	lea	16(%edx), %edx
503	pmovmskb %xmm0, %eax
504	lea	16(%ecx), %ecx
505	movaps	%xmm2, %xmm3
506# ifdef USE_AS_STRNCPY
507	sub	$16, %ebx
508	jbe	L(StrncpyExit2Case2OrCase3)
509# endif
510	test	%eax, %eax
511	jnz	L(Shl2LoopExit)
512
513	palignr	$2, %xmm1, %xmm2
514	movaps	%xmm2, (%edx)
515	movaps	30(%ecx), %xmm2
516	movaps	%xmm3, %xmm1
517
518	pcmpeqb	%xmm2, %xmm0
519	lea	16(%edx), %edx
520	pmovmskb %xmm0, %eax
521	lea	16(%ecx), %ecx
522	movaps	%xmm2, %xmm3
523# ifdef USE_AS_STRNCPY
524	sub	$16, %ebx
525	jbe	L(StrncpyExit2Case2OrCase3)
526# endif
527	test	%eax, %eax
528	jnz	L(Shl2LoopExit)
529
530	palignr	$2, %xmm1, %xmm2
531	movaps	%xmm2, (%edx)
532	movaps	30(%ecx), %xmm2
533
534	pcmpeqb	%xmm2, %xmm0
535	lea	16(%edx), %edx
536	pmovmskb %xmm0, %eax
537	lea	16(%ecx), %ecx
538# ifdef USE_AS_STRNCPY
539	sub	$16, %ebx
540	jbe	L(StrncpyExit2Case2OrCase3)
541# endif
542	test	%eax, %eax
543	jnz	L(Shl2LoopExit)
544
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -14
   so that 14(%ecx) is the 64-byte aligned address.  */
545	palignr	$2, %xmm3, %xmm2
546	movaps	%xmm2, (%edx)
547	lea	30(%ecx), %ecx
548	lea	16(%edx), %edx
549
550	mov	%ecx, %eax
551	and	$-0x40, %ecx
552	sub	%ecx, %eax
553	lea	-14(%ecx), %ecx
554	sub	%eax, %edx
555# ifdef USE_AS_STRNCPY
556	add	%eax, %ebx
557# endif
558	movaps	-2(%ecx), %xmm1
559
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl2Start), which
   handles the group 16 bytes at a time.  */
560L(Shl2LoopStart):
561	movaps	14(%ecx), %xmm2
562	movaps	30(%ecx), %xmm3
563	movaps	%xmm3, %xmm6
564	movaps	46(%ecx), %xmm4
565	movaps	%xmm4, %xmm7
566	movaps	62(%ecx), %xmm5
567	pminub	%xmm2, %xmm6
568	pminub	%xmm5, %xmm7
569	pminub	%xmm6, %xmm7
570	pcmpeqb	%xmm0, %xmm7
571	pmovmskb %xmm7, %eax
572	movaps	%xmm5, %xmm7
573	palignr	$2, %xmm4, %xmm5
574	test	%eax, %eax
575	palignr	$2, %xmm3, %xmm4
576	jnz	L(Shl2Start)
577# ifdef USE_AS_STRNCPY
578	sub	$64, %ebx
579	jbe	L(StrncpyLeave2)
580# endif
581	palignr	$2, %xmm2, %xmm3
582	lea	64(%ecx), %ecx
583	palignr	$2, %xmm1, %xmm2
584	movaps	%xmm7, %xmm1
585	movaps	%xmm5, 48(%edx)
586	movaps	%xmm4, 32(%edx)
587	movaps	%xmm3, 16(%edx)
588	movaps	%xmm2, (%edx)
589	lea	64(%edx), %edx
590	jmp	L(Shl2LoopStart)
591
/* The NUL is in the aligned block at 14(%ecx).  Copy the 14 bytes in
   front of it with two overlapping 8-byte moves, then jump to
   L(CopyFrom1To16Bytes) with %esi = 14 (offset of that block) and %eax
   still holding the NUL byte mask.  */
592L(Shl2LoopExit):
593	movlpd	(%ecx), %xmm0
594	movlpd	6(%ecx), %xmm1
595	movlpd	%xmm0, (%edx)
596	movlpd	%xmm1, 6(%edx)
597	mov	$14, %esi
598	jmp	L(CopyFrom1To16Bytes)
599
600	.p2align 4
/* Shl3: destination 16-byte aligned, source 3 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $3", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
601L(Shl3):
602	movaps	-3(%ecx), %xmm1
603	movaps	13(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -3(%ecx) and 13(%ecx).  */
604L(Shl3Start):
605	pcmpeqb	%xmm2, %xmm0
606	pmovmskb %xmm0, %eax
607	movaps	%xmm2, %xmm3
608# ifdef USE_AS_STRNCPY
609	sub	$16, %ebx
610	jbe	L(StrncpyExit3Case2OrCase3)
611# endif
612	test	%eax, %eax
613	jnz	L(Shl3LoopExit)
614
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
615	palignr	$3, %xmm1, %xmm2
616	movaps	%xmm3, %xmm1
617	movaps	%xmm2, (%edx)
618	movaps	29(%ecx), %xmm2
619
620	pcmpeqb	%xmm2, %xmm0
621	lea	16(%edx), %edx
622	pmovmskb %xmm0, %eax
623	lea	16(%ecx), %ecx
624	movaps	%xmm2, %xmm3
625# ifdef USE_AS_STRNCPY
626	sub	$16, %ebx
627	jbe	L(StrncpyExit3Case2OrCase3)
628# endif
629	test	%eax, %eax
630	jnz	L(Shl3LoopExit)
631
632	palignr	$3, %xmm1, %xmm2
633	movaps	%xmm2, (%edx)
634	movaps	29(%ecx), %xmm2
635	movaps	%xmm3, %xmm1
636
637	pcmpeqb	%xmm2, %xmm0
638	lea	16(%edx), %edx
639	pmovmskb %xmm0, %eax
640	lea	16(%ecx), %ecx
641	movaps	%xmm2, %xmm3
642# ifdef USE_AS_STRNCPY
643	sub	$16, %ebx
644	jbe	L(StrncpyExit3Case2OrCase3)
645# endif
646	test	%eax, %eax
647	jnz	L(Shl3LoopExit)
648
649	palignr	$3, %xmm1, %xmm2
650	movaps	%xmm2, (%edx)
651	movaps	29(%ecx), %xmm2
652
653	pcmpeqb	%xmm2, %xmm0
654	lea	16(%edx), %edx
655	pmovmskb %xmm0, %eax
656	lea	16(%ecx), %ecx
657# ifdef USE_AS_STRNCPY
658	sub	$16, %ebx
659	jbe	L(StrncpyExit3Case2OrCase3)
660# endif
661	test	%eax, %eax
662	jnz	L(Shl3LoopExit)
663
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -13
   so that 13(%ecx) is the 64-byte aligned address.  */
664	palignr	$3, %xmm3, %xmm2
665	movaps	%xmm2, (%edx)
666	lea	29(%ecx), %ecx
667	lea	16(%edx), %edx
668
669	mov	%ecx, %eax
670	and	$-0x40, %ecx
671	sub	%ecx, %eax
672	lea	-13(%ecx), %ecx
673	sub	%eax, %edx
674# ifdef USE_AS_STRNCPY
675	add	%eax, %ebx
676# endif
677	movaps	-3(%ecx), %xmm1
678
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl3Start), which
   handles the group 16 bytes at a time.  */
679L(Shl3LoopStart):
680	movaps	13(%ecx), %xmm2
681	movaps	29(%ecx), %xmm3
682	movaps	%xmm3, %xmm6
683	movaps	45(%ecx), %xmm4
684	movaps	%xmm4, %xmm7
685	movaps	61(%ecx), %xmm5
686	pminub	%xmm2, %xmm6
687	pminub	%xmm5, %xmm7
688	pminub	%xmm6, %xmm7
689	pcmpeqb	%xmm0, %xmm7
690	pmovmskb %xmm7, %eax
691	movaps	%xmm5, %xmm7
692	palignr	$3, %xmm4, %xmm5
693	test	%eax, %eax
694	palignr	$3, %xmm3, %xmm4
695	jnz	L(Shl3Start)
696# ifdef USE_AS_STRNCPY
697	sub	$64, %ebx
698	jbe	L(StrncpyLeave3)
699# endif
700	palignr	$3, %xmm2, %xmm3
701	lea	64(%ecx), %ecx
702	palignr	$3, %xmm1, %xmm2
703	movaps	%xmm7, %xmm1
704	movaps	%xmm5, 48(%edx)
705	movaps	%xmm4, 32(%edx)
706	movaps	%xmm3, 16(%edx)
707	movaps	%xmm2, (%edx)
708	lea	64(%edx), %edx
709	jmp	L(Shl3LoopStart)
710
/* The NUL is in the aligned block at 13(%ecx).  Copy the 13 bytes in
   front of it with two overlapping 8-byte moves, then jump to
   L(CopyFrom1To16Bytes) with %esi = 13 (offset of that block) and %eax
   still holding the NUL byte mask.  */
711L(Shl3LoopExit):
712	movlpd	(%ecx), %xmm0
713	movlpd	5(%ecx), %xmm1
714	movlpd	%xmm0, (%edx)
715	movlpd	%xmm1, 5(%edx)
716	mov	$13, %esi
717	jmp	L(CopyFrom1To16Bytes)
718
719	.p2align 4
/* Shl4: destination 16-byte aligned, source 4 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $4", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
720L(Shl4):
721	movaps	-4(%ecx), %xmm1
722	movaps	12(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -4(%ecx) and 12(%ecx).  */
723L(Shl4Start):
724	pcmpeqb	%xmm2, %xmm0
725	pmovmskb %xmm0, %eax
726	movaps	%xmm2, %xmm3
727# ifdef USE_AS_STRNCPY
728	sub	$16, %ebx
729	jbe	L(StrncpyExit4Case2OrCase3)
730# endif
731	test	%eax, %eax
732	jnz	L(Shl4LoopExit)
733
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
734	palignr	$4, %xmm1, %xmm2
735	movaps	%xmm3, %xmm1
736	movaps	%xmm2, (%edx)
737	movaps	28(%ecx), %xmm2
738
739	pcmpeqb	%xmm2, %xmm0
740	lea	16(%edx), %edx
741	pmovmskb %xmm0, %eax
742	lea	16(%ecx), %ecx
743	movaps	%xmm2, %xmm3
744# ifdef USE_AS_STRNCPY
745	sub	$16, %ebx
746	jbe	L(StrncpyExit4Case2OrCase3)
747# endif
748	test	%eax, %eax
749	jnz	L(Shl4LoopExit)
750
751	palignr	$4, %xmm1, %xmm2
752	movaps	%xmm2, (%edx)
753	movaps	28(%ecx), %xmm2
754	movaps	%xmm3, %xmm1
755
756	pcmpeqb	%xmm2, %xmm0
757	lea	16(%edx), %edx
758	pmovmskb %xmm0, %eax
759	lea	16(%ecx), %ecx
760	movaps	%xmm2, %xmm3
761# ifdef USE_AS_STRNCPY
762	sub	$16, %ebx
763	jbe	L(StrncpyExit4Case2OrCase3)
764# endif
765	test	%eax, %eax
766	jnz	L(Shl4LoopExit)
767
768	palignr	$4, %xmm1, %xmm2
769	movaps	%xmm2, (%edx)
770	movaps	28(%ecx), %xmm2
771
772	pcmpeqb	%xmm2, %xmm0
773	lea	16(%edx), %edx
774	pmovmskb %xmm0, %eax
775	lea	16(%ecx), %ecx
776# ifdef USE_AS_STRNCPY
777	sub	$16, %ebx
778	jbe	L(StrncpyExit4Case2OrCase3)
779# endif
780	test	%eax, %eax
781	jnz	L(Shl4LoopExit)
782
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -12
   so that 12(%ecx) is the 64-byte aligned address.  */
783	palignr	$4, %xmm3, %xmm2
784	movaps	%xmm2, (%edx)
785	lea	28(%ecx), %ecx
786	lea	16(%edx), %edx
787
788	mov	%ecx, %eax
789	and	$-0x40, %ecx
790	sub	%ecx, %eax
791	lea	-12(%ecx), %ecx
792	sub	%eax, %edx
793# ifdef USE_AS_STRNCPY
794	add	%eax, %ebx
795# endif
796	movaps	-4(%ecx), %xmm1
797
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl4Start), which
   handles the group 16 bytes at a time.  */
798L(Shl4LoopStart):
799	movaps	12(%ecx), %xmm2
800	movaps	28(%ecx), %xmm3
801	movaps	%xmm3, %xmm6
802	movaps	44(%ecx), %xmm4
803	movaps	%xmm4, %xmm7
804	movaps	60(%ecx), %xmm5
805	pminub	%xmm2, %xmm6
806	pminub	%xmm5, %xmm7
807	pminub	%xmm6, %xmm7
808	pcmpeqb	%xmm0, %xmm7
809	pmovmskb %xmm7, %eax
810	movaps	%xmm5, %xmm7
811	palignr	$4, %xmm4, %xmm5
812	test	%eax, %eax
813	palignr	$4, %xmm3, %xmm4
814	jnz	L(Shl4Start)
815# ifdef USE_AS_STRNCPY
816	sub	$64, %ebx
817	jbe	L(StrncpyLeave4)
818# endif
819	palignr	$4, %xmm2, %xmm3
820	lea	64(%ecx), %ecx
821	palignr	$4, %xmm1, %xmm2
822	movaps	%xmm7, %xmm1
823	movaps	%xmm5, 48(%edx)
824	movaps	%xmm4, 32(%edx)
825	movaps	%xmm3, 16(%edx)
826	movaps	%xmm2, (%edx)
827	lea	64(%edx), %edx
828	jmp	L(Shl4LoopStart)
829
/* The NUL is in the aligned block at 12(%ecx).  Copy the 12 bytes in
   front of it with an 8-byte move plus a 4-byte move (using %esi as
   scratch), then jump to L(CopyFrom1To16Bytes) with %esi = 12 (offset of
   that block) and %eax still holding the NUL byte mask.  */
830L(Shl4LoopExit):
831	movlpd	(%ecx), %xmm0
832	movl	8(%ecx), %esi
833	movlpd	%xmm0, (%edx)
834	movl	%esi, 8(%edx)
835	mov	$12, %esi
836	jmp	L(CopyFrom1To16Bytes)
837
838	.p2align 4
/* Shl5: destination 16-byte aligned, source 5 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $5", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
839L(Shl5):
840	movaps	-5(%ecx), %xmm1
841	movaps	11(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -5(%ecx) and 11(%ecx).  */
842L(Shl5Start):
843	pcmpeqb	%xmm2, %xmm0
844	pmovmskb %xmm0, %eax
845	movaps	%xmm2, %xmm3
846# ifdef USE_AS_STRNCPY
847	sub	$16, %ebx
848	jbe	L(StrncpyExit5Case2OrCase3)
849# endif
850	test	%eax, %eax
851	jnz	L(Shl5LoopExit)
852
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
853	palignr	$5, %xmm1, %xmm2
854	movaps	%xmm3, %xmm1
855	movaps	%xmm2, (%edx)
856	movaps	27(%ecx), %xmm2
857
858	pcmpeqb	%xmm2, %xmm0
859	lea	16(%edx), %edx
860	pmovmskb %xmm0, %eax
861	lea	16(%ecx), %ecx
862	movaps	%xmm2, %xmm3
863# ifdef USE_AS_STRNCPY
864	sub	$16, %ebx
865	jbe	L(StrncpyExit5Case2OrCase3)
866# endif
867	test	%eax, %eax
868	jnz	L(Shl5LoopExit)
869
870	palignr	$5, %xmm1, %xmm2
871	movaps	%xmm2, (%edx)
872	movaps	27(%ecx), %xmm2
873	movaps	%xmm3, %xmm1
874
875	pcmpeqb	%xmm2, %xmm0
876	lea	16(%edx), %edx
877	pmovmskb %xmm0, %eax
878	lea	16(%ecx), %ecx
879	movaps	%xmm2, %xmm3
880# ifdef USE_AS_STRNCPY
881	sub	$16, %ebx
882	jbe	L(StrncpyExit5Case2OrCase3)
883# endif
884	test	%eax, %eax
885	jnz	L(Shl5LoopExit)
886
887	palignr	$5, %xmm1, %xmm2
888	movaps	%xmm2, (%edx)
889	movaps	27(%ecx), %xmm2
890
891	pcmpeqb	%xmm2, %xmm0
892	lea	16(%edx), %edx
893	pmovmskb %xmm0, %eax
894	lea	16(%ecx), %ecx
895# ifdef USE_AS_STRNCPY
896	sub	$16, %ebx
897	jbe	L(StrncpyExit5Case2OrCase3)
898# endif
899	test	%eax, %eax
900	jnz	L(Shl5LoopExit)
901
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -11
   so that 11(%ecx) is the 64-byte aligned address.  */
902	palignr	$5, %xmm3, %xmm2
903	movaps	%xmm2, (%edx)
904	lea	27(%ecx), %ecx
905	lea	16(%edx), %edx
906
907	mov	%ecx, %eax
908	and	$-0x40, %ecx
909	sub	%ecx, %eax
910	lea	-11(%ecx), %ecx
911	sub	%eax, %edx
912# ifdef USE_AS_STRNCPY
913	add	%eax, %ebx
914# endif
915	movaps	-5(%ecx), %xmm1
916
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl5Start), which
   handles the group 16 bytes at a time.  */
917L(Shl5LoopStart):
918	movaps	11(%ecx), %xmm2
919	movaps	27(%ecx), %xmm3
920	movaps	%xmm3, %xmm6
921	movaps	43(%ecx), %xmm4
922	movaps	%xmm4, %xmm7
923	movaps	59(%ecx), %xmm5
924	pminub	%xmm2, %xmm6
925	pminub	%xmm5, %xmm7
926	pminub	%xmm6, %xmm7
927	pcmpeqb	%xmm0, %xmm7
928	pmovmskb %xmm7, %eax
929	movaps	%xmm5, %xmm7
930	palignr	$5, %xmm4, %xmm5
931	test	%eax, %eax
932	palignr	$5, %xmm3, %xmm4
933	jnz	L(Shl5Start)
934# ifdef USE_AS_STRNCPY
935	sub	$64, %ebx
936	jbe	L(StrncpyLeave5)
937# endif
938	palignr	$5, %xmm2, %xmm3
939	lea	64(%ecx), %ecx
940	palignr	$5, %xmm1, %xmm2
941	movaps	%xmm7, %xmm1
942	movaps	%xmm5, 48(%edx)
943	movaps	%xmm4, 32(%edx)
944	movaps	%xmm3, 16(%edx)
945	movaps	%xmm2, (%edx)
946	lea	64(%edx), %edx
947	jmp	L(Shl5LoopStart)
948
/* The NUL is in the aligned block at 11(%ecx).  Copy the 11 bytes in
   front of it with an 8-byte move plus an overlapping 4-byte move (using
   %esi as scratch), then jump to L(CopyFrom1To16Bytes) with %esi = 11
   (offset of that block) and %eax still holding the NUL byte mask.  */
949L(Shl5LoopExit):
950	movlpd	(%ecx), %xmm0
951	movl	7(%ecx), %esi
952	movlpd	%xmm0, (%edx)
953	movl	%esi, 7(%edx)
954	mov	$11, %esi
955	jmp	L(CopyFrom1To16Bytes)
956
957	.p2align 4
/* Shl6: destination 16-byte aligned, source 6 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $6", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
958L(Shl6):
959	movaps	-6(%ecx), %xmm1
960	movaps	10(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -6(%ecx) and 10(%ecx).  */
961L(Shl6Start):
962	pcmpeqb	%xmm2, %xmm0
963	pmovmskb %xmm0, %eax
964	movaps	%xmm2, %xmm3
965# ifdef USE_AS_STRNCPY
966	sub	$16, %ebx
967	jbe	L(StrncpyExit6Case2OrCase3)
968# endif
969	test	%eax, %eax
970	jnz	L(Shl6LoopExit)
971
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
972	palignr	$6, %xmm1, %xmm2
973	movaps	%xmm3, %xmm1
974	movaps	%xmm2, (%edx)
975	movaps	26(%ecx), %xmm2
976
977	pcmpeqb	%xmm2, %xmm0
978	lea	16(%edx), %edx
979	pmovmskb %xmm0, %eax
980	lea	16(%ecx), %ecx
981	movaps	%xmm2, %xmm3
982# ifdef USE_AS_STRNCPY
983	sub	$16, %ebx
984	jbe	L(StrncpyExit6Case2OrCase3)
985# endif
986	test	%eax, %eax
987	jnz	L(Shl6LoopExit)
988
989	palignr	$6, %xmm1, %xmm2
990	movaps	%xmm2, (%edx)
991	movaps	26(%ecx), %xmm2
992	movaps	%xmm3, %xmm1
993
994	pcmpeqb	%xmm2, %xmm0
995	lea	16(%edx), %edx
996	pmovmskb %xmm0, %eax
997	lea	16(%ecx), %ecx
998	movaps	%xmm2, %xmm3
999# ifdef USE_AS_STRNCPY
1000	sub	$16, %ebx
1001	jbe	L(StrncpyExit6Case2OrCase3)
1002# endif
1003	test	%eax, %eax
1004	jnz	L(Shl6LoopExit)
1005
1006	palignr	$6, %xmm1, %xmm2
1007	movaps	%xmm2, (%edx)
1008	movaps	26(%ecx), %xmm2
1009
1010	pcmpeqb	%xmm2, %xmm0
1011	lea	16(%edx), %edx
1012	pmovmskb %xmm0, %eax
1013	lea	16(%ecx), %ecx
1014# ifdef USE_AS_STRNCPY
1015	sub	$16, %ebx
1016	jbe	L(StrncpyExit6Case2OrCase3)
1017# endif
1018	test	%eax, %eax
1019	jnz	L(Shl6LoopExit)
1020
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -10
   so that 10(%ecx) is the 64-byte aligned address.  */
1021	palignr	$6, %xmm3, %xmm2
1022	movaps	%xmm2, (%edx)
1023	lea	26(%ecx), %ecx
1024	lea	16(%edx), %edx
1025
1026	mov	%ecx, %eax
1027	and	$-0x40, %ecx
1028	sub	%ecx, %eax
1029	lea	-10(%ecx), %ecx
1030	sub	%eax, %edx
1031# ifdef USE_AS_STRNCPY
1032	add	%eax, %ebx
1033# endif
1034	movaps	-6(%ecx), %xmm1
1035
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl6Start), which
   handles the group 16 bytes at a time.  */
1036L(Shl6LoopStart):
1037	movaps	10(%ecx), %xmm2
1038	movaps	26(%ecx), %xmm3
1039	movaps	%xmm3, %xmm6
1040	movaps	42(%ecx), %xmm4
1041	movaps	%xmm4, %xmm7
1042	movaps	58(%ecx), %xmm5
1043	pminub	%xmm2, %xmm6
1044	pminub	%xmm5, %xmm7
1045	pminub	%xmm6, %xmm7
1046	pcmpeqb	%xmm0, %xmm7
1047	pmovmskb %xmm7, %eax
1048	movaps	%xmm5, %xmm7
1049	palignr	$6, %xmm4, %xmm5
1050	test	%eax, %eax
1051	palignr	$6, %xmm3, %xmm4
1052	jnz	L(Shl6Start)
1053# ifdef USE_AS_STRNCPY
1054	sub	$64, %ebx
1055	jbe	L(StrncpyLeave6)
1056# endif
1057	palignr	$6, %xmm2, %xmm3
1058	lea	64(%ecx), %ecx
1059	palignr	$6, %xmm1, %xmm2
1060	movaps	%xmm7, %xmm1
1061	movaps	%xmm5, 48(%edx)
1062	movaps	%xmm4, 32(%edx)
1063	movaps	%xmm3, 16(%edx)
1064	movaps	%xmm2, (%edx)
1065	lea	64(%edx), %edx
1066	jmp	L(Shl6LoopStart)
1067
/* The NUL is in the aligned block at 10(%ecx).  Copy the 10 bytes in
   front of it with an 8-byte move plus an overlapping 4-byte move (using
   %esi as scratch), then jump to L(CopyFrom1To16Bytes) with %esi = 10
   (offset of that block) and %eax still holding the NUL byte mask.  */
1068L(Shl6LoopExit):
1069	movlpd	(%ecx), %xmm0
1070	movl	6(%ecx), %esi
1071	movlpd	%xmm0, (%edx)
1072	movl	%esi, 6(%edx)
1073	mov	$10, %esi
1074	jmp	L(CopyFrom1To16Bytes)
1075
1076	.p2align 4
/* Shl7: destination 16-byte aligned, source 7 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $7", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
1077L(Shl7):
1078	movaps	-7(%ecx), %xmm1
1079	movaps	9(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -7(%ecx) and 9(%ecx).  */
1080L(Shl7Start):
1081	pcmpeqb	%xmm2, %xmm0
1082	pmovmskb %xmm0, %eax
1083	movaps	%xmm2, %xmm3
1084# ifdef USE_AS_STRNCPY
1085	sub	$16, %ebx
1086	jbe	L(StrncpyExit7Case2OrCase3)
1087# endif
1088	test	%eax, %eax
1089	jnz	L(Shl7LoopExit)
1090
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
1091	palignr	$7, %xmm1, %xmm2
1092	movaps	%xmm3, %xmm1
1093	movaps	%xmm2, (%edx)
1094	movaps	25(%ecx), %xmm2
1095
1096	pcmpeqb	%xmm2, %xmm0
1097	lea	16(%edx), %edx
1098	pmovmskb %xmm0, %eax
1099	lea	16(%ecx), %ecx
1100	movaps	%xmm2, %xmm3
1101# ifdef USE_AS_STRNCPY
1102	sub	$16, %ebx
1103	jbe	L(StrncpyExit7Case2OrCase3)
1104# endif
1105	test	%eax, %eax
1106	jnz	L(Shl7LoopExit)
1107
1108	palignr	$7, %xmm1, %xmm2
1109	movaps	%xmm2, (%edx)
1110	movaps	25(%ecx), %xmm2
1111	movaps	%xmm3, %xmm1
1112
1113	pcmpeqb	%xmm2, %xmm0
1114	lea	16(%edx), %edx
1115	pmovmskb %xmm0, %eax
1116	lea	16(%ecx), %ecx
1117	movaps	%xmm2, %xmm3
1118# ifdef USE_AS_STRNCPY
1119	sub	$16, %ebx
1120	jbe	L(StrncpyExit7Case2OrCase3)
1121# endif
1122	test	%eax, %eax
1123	jnz	L(Shl7LoopExit)
1124
1125	palignr	$7, %xmm1, %xmm2
1126	movaps	%xmm2, (%edx)
1127	movaps	25(%ecx), %xmm2
1128
1129	pcmpeqb	%xmm2, %xmm0
1130	lea	16(%edx), %edx
1131	pmovmskb %xmm0, %eax
1132	lea	16(%ecx), %ecx
1133# ifdef USE_AS_STRNCPY
1134	sub	$16, %ebx
1135	jbe	L(StrncpyExit7Case2OrCase3)
1136# endif
1137	test	%eax, %eax
1138	jnz	L(Shl7LoopExit)
1139
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -9
   so that 9(%ecx) is the 64-byte aligned address.  */
1140	palignr	$7, %xmm3, %xmm2
1141	movaps	%xmm2, (%edx)
1142	lea	25(%ecx), %ecx
1143	lea	16(%edx), %edx
1144
1145	mov	%ecx, %eax
1146	and	$-0x40, %ecx
1147	sub	%ecx, %eax
1148	lea	-9(%ecx), %ecx
1149	sub	%eax, %edx
1150# ifdef USE_AS_STRNCPY
1151	add	%eax, %ebx
1152# endif
1153	movaps	-7(%ecx), %xmm1
1154
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl7Start), which
   handles the group 16 bytes at a time.  */
1155L(Shl7LoopStart):
1156	movaps	9(%ecx), %xmm2
1157	movaps	25(%ecx), %xmm3
1158	movaps	%xmm3, %xmm6
1159	movaps	41(%ecx), %xmm4
1160	movaps	%xmm4, %xmm7
1161	movaps	57(%ecx), %xmm5
1162	pminub	%xmm2, %xmm6
1163	pminub	%xmm5, %xmm7
1164	pminub	%xmm6, %xmm7
1165	pcmpeqb	%xmm0, %xmm7
1166	pmovmskb %xmm7, %eax
1167	movaps	%xmm5, %xmm7
1168	palignr	$7, %xmm4, %xmm5
1169	test	%eax, %eax
1170	palignr	$7, %xmm3, %xmm4
1171	jnz	L(Shl7Start)
1172# ifdef USE_AS_STRNCPY
1173	sub	$64, %ebx
1174	jbe	L(StrncpyLeave7)
1175# endif
1176	palignr	$7, %xmm2, %xmm3
1177	lea	64(%ecx), %ecx
1178	palignr	$7, %xmm1, %xmm2
1179	movaps	%xmm7, %xmm1
1180	movaps	%xmm5, 48(%edx)
1181	movaps	%xmm4, 32(%edx)
1182	movaps	%xmm3, 16(%edx)
1183	movaps	%xmm2, (%edx)
1184	lea	64(%edx), %edx
1185	jmp	L(Shl7LoopStart)
1186
/* The NUL is in the aligned block at 9(%ecx).  Copy the 9 bytes in front
   of it with an 8-byte move plus an overlapping 4-byte move (using %esi
   as scratch), then jump to L(CopyFrom1To16Bytes) with %esi = 9 (offset
   of that block) and %eax still holding the NUL byte mask.  */
1187L(Shl7LoopExit):
1188	movlpd	(%ecx), %xmm0
1189	movl	5(%ecx), %esi
1190	movlpd	%xmm0, (%edx)
1191	movl	%esi, 5(%edx)
1192	mov	$9, %esi
1193	jmp	L(CopyFrom1To16Bytes)
1194
1195	.p2align 4
/* Shl8: destination 16-byte aligned, source 8 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $8", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
1196L(Shl8):
1197	movaps	-8(%ecx), %xmm1
1198	movaps	8(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -8(%ecx) and 8(%ecx).  */
1199L(Shl8Start):
1200	pcmpeqb	%xmm2, %xmm0
1201	pmovmskb %xmm0, %eax
1202	movaps	%xmm2, %xmm3
1203# ifdef USE_AS_STRNCPY
1204	sub	$16, %ebx
1205	jbe	L(StrncpyExit8Case2OrCase3)
1206# endif
1207	test	%eax, %eax
1208	jnz	L(Shl8LoopExit)
1209
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
1210	palignr	$8, %xmm1, %xmm2
1211	movaps	%xmm3, %xmm1
1212	movaps	%xmm2, (%edx)
1213	movaps	24(%ecx), %xmm2
1214
1215	pcmpeqb	%xmm2, %xmm0
1216	lea	16(%edx), %edx
1217	pmovmskb %xmm0, %eax
1218	lea	16(%ecx), %ecx
1219	movaps	%xmm2, %xmm3
1220# ifdef USE_AS_STRNCPY
1221	sub	$16, %ebx
1222	jbe	L(StrncpyExit8Case2OrCase3)
1223# endif
1224	test	%eax, %eax
1225	jnz	L(Shl8LoopExit)
1226
1227	palignr	$8, %xmm1, %xmm2
1228	movaps	%xmm2, (%edx)
1229	movaps	24(%ecx), %xmm2
1230	movaps	%xmm3, %xmm1
1231
1232	pcmpeqb	%xmm2, %xmm0
1233	lea	16(%edx), %edx
1234	pmovmskb %xmm0, %eax
1235	lea	16(%ecx), %ecx
1236	movaps	%xmm2, %xmm3
1237# ifdef USE_AS_STRNCPY
1238	sub	$16, %ebx
1239	jbe	L(StrncpyExit8Case2OrCase3)
1240# endif
1241	test	%eax, %eax
1242	jnz	L(Shl8LoopExit)
1243
1244	palignr	$8, %xmm1, %xmm2
1245	movaps	%xmm2, (%edx)
1246	movaps	24(%ecx), %xmm2
1247
1248	pcmpeqb	%xmm2, %xmm0
1249	lea	16(%edx), %edx
1250	pmovmskb %xmm0, %eax
1251	lea	16(%ecx), %ecx
1252# ifdef USE_AS_STRNCPY
1253	sub	$16, %ebx
1254	jbe	L(StrncpyExit8Case2OrCase3)
1255# endif
1256	test	%eax, %eax
1257	jnz	L(Shl8LoopExit)
1258
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -8
   so that 8(%ecx) is the 64-byte aligned address.  */
1259	palignr	$8, %xmm3, %xmm2
1260	movaps	%xmm2, (%edx)
1261	lea	24(%ecx), %ecx
1262	lea	16(%edx), %edx
1263
1264	mov	%ecx, %eax
1265	and	$-0x40, %ecx
1266	sub	%ecx, %eax
1267	lea	-8(%ecx), %ecx
1268	sub	%eax, %edx
1269# ifdef USE_AS_STRNCPY
1270	add	%eax, %ebx
1271# endif
1272	movaps	-8(%ecx), %xmm1
1273
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl8Start), which
   handles the group 16 bytes at a time.  */
1274L(Shl8LoopStart):
1275	movaps	8(%ecx), %xmm2
1276	movaps	24(%ecx), %xmm3
1277	movaps	%xmm3, %xmm6
1278	movaps	40(%ecx), %xmm4
1279	movaps	%xmm4, %xmm7
1280	movaps	56(%ecx), %xmm5
1281	pminub	%xmm2, %xmm6
1282	pminub	%xmm5, %xmm7
1283	pminub	%xmm6, %xmm7
1284	pcmpeqb	%xmm0, %xmm7
1285	pmovmskb %xmm7, %eax
1286	movaps	%xmm5, %xmm7
1287	palignr	$8, %xmm4, %xmm5
1288	test	%eax, %eax
1289	palignr	$8, %xmm3, %xmm4
1290	jnz	L(Shl8Start)
1291# ifdef USE_AS_STRNCPY
1292	sub	$64, %ebx
1293	jbe	L(StrncpyLeave8)
1294# endif
1295	palignr	$8, %xmm2, %xmm3
1296	lea	64(%ecx), %ecx
1297	palignr	$8, %xmm1, %xmm2
1298	movaps	%xmm7, %xmm1
1299	movaps	%xmm5, 48(%edx)
1300	movaps	%xmm4, 32(%edx)
1301	movaps	%xmm3, 16(%edx)
1302	movaps	%xmm2, (%edx)
1303	lea	64(%edx), %edx
1304	jmp	L(Shl8LoopStart)
1305
/* The NUL is in the aligned block at 8(%ecx).  Copy the 8 bytes in front
   of it with a single 8-byte move, then jump to L(CopyFrom1To16Bytes)
   with %esi = 8 (offset of that block) and %eax still holding the NUL
   byte mask.  */
1306L(Shl8LoopExit):
1307	movlpd	(%ecx), %xmm0
1308	movlpd	%xmm0, (%edx)
1309	mov	$8, %esi
1310	jmp	L(CopyFrom1To16Bytes)
1311
1312	.p2align 4
/* Shl9: destination 16-byte aligned, source 9 bytes past a 16-byte
   boundary.  Aligned source blocks are loaded with movaps and each pair
   of neighbouring blocks is merged with "palignr $9", so every store to
   the destination is an aligned movaps.
	%xmm1 - previous aligned source block
	%xmm2 - current aligned source block (tested for NUL before use)
	%xmm0 - zero comparand; non-zero only once a NUL has been found
	%ebx  - remaining-length counter (USE_AS_STRNCPY only)  */
1313L(Shl9):
1314	movaps	-9(%ecx), %xmm1
1315	movaps	7(%ecx), %xmm2
/* Also the re-entry point from the 64-byte loop below after it detected
   a NUL; %xmm1/%xmm2 then hold the blocks at -9(%ecx) and 7(%ecx).  */
1316L(Shl9Start):
1317	pcmpeqb	%xmm2, %xmm0
1318	pmovmskb %xmm0, %eax
1319	movaps	%xmm2, %xmm3
1320# ifdef USE_AS_STRNCPY
1321	sub	$16, %ebx
1322	jbe	L(StrncpyExit9Case2OrCase3)
1323# endif
1324	test	%eax, %eax
1325	jnz	L(Shl9LoopExit)
1326
/* Merge: %xmm2 = the 16 source bytes starting at (%ecx).  */
1327	palignr	$9, %xmm1, %xmm2
1328	movaps	%xmm3, %xmm1
1329	movaps	%xmm2, (%edx)
1330	movaps	23(%ecx), %xmm2
1331
1332	pcmpeqb	%xmm2, %xmm0
1333	lea	16(%edx), %edx
1334	pmovmskb %xmm0, %eax
1335	lea	16(%ecx), %ecx
1336	movaps	%xmm2, %xmm3
1337# ifdef USE_AS_STRNCPY
1338	sub	$16, %ebx
1339	jbe	L(StrncpyExit9Case2OrCase3)
1340# endif
1341	test	%eax, %eax
1342	jnz	L(Shl9LoopExit)
1343
1344	palignr	$9, %xmm1, %xmm2
1345	movaps	%xmm2, (%edx)
1346	movaps	23(%ecx), %xmm2
1347	movaps	%xmm3, %xmm1
1348
1349	pcmpeqb	%xmm2, %xmm0
1350	lea	16(%edx), %edx
1351	pmovmskb %xmm0, %eax
1352	lea	16(%ecx), %ecx
1353	movaps	%xmm2, %xmm3
1354# ifdef USE_AS_STRNCPY
1355	sub	$16, %ebx
1356	jbe	L(StrncpyExit9Case2OrCase3)
1357# endif
1358	test	%eax, %eax
1359	jnz	L(Shl9LoopExit)
1360
1361	palignr	$9, %xmm1, %xmm2
1362	movaps	%xmm2, (%edx)
1363	movaps	23(%ecx), %xmm2
1364
1365	pcmpeqb	%xmm2, %xmm0
1366	lea	16(%edx), %edx
1367	pmovmskb %xmm0, %eax
1368	lea	16(%ecx), %ecx
1369# ifdef USE_AS_STRNCPY
1370	sub	$16, %ebx
1371	jbe	L(StrncpyExit9Case2OrCase3)
1372# endif
1373	test	%eax, %eax
1374	jnz	L(Shl9LoopExit)
1375
/* Four 16-byte steps done without a NUL.  Point %ecx at the next unread
   aligned block, round it down to a 64-byte boundary and move %edx back
   by the same amount (adding it to the strncpy counter), so the loop
   below reads whole 64-byte aligned groups.  %ecx is then biased by -7
   so that 7(%ecx) is the 64-byte aligned address.  */
1376	palignr	$9, %xmm3, %xmm2
1377	movaps	%xmm2, (%edx)
1378	lea	23(%ecx), %ecx
1379	lea	16(%edx), %edx
1380
1381	mov	%ecx, %eax
1382	and	$-0x40, %ecx
1383	sub	%ecx, %eax
1384	lea	-7(%ecx), %ecx
1385	sub	%eax, %edx
1386# ifdef USE_AS_STRNCPY
1387	add	%eax, %ebx
1388# endif
1389	movaps	-9(%ecx), %xmm1
1390
/* Main loop: 64 source bytes per iteration.  pminub folds the four blocks
   together so one pcmpeqb/pmovmskb detects a NUL anywhere in them; %xmm7
   keeps an unshifted copy of the last block for the next iteration.  On
   a hit nothing is stored and control goes back to L(Shl9Start), which
   handles the group 16 bytes at a time.  */
1391L(Shl9LoopStart):
1392	movaps	7(%ecx), %xmm2
1393	movaps	23(%ecx), %xmm3
1394	movaps	%xmm3, %xmm6
1395	movaps	39(%ecx), %xmm4
1396	movaps	%xmm4, %xmm7
1397	movaps	55(%ecx), %xmm5
1398	pminub	%xmm2, %xmm6
1399	pminub	%xmm5, %xmm7
1400	pminub	%xmm6, %xmm7
1401	pcmpeqb	%xmm0, %xmm7
1402	pmovmskb %xmm7, %eax
1403	movaps	%xmm5, %xmm7
1404	palignr	$9, %xmm4, %xmm5
1405	test	%eax, %eax
1406	palignr	$9, %xmm3, %xmm4
1407	jnz	L(Shl9Start)
1408# ifdef USE_AS_STRNCPY
1409	sub	$64, %ebx
1410	jbe	L(StrncpyLeave9)
1411# endif
1412	palignr	$9, %xmm2, %xmm3
1413	lea	64(%ecx), %ecx
1414	palignr	$9, %xmm1, %xmm2
1415	movaps	%xmm7, %xmm1
1416	movaps	%xmm5, 48(%edx)
1417	movaps	%xmm4, 32(%edx)
1418	movaps	%xmm3, 16(%edx)
1419	movaps	%xmm2, (%edx)
1420	lea	64(%edx), %edx
1421	jmp	L(Shl9LoopStart)
1422
/* The NUL is in the aligned block at 7(%ecx).  Only 7 bytes precede it,
   so a single 8-byte move starting one byte early (-1 offset on both
   pointers) covers them; it also rewrites the byte in front of (%edx)
   with the source byte in front of (%ecx).  Then jump to
   L(CopyFrom1To16Bytes) with %esi = 7 (offset of that block) and %eax
   still holding the NUL byte mask.  */
1423L(Shl9LoopExit):
1424	movlpd	-1(%ecx), %xmm0
1425	movlpd	%xmm0, -1(%edx)
1426	mov	$7, %esi
1427	jmp	L(CopyFrom1To16Bytes)
1428
1429	.p2align 4
/* Shl10: copy path whose aligned source blocks live at 6+16*k(%ecx),
   i.e. the source cursor sits 10 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $10 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
1430L(Shl10):
1431	movaps	-10(%ecx), %xmm1
1432	movaps	6(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
1433L(Shl10Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
1434	pcmpeqb	%xmm2, %xmm0
1435	pmovmskb %xmm0, %eax
1436	movaps	%xmm2, %xmm3
1437# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
1438	sub	$16, %ebx
1439	jbe	L(StrncpyExit10Case2OrCase3)
1440# endif
1441	test	%eax, %eax
1442	jnz	L(Shl10LoopExit)
1443
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
1444	palignr	$10, %xmm1, %xmm2
1445	movaps	%xmm3, %xmm1
1446	movaps	%xmm2, (%edx)
1447	movaps	22(%ecx), %xmm2
1448
	/* Round 2: same check; both cursors advance by 16 first.  */
1449	pcmpeqb	%xmm2, %xmm0
1450	lea	16(%edx), %edx
1451	pmovmskb %xmm0, %eax
1452	lea	16(%ecx), %ecx
1453	movaps	%xmm2, %xmm3
1454# ifdef USE_AS_STRNCPY
1455	sub	$16, %ebx
1456	jbe	L(StrncpyExit10Case2OrCase3)
1457# endif
1458	test	%eax, %eax
1459	jnz	L(Shl10LoopExit)
1460
1461	palignr	$10, %xmm1, %xmm2
1462	movaps	%xmm2, (%edx)
1463	movaps	22(%ecx), %xmm2
1464	movaps	%xmm3, %xmm1
1465
	/* Round 3.  */
1466	pcmpeqb	%xmm2, %xmm0
1467	lea	16(%edx), %edx
1468	pmovmskb %xmm0, %eax
1469	lea	16(%ecx), %ecx
1470	movaps	%xmm2, %xmm3
1471# ifdef USE_AS_STRNCPY
1472	sub	$16, %ebx
1473	jbe	L(StrncpyExit10Case2OrCase3)
1474# endif
1475	test	%eax, %eax
1476	jnz	L(Shl10LoopExit)
1477
1478	palignr	$10, %xmm1, %xmm2
1479	movaps	%xmm2, (%edx)
1480	movaps	22(%ecx), %xmm2
1481
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
1482	pcmpeqb	%xmm2, %xmm0
1483	lea	16(%edx), %edx
1484	pmovmskb %xmm0, %eax
1485	lea	16(%ecx), %ecx
1486# ifdef USE_AS_STRNCPY
1487	sub	$16, %ebx
1488	jbe	L(StrncpyExit10Case2OrCase3)
1489# endif
1490	test	%eax, %eax
1491	jnz	L(Shl10LoopExit)
1492
1493	palignr	$10, %xmm3, %xmm2
1494	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
1495	lea	22(%ecx), %ecx
1496	lea	16(%edx), %edx
1497
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
1498	mov	%ecx, %eax
1499	and	$-0x40, %ecx
1500	sub	%ecx, %eax
1501	lea	-6(%ecx), %ecx
1502	sub	%eax, %edx
1503# ifdef USE_AS_STRNCPY
1504	add	%eax, %ebx
1505# endif
1506	movaps	-10(%ecx), %xmm1
1507
/* Main loop: 64 source bytes per iteration.  */
1508L(Shl10LoopStart):
1509	movaps	6(%ecx), %xmm2
1510	movaps	22(%ecx), %xmm3
1511	movaps	%xmm3, %xmm6
1512	movaps	38(%ecx), %xmm4
1513	movaps	%xmm4, %xmm7
1514	movaps	54(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
1515	pminub	%xmm2, %xmm6
1516	pminub	%xmm5, %xmm7
1517	pminub	%xmm6, %xmm7
1518	pcmpeqb	%xmm0, %xmm7
1519	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
1520	movaps	%xmm5, %xmm7
1521	palignr	$10, %xmm4, %xmm5
1522	test	%eax, %eax
1523	palignr	$10, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
1524	jnz	L(Shl10Start)
1525# ifdef USE_AS_STRNCPY
1526	sub	$64, %ebx
1527	jbe	L(StrncpyLeave10)
1528# endif
1529	palignr	$10, %xmm2, %xmm3
1530	lea	64(%ecx), %ecx
1531	palignr	$10, %xmm1, %xmm2
1532	movaps	%xmm7, %xmm1
1533	movaps	%xmm5, 48(%edx)
1534	movaps	%xmm4, 32(%edx)
1535	movaps	%xmm3, 16(%edx)
1536	movaps	%xmm2, (%edx)
1537	lea	64(%edx), %edx
1538	jmp	L(Shl10LoopStart)
1539
/* A zero byte was flagged in the current block (the one at 6(%ecx)).
   The 6 source bytes at %ecx that precede it are not stored yet: copy
   them with one 8-byte move that starts 2 bytes before both cursors,
   then let L(CopyFrom1To16Bytes) advance the cursors by %esi = 6 and
   decode the mask in %eax.  */
1540L(Shl10LoopExit):
1541	movlpd	-2(%ecx), %xmm0
1542	movlpd	%xmm0, -2(%edx)
1543	mov	$6, %esi
1544	jmp	L(CopyFrom1To16Bytes)
1545
1546	.p2align 4
/* Shl11: copy path whose aligned source blocks live at 5+16*k(%ecx),
   i.e. the source cursor sits 11 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $11 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
1547L(Shl11):
1548	movaps	-11(%ecx), %xmm1
1549	movaps	5(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
1550L(Shl11Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
1551	pcmpeqb	%xmm2, %xmm0
1552	pmovmskb %xmm0, %eax
1553	movaps	%xmm2, %xmm3
1554# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
1555	sub	$16, %ebx
1556	jbe	L(StrncpyExit11Case2OrCase3)
1557# endif
1558	test	%eax, %eax
1559	jnz	L(Shl11LoopExit)
1560
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
1561	palignr	$11, %xmm1, %xmm2
1562	movaps	%xmm3, %xmm1
1563	movaps	%xmm2, (%edx)
1564	movaps	21(%ecx), %xmm2
1565
	/* Round 2: same check; both cursors advance by 16 first.  */
1566	pcmpeqb	%xmm2, %xmm0
1567	lea	16(%edx), %edx
1568	pmovmskb %xmm0, %eax
1569	lea	16(%ecx), %ecx
1570	movaps	%xmm2, %xmm3
1571# ifdef USE_AS_STRNCPY
1572	sub	$16, %ebx
1573	jbe	L(StrncpyExit11Case2OrCase3)
1574# endif
1575	test	%eax, %eax
1576	jnz	L(Shl11LoopExit)
1577
1578	palignr	$11, %xmm1, %xmm2
1579	movaps	%xmm2, (%edx)
1580	movaps	21(%ecx), %xmm2
1581	movaps	%xmm3, %xmm1
1582
	/* Round 3.  */
1583	pcmpeqb	%xmm2, %xmm0
1584	lea	16(%edx), %edx
1585	pmovmskb %xmm0, %eax
1586	lea	16(%ecx), %ecx
1587	movaps	%xmm2, %xmm3
1588# ifdef USE_AS_STRNCPY
1589	sub	$16, %ebx
1590	jbe	L(StrncpyExit11Case2OrCase3)
1591# endif
1592	test	%eax, %eax
1593	jnz	L(Shl11LoopExit)
1594
1595	palignr	$11, %xmm1, %xmm2
1596	movaps	%xmm2, (%edx)
1597	movaps	21(%ecx), %xmm2
1598
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
1599	pcmpeqb	%xmm2, %xmm0
1600	lea	16(%edx), %edx
1601	pmovmskb %xmm0, %eax
1602	lea	16(%ecx), %ecx
1603# ifdef USE_AS_STRNCPY
1604	sub	$16, %ebx
1605	jbe	L(StrncpyExit11Case2OrCase3)
1606# endif
1607	test	%eax, %eax
1608	jnz	L(Shl11LoopExit)
1609
1610	palignr	$11, %xmm3, %xmm2
1611	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
1612	lea	21(%ecx), %ecx
1613	lea	16(%edx), %edx
1614
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
1615	mov	%ecx, %eax
1616	and	$-0x40, %ecx
1617	sub	%ecx, %eax
1618	lea	-5(%ecx), %ecx
1619	sub	%eax, %edx
1620# ifdef USE_AS_STRNCPY
1621	add	%eax, %ebx
1622# endif
1623	movaps	-11(%ecx), %xmm1
1624
/* Main loop: 64 source bytes per iteration.  */
1625L(Shl11LoopStart):
1626	movaps	5(%ecx), %xmm2
1627	movaps	21(%ecx), %xmm3
1628	movaps	%xmm3, %xmm6
1629	movaps	37(%ecx), %xmm4
1630	movaps	%xmm4, %xmm7
1631	movaps	53(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
1632	pminub	%xmm2, %xmm6
1633	pminub	%xmm5, %xmm7
1634	pminub	%xmm6, %xmm7
1635	pcmpeqb	%xmm0, %xmm7
1636	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
1637	movaps	%xmm5, %xmm7
1638	palignr	$11, %xmm4, %xmm5
1639	test	%eax, %eax
1640	palignr	$11, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
1641	jnz	L(Shl11Start)
1642# ifdef USE_AS_STRNCPY
1643	sub	$64, %ebx
1644	jbe	L(StrncpyLeave11)
1645# endif
1646	palignr	$11, %xmm2, %xmm3
1647	lea	64(%ecx), %ecx
1648	palignr	$11, %xmm1, %xmm2
1649	movaps	%xmm7, %xmm1
1650	movaps	%xmm5, 48(%edx)
1651	movaps	%xmm4, 32(%edx)
1652	movaps	%xmm3, 16(%edx)
1653	movaps	%xmm2, (%edx)
1654	lea	64(%edx), %edx
1655	jmp	L(Shl11LoopStart)
1656
/* A zero byte was flagged in the current block (the one at 5(%ecx)).
   The 5 source bytes at %ecx that precede it are not stored yet: copy
   them with one 8-byte move that starts 3 bytes before both cursors,
   then let L(CopyFrom1To16Bytes) advance the cursors by %esi = 5 and
   decode the mask in %eax.  */
1657L(Shl11LoopExit):
1658	movlpd	-3(%ecx), %xmm0
1659	movlpd	%xmm0, -3(%edx)
1660	mov	$5, %esi
1661	jmp	L(CopyFrom1To16Bytes)
1662
1663	.p2align 4
/* Shl12: copy path whose aligned source blocks live at 4+16*k(%ecx),
   i.e. the source cursor sits 12 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $12 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
1664L(Shl12):
1665	movaps	-12(%ecx), %xmm1
1666	movaps	4(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
1667L(Shl12Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
1668	pcmpeqb	%xmm2, %xmm0
1669	pmovmskb %xmm0, %eax
1670	movaps	%xmm2, %xmm3
1671# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
1672	sub	$16, %ebx
1673	jbe	L(StrncpyExit12Case2OrCase3)
1674# endif
1675	test	%eax, %eax
1676	jnz	L(Shl12LoopExit)
1677
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
1678	palignr	$12, %xmm1, %xmm2
1679	movaps	%xmm3, %xmm1
1680	movaps	%xmm2, (%edx)
1681	movaps	20(%ecx), %xmm2
1682
	/* Round 2: same check; both cursors advance by 16 first.  */
1683	pcmpeqb	%xmm2, %xmm0
1684	lea	16(%edx), %edx
1685	pmovmskb %xmm0, %eax
1686	lea	16(%ecx), %ecx
1687	movaps	%xmm2, %xmm3
1688# ifdef USE_AS_STRNCPY
1689	sub	$16, %ebx
1690	jbe	L(StrncpyExit12Case2OrCase3)
1691# endif
1692	test	%eax, %eax
1693	jnz	L(Shl12LoopExit)
1694
1695	palignr	$12, %xmm1, %xmm2
1696	movaps	%xmm2, (%edx)
1697	movaps	20(%ecx), %xmm2
1698	movaps	%xmm3, %xmm1
1699
	/* Round 3.  */
1700	pcmpeqb	%xmm2, %xmm0
1701	lea	16(%edx), %edx
1702	pmovmskb %xmm0, %eax
1703	lea	16(%ecx), %ecx
1704	movaps	%xmm2, %xmm3
1705# ifdef USE_AS_STRNCPY
1706	sub	$16, %ebx
1707	jbe	L(StrncpyExit12Case2OrCase3)
1708# endif
1709	test	%eax, %eax
1710	jnz	L(Shl12LoopExit)
1711
1712	palignr	$12, %xmm1, %xmm2
1713	movaps	%xmm2, (%edx)
1714	movaps	20(%ecx), %xmm2
1715
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
1716	pcmpeqb	%xmm2, %xmm0
1717	lea	16(%edx), %edx
1718	pmovmskb %xmm0, %eax
1719	lea	16(%ecx), %ecx
1720# ifdef USE_AS_STRNCPY
1721	sub	$16, %ebx
1722	jbe	L(StrncpyExit12Case2OrCase3)
1723# endif
1724	test	%eax, %eax
1725	jnz	L(Shl12LoopExit)
1726
1727	palignr	$12, %xmm3, %xmm2
1728	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
1729	lea	20(%ecx), %ecx
1730	lea	16(%edx), %edx
1731
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
1732	mov	%ecx, %eax
1733	and	$-0x40, %ecx
1734	sub	%ecx, %eax
1735	lea	-4(%ecx), %ecx
1736	sub	%eax, %edx
1737# ifdef USE_AS_STRNCPY
1738	add	%eax, %ebx
1739# endif
1740	movaps	-12(%ecx), %xmm1
1741
/* Main loop: 64 source bytes per iteration.  */
1742L(Shl12LoopStart):
1743	movaps	4(%ecx), %xmm2
1744	movaps	20(%ecx), %xmm3
1745	movaps	%xmm3, %xmm6
1746	movaps	36(%ecx), %xmm4
1747	movaps	%xmm4, %xmm7
1748	movaps	52(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
1749	pminub	%xmm2, %xmm6
1750	pminub	%xmm5, %xmm7
1751	pminub	%xmm6, %xmm7
1752	pcmpeqb	%xmm0, %xmm7
1753	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
1754	movaps	%xmm5, %xmm7
1755	palignr	$12, %xmm4, %xmm5
1756	test	%eax, %eax
1757	palignr	$12, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
1758	jnz	L(Shl12Start)
1759# ifdef USE_AS_STRNCPY
1760	sub	$64, %ebx
1761	jbe	L(StrncpyLeave12)
1762# endif
1763	palignr	$12, %xmm2, %xmm3
1764	lea	64(%ecx), %ecx
1765	palignr	$12, %xmm1, %xmm2
1766	movaps	%xmm7, %xmm1
1767	movaps	%xmm5, 48(%edx)
1768	movaps	%xmm4, 32(%edx)
1769	movaps	%xmm3, 16(%edx)
1770	movaps	%xmm2, (%edx)
1771	lea	64(%edx), %edx
1772	jmp	L(Shl12LoopStart)
1773
/* A zero byte was flagged in the current block (the one at 4(%ecx)).
   The 4 source bytes at %ecx that precede it are not stored yet: copy
   exactly those with one 4-byte move through %esi, then let
   L(CopyFrom1To16Bytes) advance the cursors by %esi = 4 and decode the
   mask in %eax.  */
1774L(Shl12LoopExit):
1775	movl	(%ecx), %esi
1776	movl	%esi, (%edx)
1777	mov	$4, %esi
1778	jmp	L(CopyFrom1To16Bytes)
1779
1780	.p2align 4
/* Shl13: copy path whose aligned source blocks live at 3+16*k(%ecx),
   i.e. the source cursor sits 13 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $13 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
1781L(Shl13):
1782	movaps	-13(%ecx), %xmm1
1783	movaps	3(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
1784L(Shl13Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
1785	pcmpeqb	%xmm2, %xmm0
1786	pmovmskb %xmm0, %eax
1787	movaps	%xmm2, %xmm3
1788# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
1789	sub	$16, %ebx
1790	jbe	L(StrncpyExit13Case2OrCase3)
1791# endif
1792	test	%eax, %eax
1793	jnz	L(Shl13LoopExit)
1794
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
1795	palignr	$13, %xmm1, %xmm2
1796	movaps	%xmm3, %xmm1
1797	movaps	%xmm2, (%edx)
1798	movaps	19(%ecx), %xmm2
1799
	/* Round 2: same check; both cursors advance by 16 first.  */
1800	pcmpeqb	%xmm2, %xmm0
1801	lea	16(%edx), %edx
1802	pmovmskb %xmm0, %eax
1803	lea	16(%ecx), %ecx
1804	movaps	%xmm2, %xmm3
1805# ifdef USE_AS_STRNCPY
1806	sub	$16, %ebx
1807	jbe	L(StrncpyExit13Case2OrCase3)
1808# endif
1809	test	%eax, %eax
1810	jnz	L(Shl13LoopExit)
1811
1812	palignr	$13, %xmm1, %xmm2
1813	movaps	%xmm2, (%edx)
1814	movaps	19(%ecx), %xmm2
1815	movaps	%xmm3, %xmm1
1816
	/* Round 3.  */
1817	pcmpeqb	%xmm2, %xmm0
1818	lea	16(%edx), %edx
1819	pmovmskb %xmm0, %eax
1820	lea	16(%ecx), %ecx
1821	movaps	%xmm2, %xmm3
1822# ifdef USE_AS_STRNCPY
1823	sub	$16, %ebx
1824	jbe	L(StrncpyExit13Case2OrCase3)
1825# endif
1826	test	%eax, %eax
1827	jnz	L(Shl13LoopExit)
1828
1829	palignr	$13, %xmm1, %xmm2
1830	movaps	%xmm2, (%edx)
1831	movaps	19(%ecx), %xmm2
1832
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
1833	pcmpeqb	%xmm2, %xmm0
1834	lea	16(%edx), %edx
1835	pmovmskb %xmm0, %eax
1836	lea	16(%ecx), %ecx
1837# ifdef USE_AS_STRNCPY
1838	sub	$16, %ebx
1839	jbe	L(StrncpyExit13Case2OrCase3)
1840# endif
1841	test	%eax, %eax
1842	jnz	L(Shl13LoopExit)
1843
1844	palignr	$13, %xmm3, %xmm2
1845	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
1846	lea	19(%ecx), %ecx
1847	lea	16(%edx), %edx
1848
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
1849	mov	%ecx, %eax
1850	and	$-0x40, %ecx
1851	sub	%ecx, %eax
1852	lea	-3(%ecx), %ecx
1853	sub	%eax, %edx
1854# ifdef USE_AS_STRNCPY
1855	add	%eax, %ebx
1856# endif
1857	movaps	-13(%ecx), %xmm1
1858
/* Main loop: 64 source bytes per iteration.  */
1859L(Shl13LoopStart):
1860	movaps	3(%ecx), %xmm2
1861	movaps	19(%ecx), %xmm3
1862	movaps	%xmm3, %xmm6
1863	movaps	35(%ecx), %xmm4
1864	movaps	%xmm4, %xmm7
1865	movaps	51(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
1866	pminub	%xmm2, %xmm6
1867	pminub	%xmm5, %xmm7
1868	pminub	%xmm6, %xmm7
1869	pcmpeqb	%xmm0, %xmm7
1870	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
1871	movaps	%xmm5, %xmm7
1872	palignr	$13, %xmm4, %xmm5
1873	test	%eax, %eax
1874	palignr	$13, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
1875	jnz	L(Shl13Start)
1876# ifdef USE_AS_STRNCPY
1877	sub	$64, %ebx
1878	jbe	L(StrncpyLeave13)
1879# endif
1880	palignr	$13, %xmm2, %xmm3
1881	lea	64(%ecx), %ecx
1882	palignr	$13, %xmm1, %xmm2
1883	movaps	%xmm7, %xmm1
1884	movaps	%xmm5, 48(%edx)
1885	movaps	%xmm4, 32(%edx)
1886	movaps	%xmm3, 16(%edx)
1887	movaps	%xmm2, (%edx)
1888	lea	64(%edx), %edx
1889	jmp	L(Shl13LoopStart)
1890
/* A zero byte was flagged in the current block (the one at 3(%ecx)).
   The 3 source bytes at %ecx that precede it are not stored yet: copy
   them with one 4-byte move through %esi that starts 1 byte before both
   cursors, then let L(CopyFrom1To16Bytes) advance the cursors by
   %esi = 3 and decode the mask in %eax.  */
1891L(Shl13LoopExit):
1892	movl	-1(%ecx), %esi
1893	movl	%esi, -1(%edx)
1894	mov	$3, %esi
1895	jmp	L(CopyFrom1To16Bytes)
1896
1897	.p2align 4
/* Shl14: copy path whose aligned source blocks live at 2+16*k(%ecx),
   i.e. the source cursor sits 14 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $14 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
1898L(Shl14):
1899	movaps	-14(%ecx), %xmm1
1900	movaps	2(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
1901L(Shl14Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
1902	pcmpeqb	%xmm2, %xmm0
1903	pmovmskb %xmm0, %eax
1904	movaps	%xmm2, %xmm3
1905# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
1906	sub	$16, %ebx
1907	jbe	L(StrncpyExit14Case2OrCase3)
1908# endif
1909	test	%eax, %eax
1910	jnz	L(Shl14LoopExit)
1911
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
1912	palignr	$14, %xmm1, %xmm2
1913	movaps	%xmm3, %xmm1
1914	movaps	%xmm2, (%edx)
1915	movaps	18(%ecx), %xmm2
1916
	/* Round 2: same check; both cursors advance by 16 first.  */
1917	pcmpeqb	%xmm2, %xmm0
1918	lea	16(%edx), %edx
1919	pmovmskb %xmm0, %eax
1920	lea	16(%ecx), %ecx
1921	movaps	%xmm2, %xmm3
1922# ifdef USE_AS_STRNCPY
1923	sub	$16, %ebx
1924	jbe	L(StrncpyExit14Case2OrCase3)
1925# endif
1926	test	%eax, %eax
1927	jnz	L(Shl14LoopExit)
1928
1929	palignr	$14, %xmm1, %xmm2
1930	movaps	%xmm2, (%edx)
1931	movaps	18(%ecx), %xmm2
1932	movaps	%xmm3, %xmm1
1933
	/* Round 3.  */
1934	pcmpeqb	%xmm2, %xmm0
1935	lea	16(%edx), %edx
1936	pmovmskb %xmm0, %eax
1937	lea	16(%ecx), %ecx
1938	movaps	%xmm2, %xmm3
1939# ifdef USE_AS_STRNCPY
1940	sub	$16, %ebx
1941	jbe	L(StrncpyExit14Case2OrCase3)
1942# endif
1943	test	%eax, %eax
1944	jnz	L(Shl14LoopExit)
1945
1946	palignr	$14, %xmm1, %xmm2
1947	movaps	%xmm2, (%edx)
1948	movaps	18(%ecx), %xmm2
1949
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
1950	pcmpeqb	%xmm2, %xmm0
1951	lea	16(%edx), %edx
1952	pmovmskb %xmm0, %eax
1953	lea	16(%ecx), %ecx
1954# ifdef USE_AS_STRNCPY
1955	sub	$16, %ebx
1956	jbe	L(StrncpyExit14Case2OrCase3)
1957# endif
1958	test	%eax, %eax
1959	jnz	L(Shl14LoopExit)
1960
1961	palignr	$14, %xmm3, %xmm2
1962	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
1963	lea	18(%ecx), %ecx
1964	lea	16(%edx), %edx
1965
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
1966	mov	%ecx, %eax
1967	and	$-0x40, %ecx
1968	sub	%ecx, %eax
1969	lea	-2(%ecx), %ecx
1970	sub	%eax, %edx
1971# ifdef USE_AS_STRNCPY
1972	add	%eax, %ebx
1973# endif
1974	movaps	-14(%ecx), %xmm1
1975
/* Main loop: 64 source bytes per iteration.  */
1976L(Shl14LoopStart):
1977	movaps	2(%ecx), %xmm2
1978	movaps	18(%ecx), %xmm3
1979	movaps	%xmm3, %xmm6
1980	movaps	34(%ecx), %xmm4
1981	movaps	%xmm4, %xmm7
1982	movaps	50(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
1983	pminub	%xmm2, %xmm6
1984	pminub	%xmm5, %xmm7
1985	pminub	%xmm6, %xmm7
1986	pcmpeqb	%xmm0, %xmm7
1987	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
1988	movaps	%xmm5, %xmm7
1989	palignr	$14, %xmm4, %xmm5
1990	test	%eax, %eax
1991	palignr	$14, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
1992	jnz	L(Shl14Start)
1993# ifdef USE_AS_STRNCPY
1994	sub	$64, %ebx
1995	jbe	L(StrncpyLeave14)
1996# endif
1997	palignr	$14, %xmm2, %xmm3
1998	lea	64(%ecx), %ecx
1999	palignr	$14, %xmm1, %xmm2
2000	movaps	%xmm7, %xmm1
2001	movaps	%xmm5, 48(%edx)
2002	movaps	%xmm4, 32(%edx)
2003	movaps	%xmm3, 16(%edx)
2004	movaps	%xmm2, (%edx)
2005	lea	64(%edx), %edx
2006	jmp	L(Shl14LoopStart)
2007
/* A zero byte was flagged in the current block (the one at 2(%ecx)).
   The 2 source bytes at %ecx that precede it are not stored yet: copy
   them with one 4-byte move through %esi that starts 2 bytes before
   both cursors, then let L(CopyFrom1To16Bytes) advance the cursors by
   %esi = 2 and decode the mask in %eax.  */
2008L(Shl14LoopExit):
2009	movl	-2(%ecx), %esi
2010	movl	%esi, -2(%edx)
2011	mov	$2, %esi
2012	jmp	L(CopyFrom1To16Bytes)
2013
2014	.p2align 4
/* Shl15: copy path whose aligned source blocks live at 1+16*k(%ecx),
   i.e. the source cursor sits 15 bytes past a 16-byte boundary (all
   loads and stores below are movaps, which needs aligned addresses).
   palignr $15 merges two neighbouring aligned blocks into the 16 source
   bytes that start at %ecx; these are stored to (%edx).

   Registers as used in this block:
     %ecx   source cursor		%edx   destination cursor
     %xmm1  previous aligned block	%xmm2  current aligned block
     %xmm3  spare copy of the current block
     %xmm0  compared with each block to find a zero byte; expected to be
	    all zero on entry (set before this block -- TODO confirm)
     %eax   pmovmskb mask of that comparison
     %ebx   (USE_AS_STRNCPY only) remaining byte budget
     %esi   scratch; its saved value is popped in L(CopyFrom1To16Bytes)  */
2015L(Shl15):
2016	movaps	-15(%ecx), %xmm1
2017	movaps	1(%ecx), %xmm2
/* Also re-entered from the 64-byte loop below once it sees a zero byte,
   to locate that byte 16 bytes at a time.  */
2018L(Shl15Start):
	/* Round 1: flag zero bytes of the current block.  When none is
	   found %xmm0 stays all zero for the next round.  */
2019	pcmpeqb	%xmm2, %xmm0
2020	pmovmskb %xmm0, %eax
2021	movaps	%xmm2, %xmm3
2022# ifdef USE_AS_STRNCPY
	/* Taken when at most 16 bytes of budget were left.  */
2023	sub	$16, %ebx
2024	jbe	L(StrncpyExit15Case2OrCase3)
2025# endif
2026	test	%eax, %eax
2027	jnz	L(Shl15LoopExit)
2028
	/* No terminator: store source bytes [%ecx, %ecx+16) and fetch the
	   next aligned block.  */
2029	palignr	$15, %xmm1, %xmm2
2030	movaps	%xmm3, %xmm1
2031	movaps	%xmm2, (%edx)
2032	movaps	17(%ecx), %xmm2
2033
	/* Round 2: same check; both cursors advance by 16 first.  */
2034	pcmpeqb	%xmm2, %xmm0
2035	lea	16(%edx), %edx
2036	pmovmskb %xmm0, %eax
2037	lea	16(%ecx), %ecx
2038	movaps	%xmm2, %xmm3
2039# ifdef USE_AS_STRNCPY
2040	sub	$16, %ebx
2041	jbe	L(StrncpyExit15Case2OrCase3)
2042# endif
2043	test	%eax, %eax
2044	jnz	L(Shl15LoopExit)
2045
2046	palignr	$15, %xmm1, %xmm2
2047	movaps	%xmm2, (%edx)
2048	movaps	17(%ecx), %xmm2
2049	movaps	%xmm3, %xmm1
2050
	/* Round 3.  */
2051	pcmpeqb	%xmm2, %xmm0
2052	lea	16(%edx), %edx
2053	pmovmskb %xmm0, %eax
2054	lea	16(%ecx), %ecx
2055	movaps	%xmm2, %xmm3
2056# ifdef USE_AS_STRNCPY
2057	sub	$16, %ebx
2058	jbe	L(StrncpyExit15Case2OrCase3)
2059# endif
2060	test	%eax, %eax
2061	jnz	L(Shl15LoopExit)
2062
2063	palignr	$15, %xmm1, %xmm2
2064	movaps	%xmm2, (%edx)
2065	movaps	17(%ecx), %xmm2
2066
	/* Round 4: %xmm3 is not refreshed here, so it still holds the
	   previous block and is used directly by the palignr below.  */
2067	pcmpeqb	%xmm2, %xmm0
2068	lea	16(%edx), %edx
2069	pmovmskb %xmm0, %eax
2070	lea	16(%ecx), %ecx
2071# ifdef USE_AS_STRNCPY
2072	sub	$16, %ebx
2073	jbe	L(StrncpyExit15Case2OrCase3)
2074# endif
2075	test	%eax, %eax
2076	jnz	L(Shl15LoopExit)
2077
2078	palignr	$15, %xmm3, %xmm2
2079	movaps	%xmm2, (%edx)
	/* %ecx now addresses the next aligned block not yet loaded.  */
2080	lea	17(%ecx), %ecx
2081	lea	16(%edx), %edx
2082
	/* Step the source back to its enclosing 64-byte boundary: %eax is
	   the distance stepped back, %edx moves back by the same amount
	   and, for strncpy, the budget grows by it.  Bytes in that range
	   are copied a second time by the loop.  */
2083	mov	%ecx, %eax
2084	and	$-0x40, %ecx
2085	sub	%ecx, %eax
2086	lea	-1(%ecx), %ecx
2087	sub	%eax, %edx
2088# ifdef USE_AS_STRNCPY
2089	add	%eax, %ebx
2090# endif
2091	movaps	-15(%ecx), %xmm1
2092
/* Main loop: 64 source bytes per iteration.  */
2093L(Shl15LoopStart):
2094	movaps	1(%ecx), %xmm2
2095	movaps	17(%ecx), %xmm3
2096	movaps	%xmm3, %xmm6
2097	movaps	33(%ecx), %xmm4
2098	movaps	%xmm4, %xmm7
2099	movaps	49(%ecx), %xmm5
	/* Fold the four blocks into one byte-wise minimum; it contains a
	   zero byte exactly when one of the 64 loaded bytes is zero.  */
2100	pminub	%xmm2, %xmm6
2101	pminub	%xmm5, %xmm7
2102	pminub	%xmm6, %xmm7
2103	pcmpeqb	%xmm0, %xmm7
2104	pmovmskb %xmm7, %eax
	/* Keep the unshifted last block; it becomes %xmm1 next time.  */
2105	movaps	%xmm5, %xmm7
2106	palignr	$15, %xmm4, %xmm5
2107	test	%eax, %eax
2108	palignr	$15, %xmm3, %xmm4
	/* %xmm1, %xmm2, %ecx and %edx are still untouched at this point,
	   so the 16-byte rounds can restart from them.  */
2109	jnz	L(Shl15Start)
2110# ifdef USE_AS_STRNCPY
2111	sub	$64, %ebx
2112	jbe	L(StrncpyLeave15)
2113# endif
2114	palignr	$15, %xmm2, %xmm3
2115	lea	64(%ecx), %ecx
2116	palignr	$15, %xmm1, %xmm2
2117	movaps	%xmm7, %xmm1
2118	movaps	%xmm5, 48(%edx)
2119	movaps	%xmm4, 32(%edx)
2120	movaps	%xmm3, 16(%edx)
2121	movaps	%xmm2, (%edx)
2122	lea	64(%edx), %edx
2123	jmp	L(Shl15LoopStart)
2124
/* A zero byte was flagged in the current block (the one at 1(%ecx)).
   The single source byte at %ecx that precedes it is not stored yet:
   copy it with one 4-byte move through %esi that starts 3 bytes before
   both cursors, then hand over to L(CopyFrom1To16Bytes) with %esi = 1.
   Only the USE_AS_STRCAT build needs an explicit jump; otherwise
   execution falls through into the L(CopyFrom1To16Bytes) that follows
   in this file.  */
2125L(Shl15LoopExit):
2126	movl	-3(%ecx), %esi
2127	movl	%esi, -3(%edx)
2128	mov	$1, %esi
2129# ifdef USE_AS_STRCAT
2130	jmp	L(CopyFrom1To16Bytes)
2131# endif
2132
2133
2134# ifndef USE_AS_STRCAT
2135
2136	.p2align 4
/* CopyFrom1To16Bytes: finish the copy once a zero byte has been flagged
   in the current 16-byte block.

   On entry:
     %eax  pmovmskb mask of the block; bit i set means byte i matched
	   (the lowest set bit is taken as the terminator position)
     %esi  number of bytes the caller has already copied in front of
	   that block; added to both cursors here
     %ecx/%edx  source/destination cursors
     %ebx  (USE_AS_STRNCPY) remaining budget minus the 16 the caller
	   subtracted for this block; the 16 is added back here
   The saved %esi is popped, then the mask is decoded with a small
   binary search down to one of the L(ExitN) stubs, N = bytes to copy
   including the terminator.  */
2137L(CopyFrom1To16Bytes):
2138#  ifdef USE_AS_STRNCPY
2139	add	$16, %ebx
2140#  endif
2141	add	%esi, %edx
2142	add	%esi, %ecx
2143
2144	POP	(%esi)
	/* Low mask byte empty: the match is in bytes 8..15.  */
2145	test	%al, %al
2146	jz	L(ExitHigh8)
2147
/* Match within bytes 0..7 (mask in %al).  Also entered from
   L(CopyFrom1To16BytesCase2).  */
2148L(CopyFrom1To16BytesLess8):
	/* %ah = low nibble of %al; zero means the match is in bytes 4..7.  */
2149	mov	%al, %ah
2150	and	$15, %ah
2151	jz	L(ExitHigh4)
2152
2153	test	$0x01, %al
2154	jnz	L(Exit1)
2155	test	$0x02, %al
2156	jnz	L(Exit2)
2157	test	$0x04, %al
2158	jnz	L(Exit3)
2159
/* Copy 4 bytes and return.  SAVE_RESULT yields %edx+3 for stpcpy and
   %edi otherwise.  For strncpy the budget drops by 4; a non-zero rest
   is zero-filled starting at %ecx = %edx+4 (lea leaves the flags of the
   sub intact for the jnz).  When the budget hit exactly zero, the
   stpncpy build bumps %eax by one unless the byte it points at is 0:
   cmpb $1 sets carry only for a zero byte and sbb $-1 adds 1 - carry.
   RETURN1 pops %edi (and %ebx for strncpy) before ret.  */
2160	.p2align 4
2161L(Exit4):
2162	movl	(%ecx), %eax
2163	movl	%eax, (%edx)
2164	SAVE_RESULT	(3)
2165#  ifdef USE_AS_STRNCPY
2166	sub	$4, %ebx
2167	lea	4(%edx), %ecx
2168	jnz	L(StrncpyFillTailWithZero1)
2169#   ifdef USE_AS_STPCPY
2170	cmpb	$1, (%eax)
2171	sbb	$-1, %eax
2172#   endif
2173#  endif
2174	RETURN1
2175
/* Match within bytes 4..7: bits 4..6 select Exit5..Exit7, otherwise
   Exit8.  */
2176	.p2align 4
2177L(ExitHigh4):
2178	test	$0x10, %al
2179	jnz	L(Exit5)
2180	test	$0x20, %al
2181	jnz	L(Exit6)
2182	test	$0x40, %al
2183	jnz	L(Exit7)
2184
/* Copy 8 bytes with one 64-bit move; tail handling as in L(Exit4).  */
2185	.p2align 4
2186L(Exit8):
2187	movlpd	(%ecx), %xmm0
2188	movlpd	%xmm0, (%edx)
2189	SAVE_RESULT	(7)
2190#  ifdef USE_AS_STRNCPY
2191	sub	$8, %ebx
2192	lea	8(%edx), %ecx
2193	jnz	L(StrncpyFillTailWithZero1)
2194#   ifdef USE_AS_STPCPY
2195	cmpb	$1, (%eax)
2196	sbb	$-1, %eax
2197#   endif
2198#  endif
2199	RETURN1
2200
/* Match within bytes 8..15 (mask in %ah).  %al = low nibble of %ah;
   zero means the match is in bytes 12..15.  */
2201	.p2align 4
2202L(ExitHigh8):
2203	mov	%ah, %al
2204	and	$15, %al
2205	jz	L(ExitHigh12)
2206
2207	test	$0x01, %ah
2208	jnz	L(Exit9)
2209	test	$0x02, %ah
2210	jnz	L(Exit10)
2211	test	$0x04, %ah
2212	jnz	L(Exit11)
2213
/* Copy 12 bytes as 8 + 4; tail handling as in L(Exit4).  */
2214	.p2align 4
2215L(Exit12):
2216	movlpd	(%ecx), %xmm0
2217	movl	8(%ecx), %eax
2218	movlpd	%xmm0, (%edx)
2219	movl	%eax, 8(%edx)
2220	SAVE_RESULT	(11)
2221#  ifdef USE_AS_STRNCPY
2222	sub	$12, %ebx
2223	lea	12(%edx), %ecx
2224	jnz	L(StrncpyFillTailWithZero1)
2225#   ifdef USE_AS_STPCPY
2226	cmpb	$1, (%eax)
2227	sbb	$-1, %eax
2228#   endif
2229#  endif
2230	RETURN1
2231
/* Match within bytes 12..15: bits 4..6 of %ah select Exit13..Exit15,
   otherwise Exit16.  */
2232	.p2align 4
2233L(ExitHigh12):
2234	test	$0x10, %ah
2235	jnz	L(Exit13)
2236	test	$0x20, %ah
2237	jnz	L(Exit14)
2238	test	$0x40, %ah
2239	jnz	L(Exit15)
2240
/* Copy 16 bytes with one unaligned 128-bit move; tail handling as in
   L(Exit4).  */
2241	.p2align 4
2242L(Exit16):
2243	movdqu	(%ecx), %xmm0
2244	movdqu	%xmm0, (%edx)
2245	SAVE_RESULT	(15)
2246#  ifdef USE_AS_STRNCPY
2247	sub	$16, %ebx
2248	lea	16(%edx), %ecx
2249	jnz	L(StrncpyFillTailWithZero1)
2250#   ifdef USE_AS_STPCPY
2251	cmpb	$1, (%eax)
2252	sbb	$-1, %eax
2253#   endif
2254#  endif
2255	RETURN1
2256
2257#   ifdef USE_AS_STRNCPY
2258
2259	CFI_PUSH(%esi)
2260
2261	.p2align 4
/* CopyFrom1To16BytesCase2 (strncpy builds only): the current 16-byte
   block has a non-zero match mask in %eax (this label is reached from
   L(CopyFrom1To16BytesCase2OrCase3) when %eax != 0) and the length
   budget may also run out inside the block.  The copy must stop at
   whichever comes first.

   %ebx gets back the 16 subtracted by the caller, %esi (bytes already
   copied in front of the block) is added to both cursors, and the saved
   %esi is popped.  */
2262L(CopyFrom1To16BytesCase2):
2263	add	$16, %ebx
2264	add	%esi, %ecx
2265	add	%esi, %edx
2266
2267	POP	(%esi)
2268
	/* No match among bytes 0..7: look at the high half.  */
2269	test	%al, %al
2270	jz	L(ExitHighCase2)
2271
	/* Match in bytes 0..7 and more than 8 bytes of budget: the match
	   comes first, so the plain mask decoder can be used.  */
2272	cmp	$8, %ebx
2273	ja	L(CopyFrom1To16BytesLess8)
2274
	/* Otherwise walk k = 1..7: stop at L(Exitk) as soon as mask bit
	   k-1 is set or the budget equals k; fall back to L(Exit8).  */
2275	test	$0x01, %al
2276	jnz	L(Exit1)
2277	cmp	$1, %ebx
2278	je	L(Exit1)
2279	test	$0x02, %al
2280	jnz	L(Exit2)
2281	cmp	$2, %ebx
2282	je	L(Exit2)
2283	test	$0x04, %al
2284	jnz	L(Exit3)
2285	cmp	$3, %ebx
2286	je	L(Exit3)
2287	test	$0x08, %al
2288	jnz	L(Exit4)
2289	cmp	$4, %ebx
2290	je	L(Exit4)
2291	test	$0x10, %al
2292	jnz	L(Exit5)
2293	cmp	$5, %ebx
2294	je	L(Exit5)
2295	test	$0x20, %al
2296	jnz	L(Exit6)
2297	cmp	$6, %ebx
2298	je	L(Exit6)
2299	test	$0x40, %al
2300	jnz	L(Exit7)
2301	cmp	$7, %ebx
2302	je	L(Exit7)
2303	jmp	L(Exit8)
2304
/* The match lies in bytes 8..15 (mask bits in %ah).  With a budget of
   at most 8 the budget ends first, so the length-only decoder
   L(CopyFrom1To16BytesLess8Case3) takes over.  Otherwise walk
   k = 9..15 the same way as above and fall back to L(Exit16).  */
2305	.p2align 4
2306L(ExitHighCase2):
2307	cmp	$8, %ebx
2308	jbe	L(CopyFrom1To16BytesLess8Case3)
2309
2310	test	$0x01, %ah
2311	jnz	L(Exit9)
2312	cmp	$9, %ebx
2313	je	L(Exit9)
2314	test	$0x02, %ah
2315	jnz	L(Exit10)
2316	cmp	$10, %ebx
2317	je	L(Exit10)
2318	test	$0x04, %ah
2319	jnz	L(Exit11)
2320	cmp	$11, %ebx
2321	je	L(Exit11)
2322	test	$0x8, %ah
2323	jnz	L(Exit12)
2324	cmp	$12, %ebx
2325	je	L(Exit12)
2326	test	$0x10, %ah
2327	jnz	L(Exit13)
2328	cmp	$13, %ebx
2329	je	L(Exit13)
2330	test	$0x20, %ah
2331	jnz	L(Exit14)
2332	cmp	$14, %ebx
2333	je	L(Exit14)
2334	test	$0x40, %ah
2335	jnz	L(Exit15)
2336	cmp	$15, %ebx
2337	je	L(Exit15)
2338	jmp	L(Exit16)
2339
2340	CFI_PUSH(%esi)
2341
2342	.p2align 4
/* CopyFrom1To16BytesCase2OrCase3 (strncpy builds only): entered when
   the length budget may end inside the current 16-byte block.  A
   non-zero match mask in %eax is handled by L(CopyFrom1To16BytesCase2);
   a zero mask falls through into Case3 below.  */
2343L(CopyFrom1To16BytesCase2OrCase3):
2344	test	%eax, %eax
2345	jnz	L(CopyFrom1To16BytesCase2)
2346
/* Case3: no match in the block, so the number of bytes left to copy is
   given by the budget alone.  %ebx gets back the 16 subtracted by the
   caller, %esi (bytes already copied in front of the block) is added to
   both cursors, the saved %esi is popped, and %ebx is then decoded by
   comparisons against 8, 4 and 12.  */
2347	.p2align 4
2348L(CopyFrom1To16BytesCase3):
2349	add	$16, %ebx
2350	add	%esi, %edx
2351	add	%esi, %ecx
2352
2353	POP	(%esi)
2354
2355	cmp	$8, %ebx
2356	ja	L(ExitHigh8Case3)
2357
/* Budget of at most 8 bytes.  Also entered from L(ExitHighCase2).  */
2358L(CopyFrom1To16BytesLess8Case3):
2359	cmp	$4, %ebx
2360	ja	L(ExitHigh4Case3)
2361
2362	cmp	$1, %ebx
2363	je	L(Exit1)
2364	cmp	$2, %ebx
2365	je	L(Exit2)
2366	cmp	$3, %ebx
2367	je	L(Exit3)
	/* Remaining case: copy 4 bytes and return at once, with no
	   zero-fill.  For stpcpy the result is %edx+4, one past the last
	   byte written.  NOTE(review): a budget of 0 would also land
	   here; presumably callers never pass that -- confirm.  */
2368	movl	(%ecx), %eax
2369	movl	%eax, (%edx)
2370	SAVE_RESULT	(4)
2371	RETURN1
2372
/* Budget 5..8: 5..7 go to the L(ExitN) stubs, 8 is copied inline.  */
2373	.p2align 4
2374L(ExitHigh4Case3):
2375	cmp	$5, %ebx
2376	je	L(Exit5)
2377	cmp	$6, %ebx
2378	je	L(Exit6)
2379	cmp	$7, %ebx
2380	je	L(Exit7)
2381	movlpd	(%ecx), %xmm0
2382	movlpd	%xmm0, (%edx)
2383	SAVE_RESULT	(8)
2384	RETURN1
2385
/* Budget above 8: 9..11 go to the L(ExitN) stubs, 12 is copied inline
   as 8 + 4, larger values continue in L(ExitHigh12Case3).  */
2386	.p2align 4
2387L(ExitHigh8Case3):
2388	cmp	$12, %ebx
2389	ja	L(ExitHigh12Case3)
2390
2391	cmp	$9, %ebx
2392	je	L(Exit9)
2393	cmp	$10, %ebx
2394	je	L(Exit10)
2395	cmp	$11, %ebx
2396	je	L(Exit11)
2397	movlpd	(%ecx), %xmm0
2398	movl	8(%ecx), %eax
2399	movlpd	%xmm0, (%edx)
2400	movl	%eax, 8(%edx)
2401	SAVE_RESULT	(12)
2402	RETURN1
2403
/* Budget above 12: 13..15 go to the L(ExitN) stubs, otherwise 16 bytes
   are copied inline as 8 + 8.  */
2404	.p2align 4
2405L(ExitHigh12Case3):
2406	cmp	$13, %ebx
2407	je	L(Exit13)
2408	cmp	$14, %ebx
2409	je	L(Exit14)
2410	cmp	$15, %ebx
2411	je	L(Exit15)
2412	movlpd	(%ecx), %xmm0
2413	movlpd	8(%ecx), %xmm1
2414	movlpd	%xmm0, (%edx)
2415	movlpd	%xmm1, 8(%edx)
2416	SAVE_RESULT	(16)
2417	RETURN1
2418
2419#  endif
2420
2421	.p2align 4
/* L(Exit1) ... L(Exit15) (Exit4/8/12/16 live next to the mask decoder).
   Each L(ExitN) copies exactly N bytes from (%ecx) to (%edx) using at
   most two moves, which overlap when N is not a sum of two move sizes,
   and then returns through RETURN1 (pops %edi, and %ebx in strncpy
   builds, before ret).

   Common tail of every stub:
     SAVE_RESULT (N-1)  %eax = %edx+N-1 for stpcpy, %eax = %edi
			otherwise.
     USE_AS_STRNCPY	the budget in %ebx drops by N and %ecx is set to
			%edx+N; lea leaves the flags of the sub intact,
			so jnz sends a non-zero rest to
			L(StrncpyFillTailWithZero1) for zero filling.
     USE_AS_STPCPY	(only when the budget hit exactly zero) %eax is
			bumped by one unless the byte it points at is 0:
			cmpb $1 sets carry only for a zero byte and
			sbb $-1 adds 1 - carry.  */
2422L(Exit1):
2423	movb	(%ecx), %al
2424	movb	%al, (%edx)
2425	SAVE_RESULT	(0)
2426#  ifdef USE_AS_STRNCPY
2427	sub	$1, %ebx
2428	lea	1(%edx), %ecx
2429	jnz	L(StrncpyFillTailWithZero1)
2430#   ifdef USE_AS_STPCPY
2431	cmpb	$1, (%eax)
2432	sbb	$-1, %eax
2433#   endif
2434#  endif
2435	RETURN1
2436
2437	.p2align 4
2438L(Exit2):
2439	movw	(%ecx), %ax
2440	movw	%ax, (%edx)
2441	SAVE_RESULT	(1)
2442#  ifdef USE_AS_STRNCPY
2443	sub	$2, %ebx
2444	lea	2(%edx), %ecx
2445	jnz	L(StrncpyFillTailWithZero1)
2446#   ifdef USE_AS_STPCPY
2447	cmpb	$1, (%eax)
2448	sbb	$-1, %eax
2449#   endif
2450#  endif
2451	RETURN1
2452
	/* 3 bytes = 2 + 1.  */
2453	.p2align 4
2454L(Exit3):
2455	movw	(%ecx), %ax
2456	movw	%ax, (%edx)
2457	movb	2(%ecx), %al
2458	movb	%al, 2(%edx)
2459	SAVE_RESULT	(2)
2460#  ifdef USE_AS_STRNCPY
2461	sub	$3, %ebx
2462	lea	3(%edx), %ecx
2463	jnz	L(StrncpyFillTailWithZero1)
2464#   ifdef USE_AS_STPCPY
2465	cmpb	$1, (%eax)
2466	sbb	$-1, %eax
2467#   endif
2468#  endif
2469	RETURN1
2470
	/* 5 bytes = 4 + 1.  */
2471	.p2align 4
2472L(Exit5):
2473	movl	(%ecx), %eax
2474	movl	%eax, (%edx)
2475	movb	4(%ecx), %al
2476	movb	%al, 4(%edx)
2477	SAVE_RESULT	(4)
2478#  ifdef USE_AS_STRNCPY
2479	sub	$5, %ebx
2480	lea	5(%edx), %ecx
2481	jnz	L(StrncpyFillTailWithZero1)
2482#   ifdef USE_AS_STPCPY
2483	cmpb	$1, (%eax)
2484	sbb	$-1, %eax
2485#   endif
2486#  endif
2487	RETURN1
2488
	/* 6 bytes = 4 + 2.  */
2489	.p2align 4
2490L(Exit6):
2491	movl	(%ecx), %eax
2492	movl	%eax, (%edx)
2493	movw	4(%ecx), %ax
2494	movw	%ax, 4(%edx)
2495	SAVE_RESULT	(5)
2496#  ifdef USE_AS_STRNCPY
2497	sub	$6, %ebx
2498	lea	6(%edx), %ecx
2499	jnz	L(StrncpyFillTailWithZero1)
2500#   ifdef USE_AS_STPCPY
2501	cmpb	$1, (%eax)
2502	sbb	$-1, %eax
2503#   endif
2504#  endif
2505	RETURN1
2506
	/* 7 bytes: two 4-byte moves at offsets 0 and 3 (byte 3 is written
	   twice).  */
2507	.p2align 4
2508L(Exit7):
2509	movl	(%ecx), %eax
2510	movl	%eax, (%edx)
2511	movl	3(%ecx), %eax
2512	movl	%eax, 3(%edx)
2513	SAVE_RESULT	(6)
2514#  ifdef USE_AS_STRNCPY
2515	sub	$7, %ebx
2516	lea	7(%edx), %ecx
2517	jnz	L(StrncpyFillTailWithZero1)
2518#   ifdef USE_AS_STPCPY
2519	cmpb	$1, (%eax)
2520	sbb	$-1, %eax
2521#   endif
2522#  endif
2523	RETURN1
2524
	/* 9 bytes = 8 + 1.  */
2525	.p2align 4
2526L(Exit9):
2527	movlpd	(%ecx), %xmm0
2528	movb	8(%ecx), %al
2529	movlpd	%xmm0, (%edx)
2530	movb	%al, 8(%edx)
2531	SAVE_RESULT	(8)
2532#  ifdef USE_AS_STRNCPY
2533	sub	$9, %ebx
2534	lea	9(%edx), %ecx
2535	jnz	L(StrncpyFillTailWithZero1)
2536#   ifdef USE_AS_STPCPY
2537	cmpb	$1, (%eax)
2538	sbb	$-1, %eax
2539#   endif
2540#  endif
2541	RETURN1
2542
	/* 10 bytes = 8 + 2.  */
2543	.p2align 4
2544L(Exit10):
2545	movlpd	(%ecx), %xmm0
2546	movw	8(%ecx), %ax
2547	movlpd	%xmm0, (%edx)
2548	movw	%ax, 8(%edx)
2549	SAVE_RESULT	(9)
2550#  ifdef USE_AS_STRNCPY
2551	sub	$10, %ebx
2552	lea	10(%edx), %ecx
2553	jnz	L(StrncpyFillTailWithZero1)
2554#   ifdef USE_AS_STPCPY
2555	cmpb	$1, (%eax)
2556	sbb	$-1, %eax
2557#   endif
2558#  endif
2559	RETURN1
2560
	/* 11 bytes: 8 bytes at offset 0 plus 4 bytes at offset 7.  */
2561	.p2align 4
2562L(Exit11):
2563	movlpd	(%ecx), %xmm0
2564	movl	7(%ecx), %eax
2565	movlpd	%xmm0, (%edx)
2566	movl	%eax, 7(%edx)
2567	SAVE_RESULT	(10)
2568#  ifdef USE_AS_STRNCPY
2569	sub	$11, %ebx
2570	lea	11(%edx), %ecx
2571	jnz	L(StrncpyFillTailWithZero1)
2572#   ifdef USE_AS_STPCPY
2573	cmpb	$1, (%eax)
2574	sbb	$-1, %eax
2575#   endif
2576#  endif
2577	RETURN1
2578
	/* 13 bytes: two 8-byte moves at offsets 0 and 5.  */
2579	.p2align 4
2580L(Exit13):
2581	movlpd	(%ecx), %xmm0
2582	movlpd	5(%ecx), %xmm1
2583	movlpd	%xmm0, (%edx)
2584	movlpd	%xmm1, 5(%edx)
2585	SAVE_RESULT	(12)
2586#  ifdef USE_AS_STRNCPY
2587	sub	$13, %ebx
2588	lea	13(%edx), %ecx
2589	jnz	L(StrncpyFillTailWithZero1)
2590#   ifdef USE_AS_STPCPY
2591	cmpb	$1, (%eax)
2592	sbb	$-1, %eax
2593#   endif
2594#  endif
2595	RETURN1
2596
	/* 14 bytes: two 8-byte moves at offsets 0 and 6.  */
2597	.p2align 4
2598L(Exit14):
2599	movlpd	(%ecx), %xmm0
2600	movlpd	6(%ecx), %xmm1
2601	movlpd	%xmm0, (%edx)
2602	movlpd	%xmm1, 6(%edx)
2603	SAVE_RESULT	(13)
2604#  ifdef USE_AS_STRNCPY
2605	sub	$14, %ebx
2606	lea	14(%edx), %ecx
2607	jnz	L(StrncpyFillTailWithZero1)
2608#   ifdef USE_AS_STPCPY
2609	cmpb	$1, (%eax)
2610	sbb	$-1, %eax
2611#   endif
2612#  endif
2613	RETURN1
2614
	/* 15 bytes: two 8-byte moves at offsets 0 and 7.  */
2615	.p2align 4
2616L(Exit15):
2617	movlpd	(%ecx), %xmm0
2618	movlpd	7(%ecx), %xmm1
2619	movlpd	%xmm0, (%edx)
2620	movlpd	%xmm1, 7(%edx)
2621	SAVE_RESULT	(14)
2622#  ifdef USE_AS_STRNCPY
2623	sub	$15, %ebx
2624	lea	15(%edx), %ecx
2625	jnz	L(StrncpyFillTailWithZero1)
2626#   ifdef USE_AS_STPCPY
2627	cmpb	$1, (%eax)
2628	sbb	$-1, %eax
2629#   endif
2630#  endif
2631	RETURN1
2632
2633CFI_POP	(%edi)
2634
2635#  ifdef USE_AS_STRNCPY
2636	.p2align 4
/* L(Fill0) ... L(Fill16) (strncpy builds only): L(FillN) writes N zero
   bytes at (%ecx) and returns through RETURN (pops %ebx, then ret).
   The zeros come from %edx and %xmm0, both of which are cleared in
   L(StrncpyFillTailWithZero) before L(FillFrom1To16Bytes) dispatches
   here.  Sizes that are not a sum of two store widths use two
   overlapping stores.  */
2637L(Fill0):
2638	RETURN
2639
2640	.p2align 4
2641L(Fill1):
2642	movb	%dl, (%ecx)
2643	RETURN
2644
2645	.p2align 4
2646L(Fill2):
2647	movw	%dx, (%ecx)
2648	RETURN
2649
2650	.p2align 4
2651L(Fill3):
2652	movw	%dx, (%ecx)
2653	movb	%dl, 2(%ecx)
2654	RETURN
2655
2656	.p2align 4
2657L(Fill4):
2658	movl	%edx, (%ecx)
2659	RETURN
2660
2661	.p2align 4
2662L(Fill5):
2663	movl	%edx, (%ecx)
2664	movb	%dl, 4(%ecx)
2665	RETURN
2666
2667	.p2align 4
2668L(Fill6):
2669	movl	%edx, (%ecx)
2670	movw	%dx, 4(%ecx)
2671	RETURN
2672
	/* 7 bytes: two 4-byte stores at offsets 0 and 3.  */
2673	.p2align 4
2674L(Fill7):
2675	movl	%edx, (%ecx)
2676	movl	%edx, 3(%ecx)
2677	RETURN
2678
2679	.p2align 4
2680L(Fill8):
2681	movlpd	%xmm0, (%ecx)
2682	RETURN
2683
2684	.p2align 4
2685L(Fill9):
2686	movlpd	%xmm0, (%ecx)
2687	movb	%dl, 8(%ecx)
2688	RETURN
2689
2690	.p2align 4
2691L(Fill10):
2692	movlpd	%xmm0, (%ecx)
2693	movw	%dx, 8(%ecx)
2694	RETURN
2695
	/* 11 bytes: 8 bytes at offset 0 plus 4 bytes at offset 7.  */
2696	.p2align 4
2697L(Fill11):
2698	movlpd	%xmm0, (%ecx)
2699	movl	%edx, 7(%ecx)
2700	RETURN
2701
2702	.p2align 4
2703L(Fill12):
2704	movlpd	%xmm0, (%ecx)
2705	movl	%edx, 8(%ecx)
2706	RETURN
2707
	/* 13..15 bytes: two 8-byte stores, the second one at offset N-8.  */
2708	.p2align 4
2709L(Fill13):
2710	movlpd	%xmm0, (%ecx)
2711	movlpd	%xmm0, 5(%ecx)
2712	RETURN
2713
2714	.p2align 4
2715L(Fill14):
2716	movlpd	%xmm0, (%ecx)
2717	movlpd	%xmm0, 6(%ecx)
2718	RETURN
2719
2720	.p2align 4
2721L(Fill15):
2722	movlpd	%xmm0, (%ecx)
2723	movlpd	%xmm0, 7(%ecx)
2724	RETURN
2725
2726	.p2align 4
2727L(Fill16):
2728	movlpd	%xmm0, (%ecx)
2729	movlpd	%xmm0, 8(%ecx)
2730	RETURN
2731
2732	.p2align 4
/* StrncpyFillExit1 / FillFrom1To16Bytes (strncpy builds only): final
   step of the zero fill.

   L(StrncpyFillExit1) is entered with %ebx holding the remaining count
   minus 16 and adds the 16 back (lea, so no flags are touched).
   L(FillFrom1To16Bytes) then dispatches on %ebx to the matching
   L(FillN) stub.  The comparisons are signed (jl/jg); the visible
   callers arrive with 0 <= %ebx <= 16.  */
2733L(StrncpyFillExit1):
2734	lea	16(%ebx), %ebx
2735L(FillFrom1To16Bytes):
2736	test	%ebx, %ebx
2737	jz	L(Fill0)
2738	cmp	$16, %ebx
2739	je	L(Fill16)
2740	cmp	$8, %ebx
2741	je	L(Fill8)
2742	jg	L(FillMore8)
2743	cmp	$4, %ebx
2744	je	L(Fill4)
2745	jg	L(FillMore4)
	/* 1..3 left.  After jl and je have both failed the jg is always
	   taken; the same holds for the other jl/je/jg triples below.  */
2746	cmp	$2, %ebx
2747	jl	L(Fill1)
2748	je	L(Fill2)
2749	jg	L(Fill3)
2750L(FillMore8):	/* but less than 16 */
2751	cmp	$12, %ebx
2752	je	L(Fill12)
2753	jl	L(FillLess12)
2754	cmp	$14, %ebx
2755	jl	L(Fill13)
2756	je	L(Fill14)
2757	jg	L(Fill15)
2758L(FillMore4):	/* but less than 8 */
2759	cmp	$6, %ebx
2760	jl	L(Fill5)
2761	je	L(Fill6)
2762	jg	L(Fill7)
2763L(FillLess12):	/* but more than 8 */
2764	cmp	$10, %ebx
2765	jl	L(Fill9)
2766	je	L(Fill10)
2767	jmp	L(Fill11)
2768
2769	CFI_PUSH(%edi)
2770
2771	.p2align 4
/* StrncpyFillTailWithZero (strncpy builds only): zero %ebx bytes
   starting at %ecx, then return through one of the L(FillN) stubs.

   The ...1 entry first pops %edi, for callers that still have it on
   the stack (the L(ExitN) stubs); the plain entry is used by the
   L(ExitTailN) stubs, which return without popping %edi.

   %xmm0 and %edx are cleared here and serve as the zero source for all
   stores, including the ones done by the L(FillN) stubs.  */
2772L(StrncpyFillTailWithZero1):
2773	POP	(%edi)
2774L(StrncpyFillTailWithZero):
2775	pxor	%xmm0, %xmm0
2776	xor	%edx, %edx
	/* At most 16 bytes: add the 16 back and dispatch directly.  */
2777	sub	$16, %ebx
2778	jbe	L(StrncpyFillExit1)
2779
	/* More than 16: write the first 16 zero bytes with two 8-byte
	   stores (no alignment requirement) ...  */
2780	movlpd	%xmm0, (%ecx)
2781	movlpd	%xmm0, 8(%ecx)
2782
2783	lea	16(%ecx), %ecx
2784
	/* ... then round %ecx down to a 16-byte boundary so movdqa can be
	   used.  The %edx bytes stepped back over are added to the count
	   and get zeroed a second time.  %edx is cleared again afterwards
	   because the L(FillN) stubs store from it.  */
2785	mov	%ecx, %edx
2786	and	$0xf, %edx
2787	sub	%edx, %ecx
2788	add	%edx, %ebx
2789	xor	%edx, %edx
2790	sub	$64, %ebx
2791	jb	L(StrncpyFillLess64)
2792
/* 64 zero bytes per iteration; %ebx runs 64 below the true remainder.  */
2793L(StrncpyFillLoopMovdqa):
2794	movdqa	%xmm0, (%ecx)
2795	movdqa	%xmm0, 16(%ecx)
2796	movdqa	%xmm0, 32(%ecx)
2797	movdqa	%xmm0, 48(%ecx)
2798	lea	64(%ecx), %ecx
2799	sub	$64, %ebx
2800	jae	L(StrncpyFillLoopMovdqa)
2801
/* Fewer than 64 bytes remain (%ebx = remainder - 64).  */
2802L(StrncpyFillLess64):
	/* %ebx = remainder - 32; negative means fewer than 32 remain.  */
2803	add	$32, %ebx
2804	jl	L(StrncpyFillLess32)
2805	movdqa	%xmm0, (%ecx)
2806	movdqa	%xmm0, 16(%ecx)
2807	lea	32(%ecx), %ecx
	/* 32 written.  If fewer than 16 are left, L(StrncpyFillExit1) adds
	   the 16 back; otherwise write 16 more and dispatch the rest.  */
2808	sub	$16, %ebx
2809	jl	L(StrncpyFillExit1)
2810	movdqa	%xmm0, (%ecx)
2811	lea	16(%ecx), %ecx
2812	jmp	L(FillFrom1To16Bytes)
2813
/* Fewer than 32 bytes remain (%ebx = remainder - 32).  */
2814L(StrncpyFillLess32):
	/* %ebx = remainder - 16; negative means fewer than 16 remain.  */
2815	add	$16, %ebx
2816	jl	L(StrncpyFillExit1)
2817	movdqa	%xmm0, (%ecx)
2818	lea	16(%ecx), %ecx
2819	jmp	L(FillFrom1To16Bytes)
2820#  endif
2821
2822	.p2align 4
2823L(ExitTail1):
2824	movb	(%ecx), %al
2825	movb	%al, (%edx)
2826	SAVE_RESULT_TAIL (0)
2827#  ifdef USE_AS_STRNCPY
2828	sub	$1, %ebx
2829	lea	1(%edx), %ecx
2830	jnz	L(StrncpyFillTailWithZero)
2831#   ifdef USE_AS_STPCPY
2832	cmpb	$1, (%eax)
2833	sbb	$-1, %eax
2834#   endif
2835#  endif
2836	RETURN
2837
2838	.p2align 4
2839L(ExitTail2):
2840	movw	(%ecx), %ax
2841	movw	%ax, (%edx)
2842	SAVE_RESULT_TAIL (1)
2843#  ifdef USE_AS_STRNCPY
2844	sub	$2, %ebx
2845	lea	2(%edx), %ecx
2846	jnz	L(StrncpyFillTailWithZero)
2847#   ifdef USE_AS_STPCPY
2848	cmpb	$1, (%eax)
2849	sbb	$-1, %eax
2850#   endif
2851#  endif
2852	RETURN
2853
2854	.p2align 4
2855L(ExitTail3):
2856	movw	(%ecx), %ax
2857	movw	%ax, (%edx)
2858	movb	2(%ecx), %al
2859	movb	%al, 2(%edx)
2860	SAVE_RESULT_TAIL (2)
2861#  ifdef USE_AS_STRNCPY
2862	sub	$3, %ebx
2863	lea	3(%edx), %ecx
2864	jnz	L(StrncpyFillTailWithZero)
2865#   ifdef USE_AS_STPCPY
2866	cmpb	$1, (%eax)
2867	sbb	$-1, %eax
2868#   endif
2869#  endif
2870	RETURN
2871
2872	.p2align 4
2873L(ExitTail4):
2874	movl	(%ecx), %eax
2875	movl	%eax, (%edx)
2876	SAVE_RESULT_TAIL (3)
2877#  ifdef USE_AS_STRNCPY
2878	sub	$4, %ebx
2879	lea	4(%edx), %ecx
2880	jnz	L(StrncpyFillTailWithZero)
2881#   ifdef USE_AS_STPCPY
2882	cmpb	$1, (%eax)
2883	sbb	$-1, %eax
2884#   endif
2885#  endif
2886	RETURN
2887
2888	.p2align 4
2889L(ExitTail5):
2890	movl	(%ecx), %eax
2891	movl	%eax, (%edx)
2892	movb	4(%ecx), %al
2893	movb	%al, 4(%edx)
2894	SAVE_RESULT_TAIL (4)
2895#  ifdef USE_AS_STRNCPY
2896	sub	$5, %ebx
2897	lea	5(%edx), %ecx
2898	jnz	L(StrncpyFillTailWithZero)
2899#   ifdef USE_AS_STPCPY
2900	cmpb	$1, (%eax)
2901	sbb	$-1, %eax
2902#   endif
2903#  endif
2904	RETURN
2905
2906	.p2align 4
2907L(ExitTail6):
2908	movl	(%ecx), %eax
2909	movl	%eax, (%edx)
2910	movw	4(%ecx), %ax
2911	movw	%ax, 4(%edx)
2912	SAVE_RESULT_TAIL (5)
2913#  ifdef USE_AS_STRNCPY
2914	sub	$6, %ebx
2915	lea	6(%edx), %ecx
2916	jnz	L(StrncpyFillTailWithZero)
2917#   ifdef USE_AS_STPCPY
2918	cmpb	$1, (%eax)
2919	sbb	$-1, %eax
2920#   endif
2921#  endif
2922	RETURN
2923
2924	.p2align 4
2925L(ExitTail7):
2926	movl	(%ecx), %eax
2927	movl	%eax, (%edx)
2928	movl	3(%ecx), %eax
2929	movl	%eax, 3(%edx)
2930	SAVE_RESULT_TAIL (6)
2931#  ifdef USE_AS_STRNCPY
2932	sub	$7, %ebx
2933	lea	7(%edx), %ecx
2934	jnz	L(StrncpyFillTailWithZero)
2935#   ifdef USE_AS_STPCPY
2936	cmpb	$1, (%eax)
2937	sbb	$-1, %eax
2938#   endif
2939#  endif
2940	RETURN
2941
2942	.p2align 4
2943L(ExitTail8):
2944	movlpd	(%ecx), %xmm0
2945	movlpd	%xmm0, (%edx)
2946	SAVE_RESULT_TAIL (7)
2947#  ifdef USE_AS_STRNCPY
2948	sub	$8, %ebx
2949	lea	8(%edx), %ecx
2950	jnz	L(StrncpyFillTailWithZero)
2951#  endif
2952	RETURN
2953
/* L(ExitTail9): final copy of exactly 9 bytes from (%ecx) to (%edx):
   8 bytes through %xmm0 and the ninth byte through %al.  Both loads are
   issued before either store.
   SAVE_RESULT_TAIL then sets %eax to %edx + 8 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail9):
	movlpd	(%ecx), %xmm0
	movb	8(%ecx), %al
	movlpd	%xmm0, (%edx)
	movb	%al, 8(%edx)
	SAVE_RESULT_TAIL (8)
#  ifdef USE_AS_STRNCPY
	sub	$9, %ebx		/* ZF is set only when %ebx was exactly 9.  */
	lea	9(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
2971
/* L(ExitTail10): final copy of exactly 10 bytes from (%ecx) to (%edx):
   8 bytes through %xmm0 and 2 bytes through %ax.
   SAVE_RESULT_TAIL then sets %eax to %edx + 9 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail10):
	movlpd	(%ecx), %xmm0
	movw	8(%ecx), %ax
	movlpd	%xmm0, (%edx)
	movw	%ax, 8(%edx)
	SAVE_RESULT_TAIL (9)
#  ifdef USE_AS_STRNCPY
	sub	$10, %ebx		/* ZF is set only when %ebx was exactly 10.  */
	lea	10(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
2989
/* L(ExitTail11): final copy of exactly 11 bytes from (%ecx) to (%edx):
   8 bytes through %xmm0 plus an overlapping 4-byte move at offset 7.
   SAVE_RESULT_TAIL then sets %eax to %edx + 10 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail11):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %eax		/* Bytes 7..10; byte 7 is written twice.  */
	movlpd	%xmm0, (%edx)
	movl	%eax, 7(%edx)
	SAVE_RESULT_TAIL (10)
#  ifdef USE_AS_STRNCPY
	sub	$11, %ebx		/* ZF is set only when %ebx was exactly 11.  */
	lea	11(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
3007
/* L(ExitTail12): final copy of exactly 12 bytes from (%ecx) to (%edx):
   8 bytes through %xmm0 and 4 bytes through %eax.
   SAVE_RESULT_TAIL then sets %eax to %edx + 11 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail12):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
#  ifdef USE_AS_STRNCPY
	sub	$12, %ebx		/* ZF is set only when %ebx was exactly 12.  */
	lea	12(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
3025
/* L(ExitTail13): final copy of exactly 13 bytes from (%ecx) to (%edx)
   as two overlapping 8-byte SSE moves (offsets 0 and 5).
   SAVE_RESULT_TAIL then sets %eax to %edx + 12 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail13):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1		/* Bytes 5..12; bytes 5..7 overlap.  */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	SAVE_RESULT_TAIL (12)
#  ifdef USE_AS_STRNCPY
	sub	$13, %ebx		/* ZF is set only when %ebx was exactly 13.  */
	lea	13(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
3043
/* L(ExitTail14): final copy of exactly 14 bytes from (%ecx) to (%edx)
   as two overlapping 8-byte SSE moves (offsets 0 and 6).
   SAVE_RESULT_TAIL then sets %eax to %edx + 13 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail14):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1		/* Bytes 6..13; bytes 6..7 overlap.  */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	SAVE_RESULT_TAIL (13)
#  ifdef USE_AS_STRNCPY
	sub	$14, %ebx		/* ZF is set only when %ebx was exactly 14.  */
	lea	14(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
3061
/* L(ExitTail15): final copy of exactly 15 bytes from (%ecx) to (%edx)
   as two overlapping 8-byte SSE moves (offsets 0 and 7).
   SAVE_RESULT_TAIL then sets %eax to %edx + 14 (stpcpy builds) or %edx.
   Unlike most sibling ExitTail labels there is no cmpb/sbb adjustment of
   %eax here.  NOTE(review): presumably this label is only entered when
   byte 14 is NUL (L(StrncpyExit15Bytes) handles the count-equals-15 case
   inline) -- confirm against the callers outside this part of the file.  */
	.p2align 4
L(ExitTail15):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1		/* Bytes 7..14; byte 7 overlaps.  */
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	SAVE_RESULT_TAIL (14)
#  ifdef USE_AS_STRNCPY
	sub	$15, %ebx		/* ZF is set only when %ebx was exactly 15.  */
	lea	15(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#  endif
	RETURN
3075
/* L(ExitTail16): final copy of exactly 16 bytes from (%ecx) to (%edx)
   with one unaligned 16-byte SSE move.
   SAVE_RESULT_TAIL then sets %eax to %edx + 15 (stpcpy builds) or %edx.  */
	.p2align 4
L(ExitTail16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	SAVE_RESULT_TAIL (15)
#  ifdef USE_AS_STRNCPY
	sub	$16, %ebx		/* ZF is set only when %ebx was exactly 16.  */
	lea	16(%edx), %ecx		/* lea leaves the flags from sub intact.  */
	jnz	L(StrncpyFillTailWithZero)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
#  endif
	RETURN
3091# endif
3092
3093# ifdef USE_AS_STRNCPY
#  ifndef USE_AS_STRCAT
	/* Unwind annotations only: the code below is described as running
	   with %esi and %edi pushed.  No instructions are emitted here.  */
	CFI_PUSH (%esi)
	CFI_PUSH (%edi)
#  endif
/* L(StrncpyLeaveCase2OrCase3): strncpy exit from a 64-byte step.
   Dispatches on %eax: non-zero goes to L(Aligned64LeaveCase2), zero falls
   through to L(Aligned64LeaveCase3).
   Both variants add 48 to %ebx and then store %xmm4, %xmm5, %xmm6 to
   -64(%edx), -48(%edx), -32(%edx) one at a time, advancing %esi by 16 and
   reducing %ebx by 16 per store, leaving as soon as %ebx runs out.
   NOTE(review): the meaning of %eax on entry and the contents of
   %xmm0 and %xmm4-%xmm7 are established outside this part of the file;
   confirm there (Case2 appears to rely on %xmm0 being usable as the
   comparand for pcmpeqb).  */
	.p2align 4
L(StrncpyLeaveCase2OrCase3):
	test	%eax, %eax
	jnz	L(Aligned64LeaveCase2)

/* Case 3 variant: no per-chunk byte mask is computed.  */
L(Aligned64LeaveCase3):
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase3)	/* Signed: adjusted count <= 0.  */
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)	/* Unsigned: count was <= 16.  */
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx			/* Subtract without touching flags.  */
	jmp	L(CopyFrom1To16BytesCase3)

/* Case 2 variant: before each store the next chunk is compared against
   %xmm0 with pcmpeqb and the byte mask is placed in %eax.  The count
   check comes first; a non-zero mask then leaves through
   L(CopyFrom1To16Bytes).  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx			/* Subtract without touching flags.  */
	jmp	L(CopyFrom1To16BytesCase2)
3151
3152/*--------------------------------------------------*/
/* L(StrncpyExit1Case2OrCase3): copy 15 bytes from (%ecx) to (%edx) as two
   overlapping 8-byte moves (offsets 0 and 7), set %esi to 15, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit1Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	mov	$15, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3163
/* L(StrncpyExit2Case2OrCase3): copy 14 bytes from (%ecx) to (%edx) as two
   overlapping 8-byte moves (offsets 0 and 6), set %esi to 14, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit2Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	mov	$14, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3174
/* L(StrncpyExit3Case2OrCase3): copy 13 bytes from (%ecx) to (%edx) as two
   overlapping 8-byte moves (offsets 0 and 5), set %esi to 13, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit3Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	mov	$13, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3185
/* L(StrncpyExit4Case2OrCase3): copy 12 bytes from (%ecx) to (%edx)
   (8 bytes through %xmm0, 4 bytes through %esi as scratch), set %esi
   to 12, then dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit4Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %esi		/* %esi is only a temporary here.  */
	movlpd	%xmm0, (%edx)
	movl	%esi, 8(%edx)
	mov	$12, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3196
/* L(StrncpyExit5Case2OrCase3): copy 11 bytes from (%ecx) to (%edx)
   (8 bytes through %xmm0 plus an overlapping 4-byte move at offset 7
   through %esi), set %esi to 11, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit5Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %esi		/* %esi is only a temporary here.  */
	movlpd	%xmm0, (%edx)
	movl	%esi, 7(%edx)
	mov	$11, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3207
/* L(StrncpyExit6Case2OrCase3): copy 10 bytes from (%ecx) to (%edx)
   (8 bytes through %xmm0 plus an overlapping 4-byte move at offset 6
   through %esi), set %esi to 10, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit6Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	6(%ecx), %esi		/* %esi is only a temporary here.  */
	movlpd	%xmm0, (%edx)
	movl	%esi, 6(%edx)
	mov	$10, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3218
/* L(StrncpyExit7Case2OrCase3): copy 9 bytes from (%ecx) to (%edx)
   (8 bytes through %xmm0 plus an overlapping 4-byte move at offset 5
   through %esi), set %esi to 9, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit7Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	5(%ecx), %esi		/* %esi is only a temporary here.  */
	movlpd	%xmm0, (%edx)
	movl	%esi, 5(%edx)
	mov	$9, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3229
/* L(StrncpyExit8Case2OrCase3): copy 8 bytes from (%ecx) to (%edx)
   through %xmm0, set %esi to 8, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit8Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	mov	$8, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3238
/* L(StrncpyExit9Case2OrCase3): move 8 bytes from (%ecx) to (%edx)
   through %xmm0, set %esi to 7, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.
   Note that the move is 8 bytes wide while %esi is set to 7.  */
	.p2align 4
L(StrncpyExit9Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	mov	$7, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3247
/* L(StrncpyExit10Case2OrCase3): move 8 bytes starting one byte before
   %ecx/%edx (offsets -1..6) through %xmm0, set %esi to 6, then dispatch
   on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit10Case2OrCase3):
	movlpd	-1(%ecx), %xmm0
	movlpd	%xmm0, -1(%edx)
	mov	$6, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3256
/* L(StrncpyExit11Case2OrCase3): move 8 bytes starting two bytes before
   %ecx/%edx (offsets -2..5) through %xmm0, set %esi to 5, then dispatch
   on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit11Case2OrCase3):
	movlpd	-2(%ecx), %xmm0
	movlpd	%xmm0, -2(%edx)
	mov	$5, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3265
/* L(StrncpyExit12Case2OrCase3): copy 4 bytes from (%ecx) to (%edx) using
   %esi as scratch, set %esi to 4, then dispatch on %eax:
   non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit12Case2OrCase3):
	movl	(%ecx), %esi		/* %esi is only a temporary here.  */
	movl	%esi, (%edx)
	mov	$4, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3274
/* L(StrncpyExit13Case2OrCase3): move 4 bytes starting one byte before
   %ecx/%edx (offsets -1..2) using %esi as scratch, set %esi to 3, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit13Case2OrCase3):
	movl	-1(%ecx), %esi		/* %esi is only a temporary here.  */
	movl	%esi, -1(%edx)
	mov	$3, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3283
/* L(StrncpyExit14Case2OrCase3): move 4 bytes starting two bytes before
   %ecx/%edx (offsets -2..1) using %esi as scratch, set %esi to 2, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit14Case2OrCase3):
	movl	-2(%ecx), %esi		/* %esi is only a temporary here.  */
	movl	%esi, -2(%edx)
	mov	$2, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3292
/* L(StrncpyExit15Case2OrCase3): move 4 bytes starting three bytes before
   %ecx/%edx (offsets -3..0) using %esi as scratch, set %esi to 1, then
   dispatch on %eax: non-zero -> Case2, zero -> Case3.  */
	.p2align 4
L(StrncpyExit15Case2OrCase3):
	movl	-3(%ecx), %esi		/* %esi is only a temporary here.  */
	movl	%esi, -3(%edx)
	mov	$1, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3301
/* L(StrncpyLeave1) / L(StrncpyExit1): strncpy exit for the palignr $1 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit1) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 31(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit1)		/* Signed: adjusted count <= 0.  */
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	31(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)		/* Unsigned: count was <= 16.  */
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 15, copy the 16 bytes that end at the
   new pointers, clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit1):
	lea	15(%edx, %esi), %edx
	lea	15(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3331
/* L(StrncpyLeave2) / L(StrncpyExit2): strncpy exit for the palignr $2 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit2) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 30(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave2):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit2)		/* Signed: adjusted count <= 0.  */
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	30(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)		/* Unsigned: count was <= 16.  */
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 14, copy the 16 bytes that end at the
   new pointers, clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit2):
	lea	14(%edx, %esi), %edx
	lea	14(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3361
/* L(StrncpyLeave3) / L(StrncpyExit3): strncpy exit for the palignr $3 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit3) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 29(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave3):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit3)		/* Signed: adjusted count <= 0.  */
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	29(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)		/* Unsigned: count was <= 16.  */
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 13, copy the 16 bytes that end at the
   new pointers, clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit3):
	lea	13(%edx, %esi), %edx
	lea	13(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3391
/* L(StrncpyLeave4) / L(StrncpyExit4): strncpy exit for the palignr $4 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit4) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 28(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave4):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit4)		/* Signed: adjusted count <= 0.  */
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)		/* Unsigned: count was <= 16.  */
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 12, copy the 12 bytes that end at the
   new pointers (8 through %xmm0, 4 through %eax), clear %esi and
   continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit4):
	lea	12(%edx, %esi), %edx
	lea	12(%ecx, %esi), %ecx
	movlpd	-12(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3423
/* L(StrncpyLeave5) / L(StrncpyExit5): strncpy exit for the palignr $5 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit5) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 27(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave5):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit5)		/* Signed: adjusted count <= 0.  */
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	27(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)		/* Unsigned: count was <= 16.  */
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 11, copy the 11 bytes that end at the
   new pointers (8 through %xmm0 plus an overlapping 4 through %eax),
   clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit5):
	lea	11(%edx, %esi), %edx
	lea	11(%ecx, %esi), %ecx
	movlpd	-11(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -11(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3455
/* L(StrncpyLeave6) / L(StrncpyExit6): strncpy exit for the palignr $6 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit6) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 26(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave6):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit6)		/* Signed: adjusted count <= 0.  */
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	26(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)		/* Unsigned: count was <= 16.  */
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 10, copy the 10 bytes that end at the
   new pointers (8 through %xmm0, 2 through %ax), clear %esi and
   continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit6):
	lea	10(%edx, %esi), %edx
	lea	10(%ecx, %esi), %ecx

	movlpd	-10(%ecx), %xmm0
	movw	-2(%ecx), %ax
	movlpd	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3488
/* L(StrncpyLeave7) / L(StrncpyExit7): strncpy exit for the palignr $7 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit7) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 25(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave7):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit7)		/* Signed: adjusted count <= 0.  */
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	25(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)		/* Unsigned: count was <= 16.  */
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 9, copy the 9 bytes that end at the
   new pointers (8 through %xmm0, 1 through %ah), clear %esi and
   continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit7):
	lea	9(%edx, %esi), %edx
	lea	9(%ecx, %esi), %ecx

	movlpd	-9(%ecx), %xmm0
	movb	-1(%ecx), %ah
	movlpd	%xmm0, -9(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3521
/* L(StrncpyLeave8) / L(StrncpyExit8): strncpy exit for the palignr $8 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit8) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 24(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave8):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit8)		/* Signed: adjusted count <= 0.  */
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)		/* Unsigned: count was <= 16.  */
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 8, copy the 8 bytes that end at the
   new pointers, clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit8):
	lea	8(%edx, %esi), %edx
	lea	8(%ecx, %esi), %ecx
	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3551
/* L(StrncpyLeave9) / L(StrncpyExit9): strncpy exit for the palignr $9 path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit9) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 23(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave9):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit9)		/* Signed: adjusted count <= 0.  */
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	23(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)		/* Unsigned: count was <= 16.  */
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 7, then move the 8 bytes that end at
   the new pointers (one byte more than the 7-byte advance), clear %esi
   and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit9):
	lea	7(%edx, %esi), %edx
	lea	7(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3582
/* L(StrncpyLeave10) / L(StrncpyExit10): strncpy exit for the palignr $10
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit10) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 22(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave10):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit10)	/* Signed: adjusted count <= 0.  */
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	22(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)	/* Unsigned: count was <= 16.  */
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 6, then move the 8 bytes that end at
   the new pointers (two bytes more than the 6-byte advance), clear %esi
   and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit10):
	lea	6(%edx, %esi), %edx
	lea	6(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3613
/* L(StrncpyLeave11) / L(StrncpyExit11): strncpy exit for the palignr $11
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit11) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 21(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave11):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit11)	/* Signed: adjusted count <= 0.  */
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	21(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)	/* Unsigned: count was <= 16.  */
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 5, copy the 5 bytes that end at the
   new pointers (4 through %esi as scratch, 1 through %ah), clear %esi
   and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit11):
	lea	5(%edx, %esi), %edx
	lea	5(%ecx, %esi), %ecx
	movl	-5(%ecx), %esi		/* %esi is free once the pointers moved.  */
	movb	-1(%ecx), %ah
	movl	%esi, -5(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3645
/* L(StrncpyLeave12) / L(StrncpyExit12): strncpy exit for the palignr $12
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit12) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 20(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave12):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit12)	/* Signed: adjusted count <= 0.  */
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)	/* Unsigned: count was <= 16.  */
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 4, copy the 4 bytes that end at the
   new pointers through %eax, clear %esi and continue in
   L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit12):
	lea	4(%edx, %esi), %edx
	lea	4(%ecx, %esi), %ecx
	movl	-4(%ecx), %eax
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3675
/* L(StrncpyLeave13) / L(StrncpyExit13): strncpy exit for the palignr $13
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit13) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 19(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave13):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit13)	/* Signed: adjusted count <= 0.  */
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)	/* Unsigned: count was <= 16.  */
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 3, then move the 4 bytes that end at
   the new pointers (one byte more than the 3-byte advance) through
   %eax, clear %esi and continue in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit13):
	lea	3(%edx, %esi), %edx
	lea	3(%ecx, %esi), %ecx

	movl	-4(%ecx), %eax
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3706
/* L(StrncpyLeave14) / L(StrncpyExit14): strncpy exit for the palignr $14
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit14) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 18(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave14):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit14)	/* Signed: adjusted count <= 0.  */
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)	/* Unsigned: count was <= 16.  */
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 2, copy the 2 bytes that end at the
   new pointers through %ax, clear %esi and continue in
   L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit14):
	lea	2(%edx, %esi), %edx
	lea	2(%ecx, %esi), %ecx
	movw	-2(%ecx), %ax
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3736
/* L(StrncpyLeave15) / L(StrncpyExit15): strncpy exit for the palignr $15
   path.
   After adding 48 to %ebx, up to four 16-byte chunks are stored at
   (%edx), 16(%edx), 32(%edx) and 48(%edx); %esi grows by 16 and %ebx
   shrinks by 16 per chunk, and the code leaves for L(StrncpyExit15) as
   soon as the count runs out.  The first two chunks are assembled with
   palignr, the last two are taken from %xmm4 and %xmm5.
   The movaps load from 17(%ecx) requires that address to be 16-byte
   aligned.
   NOTE(review): %xmm1, %xmm2, %xmm4, %xmm5 and the entry values of
   %esi/%ebx are prepared outside this part of the file -- confirm there.  */
L(StrncpyLeave15):
	movaps	%xmm2, %xmm3		/* Keep a copy; palignr overwrites %xmm2.  */
	add	$48, %ebx
	jle	L(StrncpyExit15)	/* Signed: adjusted count <= 0.  */
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)	/* Unsigned: count was <= 16.  */
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* Subtract without touching flags.  */
/* Advance both pointers by %esi + 1, copy the single byte that precedes
   the new pointers through %ah, clear %esi and continue in
   L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit15):
	lea	1(%edx, %esi), %edx
	lea	1(%ecx, %esi), %ecx
	movb	-1(%ecx), %ah
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3766# endif
3767
3768# ifndef USE_AS_STRCAT
3769#  ifdef USE_AS_STRNCPY
3770	CFI_POP (%esi)
3771	CFI_POP (%edi)
3772
3773	.p2align 4
3774L(ExitTail0):
3775	movl	%edx, %eax
3776	RETURN
3777
/* L(StrncpyExit15Bytes): short strncpy path that examines source bytes
   8..13 and the count in %ebx.
   Counts of 12 or less are handed to L(StrncpyExit12Bytes).  Otherwise
   the first NUL among bytes 8..13, or a count of 13 or 14, selects the
   matching L(ExitTailN).  If none applies, 15 bytes are copied inline
   as two overlapping 8-byte moves.
   NOTE(review): bytes 0..7 are not examined here and counts above 15
   are not distinguished; presumably the caller has already handled
   both -- confirm at the jump site.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)
	/* %ebx > 12 here, so bytes 8..11 may be read without a count check.  */
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	/* From here each byte test is preceded by a count test.  */
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
#   ifdef USE_AS_STPCPY
	/* %eax = %edx + 14, advanced by one more unless that byte is NUL.  */
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   else
	movl	%edx, %eax
#   endif
	RETURN
3810
/* L(StrncpyExit12Bytes): short strncpy path that examines source bytes
   8..10 and the count in %ebx.
   A count of 9, 10 or 11, or the first NUL among bytes 8..10, selects the
   matching L(ExitTailN).  Otherwise 12 bytes are copied inline (8 through
   %xmm0, 4 through %eax) and the result is produced by SAVE_RESULT_TAIL
   (%edx + 11 in stpcpy builds, %edx otherwise).
   NOTE(review): presumably entered only with 9 <= %ebx <= 12 -- confirm
   at the jump sites.  */
	.p2align 4
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
	RETURN
3835
/* L(StrncpyExit8Bytes): short strncpy path that examines source bytes
   0..6 and the count in %ebx.
   Counts of 4 or less are handed to L(StrncpyExit4Bytes).  Otherwise the
   first NUL among bytes 0..6, or a count of 5, 6 or 7, selects the
   matching L(ExitTailN).  If none applies, 8 bytes are copied inline
   through %xmm0.
   NOTE(review): presumably entered only with %ebx <= 8 -- confirm at the
   jump site.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)
	/* %ebx > 4 here, so bytes 0..3 may be read without a count check.  */
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)

	/* From here each byte test is preceded by a count test.  */
	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
#   ifdef USE_AS_STPCPY
	/* %eax = %edx + 7, advanced by one more unless that byte is NUL.  */
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   else
	movl	%edx, %eax
#   endif
	RETURN
3871
/* L(StrncpyExit4Bytes): short strncpy path that examines source bytes
   0..2 and the count in %ebx.
   A zero count goes to L(ExitTail0).  A count of 1, 2 or 3, or the first
   NUL among bytes 0..2, selects the matching L(ExitTailN).  Otherwise
   4 bytes are copied inline through %eax and the result is produced by
   SAVE_RESULT_TAIL (%edx + 3 in stpcpy builds, %edx otherwise).
   NOTE(review): presumably entered only with %ebx <= 4 -- confirm at the
   jump site.  */
	.p2align 4
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
#   ifdef USE_AS_STPCPY
	/* Advance %eax by one unless the byte it points at is NUL.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#   endif
	RETURN
3896#  endif
3897
3898END (STRCPY)
3899# endif
3900#endif
3901