/* strcmp with SSSE3
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include <sysdep.h>
#include "asm-syntax.h"

#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifdef USE_AS_STRNCMP
# ifndef STRCMP
#  define STRCMP	__strncmp_ssse3
# endif
# define STR1		8
# define STR2		STR1+4
# define CNT		STR2+4
# define RETURN		POP (REM); ret; .p2align 4; CFI_PUSH (REM)
# define UPDATE_STRNCMP_COUNTER				\
	/* calculate the number of bytes left to compare */	\
	mov	$16, %esi;				\
	sub	%ecx, %esi;				\
	cmp	%esi, REM;				\
	jbe	L(more8byteseq);			\
	sub	%esi, REM
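/* UPDATE_STRNCMP_COUNTER runs after the first (partial) vector has
   been handled, with %ecx holding the string offset within its aligned
   16-byte block, so the head covered 16 - %ecx bytes.  A rough C
   sketch of the bookkeeping, with rem standing in for REM
   (illustrative only):

	unsigned int head = 16 - ecx;
	if (rem <= head)
	  goto more8byteseq;
	rem -= head;
*/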
# define FLAGS		%ebx
# define REM		%ebp
#elif defined USE_AS_STRCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strcasecmp_l_ssse3
# endif
# ifdef PIC
#  define STR1		8
# else
#  define STR1		4
# endif
# define STR2		STR1+4
# define LOCALE		12	/* Loaded before the adjustment.  */
# ifdef PIC
#  define RETURN	POP (%ebx); ret; .p2align 4; CFI_PUSH (%ebx)
# else
#  define RETURN	ret; .p2align 4
# endif
# define UPDATE_STRNCMP_COUNTER
# define FLAGS		(%esp)
# define NONASCII	__strcasecmp_nonascii
#elif defined USE_AS_STRNCASECMP_L
# include "locale-defines.h"
# ifndef STRCMP
#  define STRCMP	__strncasecmp_l_ssse3
# endif
# ifdef PIC
#  define STR1		12
# else
#  define STR1		8
# endif
# define STR2		STR1+4
# define CNT		STR2+4
# define LOCALE		16	/* Loaded before the adjustment.  */
# ifdef PIC
#  define RETURN	POP (REM); POP (%ebx); ret; \
			.p2align 4; CFI_PUSH (%ebx); CFI_PUSH (REM)
# else
#  define RETURN	POP (REM); ret; .p2align 4; CFI_PUSH (REM)
# endif
# define UPDATE_STRNCMP_COUNTER				\
	/* calculate the number of bytes left to compare */	\
	mov	$16, %esi;				\
	sub	%ecx, %esi;				\
	cmp	%esi, REM;				\
	jbe	L(more8byteseq);			\
	sub	%esi, REM
# define FLAGS		(%esp)
# define REM		%ebp
# define NONASCII	__strncasecmp_nonascii
#else
# ifndef STRCMP
#  define STRCMP	__strcmp_ssse3
# endif
# define STR1		4
# define STR2		STR1+4
# define RETURN		ret; .p2align 4
# define UPDATE_STRNCMP_COUNTER
# define FLAGS		%ebx
#endif
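/* Summary of the parameterization above: STR1/STR2/CNT/LOCALE are the
   stack offsets of the incoming arguments after the prologue pushes
   (hence the PIC and per-variant differences), REM carries the
   remaining byte count for the counted variants, and FLAGS records the
   shift amount of the unaligned loops plus a pointer-swap bit (0x20).
   The case-insensitive variants keep FLAGS in a stack slot, chiefly
   because %ebx is reserved for the GOT pointer and the tolower table
   in PIC builds.  */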

	.section .text.ssse3,"ax",@progbits

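/* __strcasecmp_ssse3 and __strncasecmp_ssse3 below look up the current
   thread's locale, select its LC_CTYPE data and test the
   _NL_CTYPE_NONASCII_CASE flag: locales whose case conversion is not
   plain ASCII are handed off to __str(n)casecmp_nonascii, everything
   else falls through to the ASCII fast path at L(ascii).  */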
#ifdef USE_AS_STRCASECMP_L
ENTRY (__strcasecmp_ssse3)
# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax
# else
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	POP	(%ebx)
	jmp	__strcasecmp_nonascii
# else
	jne	__strcasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strcasecmp_ssse3)
#endif

#ifdef USE_AS_STRNCASECMP_L
ENTRY (__strncasecmp_ssse3)
# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
	movl	__libc_tsd_LOCALE@GOTNTPOFF(%ebx), %eax
	movl	%gs:(%eax), %eax
# else
	movl	%gs:__libc_tsd_LOCALE@NTPOFF, %eax
# endif
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
# ifdef PIC
	je	L(ascii)
	POP	(%ebx)
	jmp	__strncasecmp_nonascii
# else
	jne	__strncasecmp_nonascii
	jmp	L(ascii)
# endif
END (__strncasecmp_ssse3)
#endif

ENTRY (STRCMP)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movl	LOCALE(%esp), %eax
# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
	movl	LOCALE_T___LOCALES+LC_CTYPE*4(%eax), %eax
# else
	movl	(%eax), %eax
# endif
	testl	$1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%eax)
	jne	NONASCII

# ifdef PIC
	PUSH	(%ebx)
	LOAD_PIC_REG(bx)
# endif
L(ascii):
	.section .rodata.cst16,"aM",@progbits,16
	.align 16
.Lbelowupper:
	.quad	0x4040404040404040
	.quad	0x4040404040404040
.Ltopupper:
	.quad	0x5b5b5b5b5b5b5b5b
	.quad	0x5b5b5b5b5b5b5b5b
.Ltouppermask:
	.quad	0x2020202020202020
	.quad	0x2020202020202020
	.previous
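/* These 16-byte constants bracket the ASCII upper-case letters:
   0x40 is 'A' - 1, 0x5b is 'Z' + 1, and 0x20 is the bit ORed in to
   turn 'A'..'Z' into 'a'..'z' (despite its name, .Ltouppermask is the
   to-lower bit used by TOLOWER below).  */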

# ifdef PIC
#  define UCLOW_reg .Lbelowupper@GOTOFF(%ebx)
#  define UCHIGH_reg .Ltopupper@GOTOFF(%ebx)
#  define LCQWORD_reg .Ltouppermask@GOTOFF(%ebx)
# else
#  define UCLOW_reg .Lbelowupper
#  define UCHIGH_reg .Ltopupper
#  define LCQWORD_reg .Ltouppermask
# endif
#endif

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	PUSH	(REM)
#endif

	movl	STR1(%esp), %edx
	movl	STR2(%esp), %eax
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	movl	CNT(%esp), REM
	cmp	$16, REM
	jb	L(less16bytes_sncmp)
#elif !defined USE_AS_STRCASECMP_L
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	add	$8, %edx
	add	$8, %eax
#endif
	movl	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
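/* A 16-byte load must not spill into an unmapped page, so the
   unaligned fast path below is only taken when neither pointer lies
   in the last 16 bytes of its page.  Illustrative C sketch of the
   test performed above:

	if (((uintptr_t) p & 0xfff) > 0xff0)
	  goto crosspage;
*/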
	pxor	%xmm0, %xmm0
	movlpd	(%eax), %xmm1
	movlpd	(%edx), %xmm2
	movhpd	8(%eax), %xmm1
	movhpd	8(%edx), %xmm2
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# define TOLOWER(reg1, reg2) \
	movdqa	reg1, %xmm5;					\
	movdqa	reg2, %xmm7;					\
	movdqa	UCHIGH_reg, %xmm6;				\
	pcmpgtb	UCLOW_reg, %xmm5;				\
	pcmpgtb	UCLOW_reg, %xmm7;				\
	pcmpgtb	reg1, %xmm6;					\
	pand	%xmm6, %xmm5;					\
	movdqa	UCHIGH_reg, %xmm6;				\
	pcmpgtb	reg2, %xmm6;					\
	pand	%xmm6, %xmm7;					\
	pand	LCQWORD_reg, %xmm5;				\
	por	%xmm5, reg1;					\
	pand	LCQWORD_reg, %xmm7;				\
	por	%xmm7, reg2
	TOLOWER (%xmm1, %xmm2)
#else
# define TOLOWER(reg1, reg2)
#endif
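/* TOLOWER lower-cases the ASCII upper-case bytes of both vectors
   without branching.  Per byte the computation is, in C terms
   (illustrative sketch; pcmpgtb compares signed, so bytes >= 0x80 are
   negative and never selected):

	signed char c = (signed char) b;
	int is_upper = (c > 0x40) && (c < 0x5b);
	b |= is_upper ? 0x20 : 0;

   pcmpgtb produces 0xff or 0x00 per byte, so the pand/por pairs apply
   this select-and-or to all 16 bytes of both registers at once.  */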
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %ecx
	sub	$0xffff, %ecx
	jnz	L(less16bytes)
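/* The three vector instructions above detect a difference and a NUL in
   one pass: xmm0 flags the zero bytes of xmm1, the second pcmpeqb
   flags the equal bytes, and psubb clears the sign bit of any byte
   that is equal but zero.  pmovmskb therefore yields 0xffff exactly
   when all 16 byte pairs match and none ends the string.  Illustrative
   C for the per-byte sign bit it collects:

	bit[i] = (a[i] == b[i]) && (a[i] != 0);
*/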
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(eq)
#endif
	add	$16, %eax
	add	$16, %edx

L(crosspage):

#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	PUSH	(FLAGS)
#endif
	PUSH	(%edi)
	PUSH	(%esi)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	pushl	$0
	cfi_adjust_cfa_offset (4)
#endif
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cfi_remember_state
#endif

	movl	%edx, %edi
	movl	%eax, %ecx
	and	$0xf, %ecx
	and	$0xf, %edi
	xor	%ecx, %eax
	xor	%edi, %edx
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	xor	FLAGS, FLAGS
#endif
	cmp	%edi, %ecx
	je	L(ashr_0)
	ja	L(bigger)
	orl	$0x20, FLAGS
	xchg	%edx, %eax
	xchg	%ecx, %edi
L(bigger):
	lea	15(%edi), %edi
	sub	%ecx, %edi
	cmp	$8, %edi
	jle	L(ashr_less_8)
	cmp	$14, %edi
	je	L(ashr_15)
	cmp	$13, %edi
	je	L(ashr_14)
	cmp	$12, %edi
	je	L(ashr_13)
	cmp	$11, %edi
	je	L(ashr_12)
	cmp	$10, %edi
	je	L(ashr_11)
	cmp	$9, %edi
	je	L(ashr_10)
L(ashr_less_8):
	je	L(ashr_9)
	cmp	$7, %edi
	je	L(ashr_8)
	cmp	$6, %edi
	je	L(ashr_7)
	cmp	$5, %edi
	je	L(ashr_6)
	cmp	$4, %edi
	je	L(ashr_5)
	cmp	$3, %edi
	je	L(ashr_4)
	cmp	$2, %edi
	je	L(ashr_3)
	cmp	$1, %edi
	je	L(ashr_2)
	cmp	$0, %edi
	je	L(ashr_1)
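/* After the alignment probe %ecx and %edi hold the in-block offsets of
   the two strings, swapped if necessary so that %ecx >= %edi; FLAGS
   bit 0x20 records the swap.  The chain of compares above dispatches
   on 15 + %edi - %ecx to L(ashr_N) with N = 16 - (%ecx - %edi): each
   loop walks the %edx string with aligned loads and re-creates its
   unaligned view via palignr $N, and the low bits of FLAGS remember N
   for the address fix-up at L(exit).  */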

/*
 * The following cases will be handled by ashr_0
 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
 *        n(0~15)            n(0~15)          15 (15 + n - n)        ashr_0
 */
	.p2align 4
L(ashr_0):
	mov	$0xffff, %esi
	movdqa	(%eax), %xmm1
	pxor	%xmm0, %xmm0
	pcmpeqb	%xmm1, %xmm0
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movdqa	(%edx), %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm2, %xmm1
#else
	pcmpeqb	(%edx), %xmm1
#endif
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	mov	%ecx, %edi
	jne	L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	movl	$0x10, FLAGS
	mov	$0x10, %ecx
	pxor	%xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa	(%eax, %ecx), %xmm1
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	movdqa	(%edx, %ecx), %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
#else
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	(%edx, %ecx), %xmm1
#endif
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	jmp	L(loop_ashr_0)

/*
 * The following cases will be handled by ashr_1
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(15)              n - 15           0 (15 + (n-15) - n)       ashr_1
 */
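/* Each L(ashr_N) loop keeps the previous aligned 16 bytes of the %edx
   string in xmm3 and the current ones in xmm2; palignr $N, %xmm3, %xmm2
   shifts their 32-byte concatenation right by N bytes, producing the
   16 unaligned bytes that line up with the aligned load from %eax.
   %edi counts up towards the %edx page end so the loop can divert to
   L(nibble_ashr_N) before an aligned load would touch a page the
   string may not reach.  */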
	.p2align 4
L(ashr_1):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$15, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-15(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$1, FLAGS
	lea	1(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_1):
	add	$16, %edi
	jg	L(nibble_ashr_1)

L(gobble_ashr_1):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_1)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_1)

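/* The page counter went positive: the next aligned load from %edx
   could cross into a new page.  Before wrapping the counter and
   resuming, L(nibble_ashr_1) scans the still-unconsumed tail of xmm3
   (mask bits 1..15, the bytes the next palignr would use) for a
   terminating NUL and bails out to the tail code if one is present or
   the remaining count is too small.  The other L(nibble_ashr_N)
   blocks below do the same with their respective masks.  */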
	.p2align 4
L(nibble_ashr_1):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfffe, %esi
	jnz	L(ashr_1_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$15, REM
	jbe	L(ashr_1_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_1)

	.p2align 4
L(ashr_1_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$1, %xmm0
	psrldq	$1, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_2
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(14~15)            n - 14          1 (15 + (n-14) - n)       ashr_2
 */
	.p2align 4
L(ashr_2):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$14, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-14(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$2, FLAGS
	lea	2(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_2):
	add	$16, %edi
	jg	L(nibble_ashr_2)

L(gobble_ashr_2):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_2)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_2)

	.p2align 4
L(nibble_ashr_2):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfffc, %esi
	jnz	L(ashr_2_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$14, REM
	jbe	L(ashr_2_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_2)

	.p2align 4
L(ashr_2_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$2, %xmm0
	psrldq	$2, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_3
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(13~15)            n - 13          2 (15 + (n-13) - n)       ashr_3
 */
	.p2align 4
L(ashr_3):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$13, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-13(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$3, FLAGS
	lea	3(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_3):
	add	$16, %edi
	jg	L(nibble_ashr_3)

L(gobble_ashr_3):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_3)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_3)

	.p2align 4
L(nibble_ashr_3):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfff8, %esi
	jnz	L(ashr_3_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$13, REM
	jbe	L(ashr_3_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_3)

	.p2align 4
L(ashr_3_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$3, %xmm0
	psrldq	$3, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_4
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(12~15)            n - 12          3 (15 + (n-12) - n)       ashr_4
 */
	.p2align 4
L(ashr_4):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$12, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-12(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$4, FLAGS
	lea	4(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_4):
	add	$16, %edi
	jg	L(nibble_ashr_4)

L(gobble_ashr_4):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_4)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_4)

	.p2align 4
L(nibble_ashr_4):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfff0, %esi
	jnz	L(ashr_4_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$12, REM
	jbe	L(ashr_4_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_4)

	.p2align 4
L(ashr_4_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$4, %xmm0
	psrldq	$4, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_5
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(11~15)            n - 11          4 (15 + (n-11) - n)       ashr_5
 */
	.p2align 4
L(ashr_5):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$11, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-11(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$5, FLAGS
	lea	5(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_5):
	add	$16, %edi
	jg	L(nibble_ashr_5)

L(gobble_ashr_5):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_5)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_5)

	.p2align 4
L(nibble_ashr_5):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xffe0, %esi
	jnz	L(ashr_5_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$11, REM
	jbe	L(ashr_5_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_5)

	.p2align 4
L(ashr_5_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$5, %xmm0
	psrldq	$5, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_6
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(10~15)            n - 10          5 (15 + (n-10) - n)       ashr_6
 */

	.p2align 4
L(ashr_6):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$10, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-10(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$6, FLAGS
	lea	6(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_6):
	add	$16, %edi
	jg	L(nibble_ashr_6)

L(gobble_ashr_6):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_6)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_6)

	.p2align 4
L(nibble_ashr_6):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xffc0, %esi
	jnz	L(ashr_6_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$10, REM
	jbe	L(ashr_6_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_6)

	.p2align 4
L(ashr_6_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$6, %xmm0
	psrldq	$6, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_7
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(9~15)             n - 9           6 (15 + (n-9) - n)        ashr_7
 */

	.p2align 4
L(ashr_7):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$9, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-9(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$7, FLAGS
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_7):
	add	$16, %edi
	jg	L(nibble_ashr_7)

L(gobble_ashr_7):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_7)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff80, %esi
	jnz	L(ashr_7_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$9, REM
	jbe	L(ashr_7_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$7, %xmm0
	psrldq	$7, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_8
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(8~15)             n - 8           7 (15 + (n-8) - n)        ashr_8
 */
	.p2align 4
L(ashr_8):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$8, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-8(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$8, FLAGS
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_8):
	add	$16, %edi
	jg	L(nibble_ashr_8)

L(gobble_ashr_8):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_8)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_8)

	.p2align 4
L(nibble_ashr_8):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff00, %esi
	jnz	L(ashr_8_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$8, REM
	jbe	L(ashr_8_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_8)

	.p2align 4
L(ashr_8_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$8, %xmm0
	psrldq	$8, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_9
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(7~15)             n - 7           8 (15 + (n-7) - n)        ashr_9
 */
	.p2align 4
L(ashr_9):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$7, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-7(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$9, FLAGS
	lea	9(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_9):
	add	$16, %edi
	jg	L(nibble_ashr_9)

L(gobble_ashr_9):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_9)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_9)

	.p2align 4
L(nibble_ashr_9):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfe00, %esi
	jnz	L(ashr_9_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$7, REM
	jbe	L(ashr_9_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_9)

	.p2align 4
L(ashr_9_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$9, %xmm0
	psrldq	$9, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_10
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(6~15)             n - 6           9 (15 + (n-6) - n)        ashr_10
 */
	.p2align 4
L(ashr_10):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$6, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-6(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$10, FLAGS
	lea	10(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_10):
	add	$16, %edi
	jg	L(nibble_ashr_10)

L(gobble_ashr_10):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_10)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_10)

	.p2align 4
L(nibble_ashr_10):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfc00, %esi
	jnz	L(ashr_10_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$6, REM
	jbe	L(ashr_10_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_10)

	.p2align 4
L(ashr_10_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$10, %xmm0
	psrldq	$10, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_11
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(5~15)             n - 5           10 (15 + (n-5) - n)       ashr_11
 */
	.p2align 4
L(ashr_11):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$5, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-5(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$11, FLAGS
	lea	11(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_11):
	add	$16, %edi
	jg	L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_11)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_11)

	.p2align 4
L(nibble_ashr_11):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xf800, %esi
	jnz	L(ashr_11_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$5, REM
	jbe	L(ashr_11_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_11)

	.p2align 4
L(ashr_11_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$11, %xmm0
	psrldq	$11, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_12
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(4~15)             n - 4           11 (15 + (n-4) - n)       ashr_12
 */
	.p2align 4
L(ashr_12):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$4, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-4(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$12, FLAGS
	lea	12(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_12):
	add	$16, %edi
	jg	L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_12)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_12)

	.p2align 4
L(nibble_ashr_12):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xf000, %esi
	jnz	L(ashr_12_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$4, REM
	jbe	L(ashr_12_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_12)

	.p2align 4
L(ashr_12_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$12, %xmm0
	psrldq	$12, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_13
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(3~15)             n - 3           12 (15 + (n-3) - n)       ashr_13
 */
	.p2align 4
L(ashr_13):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$3, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-3(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$13, FLAGS
	lea	13(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_13):
	add	$16, %edi
	jg	L(nibble_ashr_13)

L(gobble_ashr_13):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_13)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_13)

	.p2align 4
L(nibble_ashr_13):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xe000, %esi
	jnz	L(ashr_13_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$3, REM
	jbe	L(ashr_13_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_13)

	.p2align 4
L(ashr_13_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$13, %xmm0
	psrldq	$13, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_14
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(2~15)             n - 2           13 (15 + (n-2) - n)       ashr_14
 */
	.p2align 4
L(ashr_14):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$2, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-2(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$14, FLAGS
	lea	14(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_14):
	add	$16, %edi
	jg	L(nibble_ashr_14)

L(gobble_ashr_14):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_14)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_14)

	.p2align 4
L(nibble_ashr_14):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xc000, %esi
	jnz	L(ashr_14_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$2, REM
	jbe	L(ashr_14_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_14)

	.p2align 4
L(ashr_14_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$14, %xmm0
	psrldq	$14, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_15
 * ecx(offset of esi)  eax(offset of edi)   relative offset	corresponding case
 *        n(1~15)             n - 1           14 (15 + (n-1) - n)       ashr_15
 */

	.p2align 4
L(ashr_15):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$1, %xmm2
	TOLOWER (%xmm1, %xmm2)
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-1(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	orl	$15, FLAGS
	lea	15(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_15):
	add	$16, %edi
	jg	L(nibble_ashr_15)

L(gobble_ashr_15):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_15)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2
	TOLOWER (%xmm1, %xmm2)

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$16, REM
	lea	-16(REM), REM
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_15)

	.p2align 4
L(nibble_ashr_15):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0x8000, %esi
	jnz	L(ashr_15_exittail)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$1, REM
	jbe	L(ashr_15_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_15)

	.p2align 4
L(ashr_15_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$15, %xmm0
	psrldq	$15, %xmm3
	jmp	L(aftertail)

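/* L(aftertail): xmm3 holds the remaining bytes of the %edx string
   shifted into place and xmm0 their NUL mask; compare against the
   fresh load in xmm1 and invert, leaving %esi with a bit set for
   every byte pair that differs or terminates, in the same format the
   main loops hand to L(exit).  */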
	.p2align 4
L(aftertail):
	TOLOWER (%xmm1, %xmm3)
	pcmpeqb	%xmm3, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	not	%esi
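/* L(exit)/L(less32bytes): rebuild byte-accurate pointers from the loop
   counter and the shift kept in the low bits of FLAGS, undo the
   pointer swap when bit 0x20 is set, and drop into the scalar tail
   with the mismatch mask in %esi.  */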
L(exit):
	mov	FLAGS, %edi
	and	$0x1f, %edi
	lea	-16(%edi, %ecx), %edi
L(less32bytes):
	add	%edi, %edx
	add	%ecx, %eax
	testl	$0x20, FLAGS
	jz	L(ret2)
	xchg	%eax, %edx

	.p2align 4
L(ret2):
	mov	%esi, %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	addl	$4, %esp
	cfi_adjust_cfa_offset (-4)
#endif
	POP	(%esi)
	POP	(%edi)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
	POP	(FLAGS)
#endif
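/* %ecx now holds a mask whose lowest set bit marks the first byte pair
   that differs or reaches NUL.  The unrolled tests below scan the low
   byte; L(2next_8_bytes) repeats the scan on the high byte for
   positions 8..15.  */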
L(less16bytes):
	test	%cl, %cl
	jz	L(2next_8_bytes)

	test	$0x01, %cl
	jnz	L(Byte0)

	test	$0x02, %cl
	jnz	L(Byte1)

	test	$0x04, %cl
	jnz	L(Byte2)

	test	$0x08, %cl
	jnz	L(Byte3)

	test	$0x10, %cl
	jnz	L(Byte4)

	test	$0x20, %cl
	jnz	L(Byte5)

	test	$0x40, %cl
	jnz	L(Byte6)
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$7, REM
	jbe	L(eq)
#endif

	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif
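/* _nl_C_LC_CTYPE_tolower is a table of ints indexed from -128, so the
   constant 128*4 re-biases the unsigned byte value.  In C terms the
   two loads above are (illustrative only):

	c1 = ((const int32_t *) _nl_C_LC_CTYPE_tolower + 128)[c1];
	c2 = ((const int32_t *) _nl_C_LC_CTYPE_tolower + 128)[c2];
*/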

	sub	%ecx, %eax
	RETURN

L(Byte0):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$0, REM
	jbe	L(eq)
#endif
	movzx	(%eax), %ecx
	movzx	(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte1):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$1, REM
	jbe	L(eq)
#endif
	movzx	1(%eax), %ecx
	movzx	1(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte2):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$2, REM
	jbe	L(eq)
#endif
	movzx	2(%eax), %ecx
	movzx	2(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte3):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$3, REM
	jbe	L(eq)
#endif
	movzx	3(%eax), %ecx
	movzx	3(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte4):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$4, REM
	jbe	L(eq)
#endif
	movzx	4(%eax), %ecx
	movzx	4(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte5):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$5, REM
	jbe	L(eq)
#endif
	movzx	5(%eax), %ecx
	movzx	5(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(Byte6):
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$6, REM
	jbe	L(eq)
#endif
	movzx	6(%eax), %ecx
	movzx	6(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

L(2next_8_bytes):
	add	$8, %eax
	add	$8, %edx
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$8, REM
	lea	-8(REM), REM
	jbe	L(eq)
#endif
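/* The low eight positions were clean, so advance both pointers by 8
   and test the upper half of the mask via %ch; the L(ByteN) helpers
   above can be reused unchanged because they index relative to the
   adjusted pointers.  */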

	test	$0x01, %ch
	jnz	L(Byte0)

	test	$0x02, %ch
	jnz	L(Byte1)

	test	$0x04, %ch
	jnz	L(Byte2)

	test	$0x08, %ch
	jnz	L(Byte3)

	test	$0x10, %ch
	jnz	L(Byte4)

	test	$0x20, %ch
	jnz	L(Byte5)

	test	$0x40, %ch
	jnz	L(Byte6)

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	cmp	$7, REM
	jbe	L(eq)
#endif
	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax

#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%eax,4), %eax
# else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%eax,4), %eax
# endif
#endif

	sub	%ecx, %eax
	RETURN

#ifdef USE_AS_STRNCMP
L(neq_sncmp):
#endif
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
	addl	$4, %esp
	cfi_adjust_cfa_offset (-4)
#endif
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	POP	(REM)
#endif
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	POP	(%ebx)
# endif
#endif
	ret

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	.p2align 4
	cfi_restore_state
L(more8byteseq):

# ifdef USE_AS_STRNCASECMP_L
	addl	$4, %esp
	cfi_adjust_cfa_offset (-4)
# endif
	POP	(%esi)
	POP	(%edi)
# ifdef USE_AS_STRNCMP
	POP	(FLAGS)
# endif
#endif
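/* L(more8byteseq) reaches this point with the registers pushed for the
   vector loops restored: the byte count ran out while the strings were
   still equal, so execution falls through into L(eq) and returns 0.  */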

#ifdef USE_AS_STRNCMP
L(eq_sncmp):
#endif
L(eq):

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
	POP	(REM)
#endif
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
# ifdef PIC
	POP	(%ebx)
# endif
#endif
	xorl	%eax, %eax
	ret

#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
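/* For counted comparisons with n < 16 the vector path is skipped
   entirely; L(less16bytes_sncmp) walks the strings byte by byte,
   checking the limit, the (optionally case-folded) byte equality and
   the terminator at every step.  */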
2411	.p2align 4
2412# if defined USE_AS_STRNCASECMP_L && defined PIC
2413	CFI_PUSH (%ebx)
2414# endif
2415	CFI_PUSH (REM)
2416L(less16bytes_sncmp):
2417# ifdef USE_AS_STRNCASECMP_L
2418	PUSH	(%esi)
2419# endif
2420	test	REM, REM
2421	jz	L(eq_sncmp)
2422
	movzbl	(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, (%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$1, REM
	je	L(eq_sncmp)

	movzbl	1(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	1(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 1(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$2, REM
	je	L(eq_sncmp)

	movzbl	2(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	2(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 2(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$3, REM
	je	L(eq_sncmp)

	movzbl	3(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	3(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 3(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$4, REM
	je	L(eq_sncmp)

	movzbl	4(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	4(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 4(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$5, REM
	je	L(eq_sncmp)

	movzbl	5(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	5(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 5(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$6, REM
	je	L(eq_sncmp)

	movzbl	6(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	6(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 6(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$7, REM
	je	L(eq_sncmp)

	movzbl	7(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	7(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 7(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$8, REM
	je	L(eq_sncmp)

	movzbl	8(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	8(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 8(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$9, REM
	je	L(eq_sncmp)

	movzbl	9(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	9(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 9(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$10, REM
	je	L(eq_sncmp)

	movzbl	10(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	10(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 10(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$11, REM
	je	L(eq_sncmp)

	movzbl	11(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	11(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 11(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$12, REM
	je	L(eq_sncmp)

	movzbl	12(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	12(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 12(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$13, REM
	je	L(eq_sncmp)

	movzbl	13(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	13(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 13(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$14, REM
	je	L(eq_sncmp)

	movzbl	14(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	14(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 14(%edx)
# endif
	jne	L(neq_sncmp)
	test	%cl, %cl
	je	L(eq_sncmp)

	cmp	$15, REM
	je	L(eq_sncmp)

	movzbl	15(%eax), %ecx
# ifdef USE_AS_STRNCASECMP_L
	movzbl	15(%edx), %esi
#  ifdef PIC
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower@GOTOFF+128*4(%ebx,%esi,4), %esi
#  else
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%ecx,4), %ecx
	movl	_nl_C_LC_CTYPE_tolower+128*4(,%esi,4), %esi
#  endif
	cmpl	%ecx, %esi
# else
	cmpb	%cl, 15(%edx)
# endif
	jne	L(neq_sncmp)

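	/* Fall-through after all 16 bytes matched.  The jumps to
	   L(eq_sncmp) in the strncasecmp variant land on the local
	   label below so that %esi is restored before returning 0.  */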
# ifdef USE_AS_STRNCASECMP_L
L(eq_sncmp):
	POP	(%esi)
# endif
	POP	(REM)
# if defined USE_AS_STRNCASECMP_L && defined PIC
	POP	(%ebx)
# endif
	xor	%eax, %eax
	ret

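	/* Out-of-line mismatch exit for the strncasecmp tail.  The
	   CFI_PUSHes re-declare the unwind state for the registers
	   still on the stack; the flags from the cmpl of the folded
	   bytes (s1's in %esi, s2's in %ecx) are still live, so cmovna
	   flips the provisional 1 to -1 when s1's byte was not above
	   s2's.  */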
# ifdef USE_AS_STRNCASECMP_L
	.p2align 4
#  ifdef PIC
	CFI_PUSH (%ebx)
#  endif
	CFI_PUSH (REM)
	CFI_PUSH (%esi)
L(neq_sncmp):
	mov	$1, %eax
	mov	$-1, %edx
	cmovna	%edx, %eax
	POP	(%esi)
	POP	(REM)
#  ifdef PIC
	POP	(%ebx)
#  endif
	ret
# endif
#endif

END (STRCMP)

#endif