1 #ifndef _I386_STRING_I486_H_
2 #define _I386_STRING_I486_H_
3
/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strtok,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean.
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Revised and optimized for i486/pentium
 * 1994/03/15 by Alberto Vignani/Davide Parodi @crf.it
 *
 * Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
 *
 * 1999/10/5 Proper register args for newer GCCs and minor bugs
 * fixed - Petko Manolov (petkan@spct.net)
 * 1999/10/14 3DNow memscpy() added - Petkan
 * 2000/05/09 extern changed to static in function definitions
 * and a few cleanups - Petkan
 */
24
#define __HAVE_ARCH_STRCPY
/*
 * strcpy - copy the NUL-terminated string src into dest.
 * Returns dest.  Byte-at-a-time copy kept entirely in registers;
 * the caller must guarantee dest is large enough.
 */
static inline char * strcpy(char * dest,const char *src)
{
register char *tmp= (char *)dest;
register char dummy;
__asm__ __volatile__(
	"\n1:\t"
	"movb (%0),%2\n\t"	/* load next byte of src */
	"incl %0\n\t"
	"movb %2,(%1)\n\t"	/* store it to dest */
	"incl %1\n\t"
	"testb %2,%2\n\t"	/* stop once the NUL has been copied */
	"jne 1b"
	:"=r" (src), "=r" (tmp), "=q" (dummy)
	:"0" (src), "1" (tmp)
	:"memory");
return dest;
}
43
#define __HAVE_ARCH_STRNCPY
/*
 * strncpy - copy at most count bytes of src into dest.
 * As the C standard requires, if src is shorter than count the
 * remainder of dest is padded with NUL bytes; the result is NOT
 * NUL-terminated when strlen(src) >= count.  Returns dest.
 */
static inline char * strncpy(char * dest,const char *src,size_t count)
{
register char *tmp= (char *)dest;
register char dummy;
if (count) {
	__asm__ __volatile__(
		"\n1:\t"
		"movb (%0),%2\n\t"
		"incl %0\n\t"
		"movb %2,(%1)\n\t"
		"incl %1\n\t"
		"decl %3\n\t"
		"je 3f\n\t"		/* copied count bytes: done */
		"testb %2,%2\n\t"
		"jne 1b\n\t"		/* keep copying until NUL */
		"2:\tmovb %2,(%1)\n\t"	/* NUL-pad the rest of dest */
		"incl %1\n\t"
		"decl %3\n\t"
		"jne 2b\n\t"
		"3:"
		:"=r" (src), "=r" (tmp), "=q" (dummy), "=r" (count)
		:"0" (src), "1" (tmp), "3" (count)
		:"memory");
} /* if (count) */
return dest;
}
71
#define __HAVE_ARCH_STRCAT
/*
 * strcat - append the NUL-terminated string src to dest.
 * First scans for the terminating NUL of dest, then copies src
 * (including its NUL) after it.  Returns dest.
 */
static inline char * strcat(char * dest,const char * src)
{
register char *tmp = (char *)(dest-1);
register char dummy;
__asm__ __volatile__(
	"\n1:\tincl %1\n\t"		/* find the end of dest */
	"cmpb $0,(%1)\n\t"
	"jne 1b\n"
	"2:\tmovb (%2),%b0\n\t"		/* copy src, NUL included */
	"incl %2\n\t"
	"movb %b0,(%1)\n\t"
	"incl %1\n\t"
	"testb %b0,%b0\n\t"
	"jne 2b\n"
	:"=q" (dummy), "=r" (tmp), "=r" (src)
	:"1" (tmp), "2" (src)
	:"memory");
return dest;
}
92
#define __HAVE_ARCH_STRNCAT
/*
 * strncat - append at most count bytes of src to dest and always
 * NUL-terminate the result.  Returns dest.  dest must have room
 * for strlen(dest)+count+1 bytes.
 *
 * NOTE(review): when src ends before count is exhausted, control
 * falls through to label 3 and a second NUL is stored right after
 * the one already copied.  This stays within the required
 * strlen(dest)+count+1 bytes, but writes one byte more than a
 * minimal implementation would.
 */
static inline char * strncat(char * dest,const char * src,size_t count)
{
register char *tmp = (char *)(dest-1);
register char dummy;
__asm__ __volatile__(
	"\n1:\tincl %1\n\t"		/* find the end of dest */
	"cmpb $0,(%1)\n\t"
	"jne 1b\n"
	"2:\tdecl %3\n\t"		/* stop after count bytes */
	"js 3f\n\t"
	"movb (%2),%b0\n\t"
	"incl %2\n\t"
	"movb %b0,(%1)\n\t"
	"incl %1\n\t"
	"testb %b0,%b0\n\t"
	"jne 2b\n"
	"3:\txorb %0,%0\n\t"		/* force a terminating NUL */
	"movb %b0,(%1)\n\t"
	:"=q" (dummy), "=r" (tmp), "=r" (src), "=r" (count)
	:"1" (tmp), "2" (src), "3" (count)
	:"memory");
return dest;
}
117
#define __HAVE_ARCH_STRCMP
/*
 * strcmp - compare the NUL-terminated strings cs and ct byte by
 * byte (unsigned byte comparison).  Returns 0 if equal, 1 if cs
 * sorts after ct, -1 if before.
 */
static inline int strcmp(const char * cs,const char * ct)
{
register int __res;
__asm__ __volatile__(
	"\n1:\tmovb (%1),%b0\n\t"
	"incl %1\n\t"
	"cmpb %b0,(%2)\n\t"	/* flags from *ct - *cs */
	"jne 2f\n\t"
	"incl %2\n\t"
	"testb %b0,%b0\n\t"	/* equal so far; stop at NUL */
	"jne 1b\n\t"
	"xorl %0,%0\n\t"
	"jmp 3f\n"
	"2:\tmovl $1,%0\n\t"
	"jb 3f\n\t"		/* *ct < *cs: cs is greater, keep 1 */
	"negl %0\n"
	"3:"
	:"=q" (__res), "=r" (cs), "=r" (ct)
	:"1" (cs), "2" (ct)
	: "memory" );
return __res;
}
141
#define __HAVE_ARCH_STRNCMP
/*
 * strncmp - compare at most count bytes of cs and ct (unsigned
 * byte comparison, stopping at a NUL).  Returns 0, 1 or -1 like
 * strcmp; returns 0 when count is 0.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
register int __res;
__asm__ __volatile__(
	"\n1:\tdecl %3\n\t"	/* count exhausted -> equal */
	"js 2f\n\t"
	"movb (%1),%b0\n\t"
	"incl %1\n\t"
	"cmpb %b0,(%2)\n\t"
	"jne 3f\n\t"
	"incl %2\n\t"
	"testb %b0,%b0\n\t"	/* both hit NUL -> equal */
	"jne 1b\n"
	"2:\txorl %0,%0\n\t"
	"jmp 4f\n"
	"3:\tmovl $1,%0\n\t"
	"jb 4f\n\t"		/* *ct < *cs: cs is greater, keep 1 */
	"negl %0\n"
	"4:"
	:"=q" (__res), "=r" (cs), "=r" (ct), "=r" (count)
	:"1" (cs), "2" (ct), "3" (count));
return __res;
}
166
#define __HAVE_ARCH_STRCHR
/*
 * strchr - return a pointer to the first occurrence of (char)c in
 * s, or NULL if there is none.  The terminating NUL counts as
 * part of the string, so strchr(s, 0) returns a pointer to it.
 */
static inline char * strchr(const char * s, int c)
{
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"	/* keep the target byte in %ah */
	"1:\tmovb (%1),%%al\n\t"
	"cmpb %%ah,%%al\n\t"
	"je 2f\n\t"		/* matches the NUL too */
	"incl %1\n\t"
	"testb %%al,%%al\n\t"
	"jne 1b\n\t"
	"xorl %1,%1\n"		/* not found: return NULL */
	"2:\tmovl %1,%0\n\t"
	:"=a" (__res), "=r" (s)
	:"0" (c), "1" (s));
return __res;
}
185
#define __HAVE_ARCH_STRRCHR
/*
 * strrchr - return a pointer to the last occurrence of (char)c in
 * s, or NULL if there is none.  Scans forward with lodsb,
 * remembering the most recent match.
 */
static inline char * strrchr(const char * s, int c)
{
int d0, d1;
register char * __res;
__asm__ __volatile__(
	"movb %%al,%%ah\n"	/* target byte in %ah */
	"1:\tlodsb\n\t"
	"cmpb %%ah,%%al\n\t"
	"jne 2f\n\t"
	"leal -1(%%esi),%0\n"	/* remember this match position */
	"2:\ttestb %%al,%%al\n\t"
	"jne 1b"
	:"=d" (__res), "=&S" (d0), "=&a" (d1)
	:"0" (0), "1" (s), "2" (c));
return __res;
}
203
204
#define __HAVE_ARCH_STRCSPN
/*
 * strcspn - return the length of the initial segment of cs that
 * contains no byte from ct.  First measures strlen(ct) with a
 * repne scasb, then rescans ct for every byte of cs.
 */
static inline size_t strcspn(const char * cs, const char * ct)
{
int d0, d1;
register char * __res;
__asm__ __volatile__(
	"movl %6,%%edi\n\t"	/* compute strlen(ct)... */
	"repne\n\t"
	"scasb\n\t"
	"notl %%ecx\n\t"
	"decl %%ecx\n\t"
	"movl %%ecx,%%edx\n"	/* ...and keep it in %edx */
	"1:\tlodsb\n\t"
	"testb %%al,%%al\n\t"	/* end of cs: nothing rejected */
	"je 2f\n\t"
	"movl %6,%%edi\n\t"	/* scan ct for the current byte */
	"movl %%edx,%%ecx\n\t"
	"repne\n\t"
	"scasb\n\t"
	"jne 1b\n"
	"2:\tdecl %0"		/* lodsb overshot by one */
	:"=S" (__res), "=&a" (d0), "=&c" (d1)
	:"0" (cs), "1" (0), "2" (0xffffffff), "g" (ct)
	:"dx", "di");
return __res-cs;
}
231
232
#define __HAVE_ARCH_STRLEN
/*
 * strlen - return the number of bytes in s before the first NUL.
 */
static inline size_t strlen(const char * s)
{
/*
 * slightly slower on a 486, but with better chances of
 * register allocation
 */
register char dummy, *tmp= (char *)s;
__asm__ __volatile__(
	"\n1:\t"
	"movb\t(%0),%1\n\t"
	"incl\t%0\n\t"
	"testb\t%1,%1\n\t"
	"jne\t1b"
	:"=r" (tmp),"=q" (dummy)
	:"0" (s)
	: "memory" );
return (tmp-s-1);	/* tmp ended one byte past the NUL */
}
252
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
/*
 * strnlen - like strlen, but examine no more than count bytes;
 * returns count when no NUL occurs within the first count bytes,
 * and 0 when count is 0.
 */
static inline size_t strnlen(const char * s, size_t count)
{
int d0;
register int __res;
__asm__ __volatile__(
	"movl %3,%0\n\t"	/* scan pointer starts at s */
	"jmp 2f\n"
	"1:\tcmpb $0,(%0)\n\t"
	"je 3f\n\t"
	"incl %0\n"
	"2:\tdecl %2\n\t"	/* budget check before each byte */
	"cmpl $-1,%2\n\t"
	"jne 1b\n"
	"3:\tsubl %3,%0"	/* scanned pointer - start = length */
	:"=a" (__res), "=&d" (d0)
	:"1" (count), "c" (s));
return __res;
}
273 /* end of additional stuff */
274
275
276 /*
277 * These ought to get tweaked to do some cache priming.
278 */
279
__memcpy_by4(void * to,const void * from,size_t n)280 static inline void * __memcpy_by4(void * to, const void * from, size_t n)
281 {
282 register void *tmp = (void *)to;
283 register int dummy1,dummy2;
284 __asm__ __volatile__ (
285 "\n1:\tmovl (%2),%0\n\t"
286 "addl $4,%2\n\t"
287 "movl %0,(%1)\n\t"
288 "addl $4,%1\n\t"
289 "decl %3\n\t"
290 "jnz 1b"
291 :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
292 :"1" (tmp), "2" (from), "3" (n/4)
293 :"memory");
294 return (to);
295 }
296
/*
 * __memcpy_by2 - copy n bytes (n must be a multiple of 2) one
 * 32-bit word at a time, with a trailing 16-bit move when n/2 is
 * odd.  Returns 'to'.
 * NOTE(review): the final movw always executes, so n == 0 would
 * still copy 2 bytes -- the __memcpy_c dispatcher only routes
 * counts that are even but not multiples of 4 here, i.e. n >= 2.
 */
static inline void * __memcpy_by2(void * to, const void * from, size_t n)
{
register void *tmp = (void *)to;
register int dummy1,dummy2;
__asm__ __volatile__ (
	"shrl $1,%3\n\t"
	"jz 2f\n" /* only a word */
	"1:\tmovl (%2),%0\n\t"
	"addl $4,%2\n\t"
	"movl %0,(%1)\n\t"
	"addl $4,%1\n\t"
	"decl %3\n\t"
	"jnz 1b\n"
	"2:\tmovw (%2),%w0\n\t"	/* copy the last 2 bytes */
	"movw %w0,(%1)"
	:"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)
	:"1" (tmp), "2" (from), "3" (n/2)
	:"memory");
return (to);
}
317
/*
 * __memcpy_g - general memcpy for runtime-sized counts: copies
 * the odd byte, then the odd word, then the rest with rep movsl.
 * Returns 'to'.
 */
static inline void * __memcpy_g(void * to, const void * from, size_t n)
{
int d0, d1, d2;
register void *tmp = (void *)to;
__asm__ __volatile__ (
	"shrl $1,%%ecx\n\t"	/* CF = low bit: copy odd byte */
	"jnc 1f\n\t"
	"movsb\n"
	"1:\tshrl $1,%%ecx\n\t"	/* CF = next bit: copy odd word */
	"jnc 2f\n\t"
	"movsw\n"
	"2:\trep\n\t"		/* remaining dwords in %ecx */
	"movsl"
	:"=&c" (d0), "=&D" (d1), "=&S" (d2)
	:"0" (n), "1" ((long) tmp), "2" ((long) from)
	:"memory");
return (to);
}
336
/*
 * __memcpy_c: compile-time-constant count -- dispatch to the
 * widest copy loop the count's alignment allows.
 */
#define __memcpy_c(d,s,count) \
((count%4==0) ? \
__memcpy_by4((d),(s),(count)) : \
((count%2==0) ? \
__memcpy_by2((d),(s),(count)) : \
__memcpy_g((d),(s),(count))))

/* __memcpy: use the constant dispatcher only when gcc can prove
 * the count constant; otherwise fall back to the general copy. */
#define __memcpy(d,s,count) \
(__builtin_constant_p(count) ? \
__memcpy_c((d),(s),(count)) : \
__memcpy_g((d),(s),(count)))
348
349 #define __HAVE_ARCH_MEMCPY
350
351 #include <linux/config.h>
352
353 #ifdef CONFIG_X86_USE_3DNOW
354
355 #include <asm/mmx.h>
356
357 /*
358 ** This CPU favours 3DNow strongly (eg AMD K6-II, K6-III, Athlon)
359 */
360
/*
 * Constant-count memcpy for 3DNow! CPUs: short copies are cheaper
 * through the plain x86 path, long ones through the MMX streaming
 * copy in _mmx_memcpy().
 */
static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
{
	return (len < 512) ? __memcpy_c(to, from, len) : _mmx_memcpy(to, from, len);
}
367
/*
 * Runtime-count memcpy for 3DNow! CPUs; the 512-byte threshold is
 * where the MMX copy starts to win.
 */
static inline void *__memcpy3d(void *to, const void *from, size_t len)
{
	return (len < 512) ? __memcpy_g(to, from, len) : _mmx_memcpy(to, from, len);
}
374
/* memcpy: route through the 3DNow!-aware helpers above. */
#define memcpy(d, s, count) \
(__builtin_constant_p(count) ? \
__constant_memcpy3d((d),(s),(count)) : \
__memcpy3d((d),(s),(count)))
379
380 #else /* CONFIG_X86_USE_3DNOW */
381
382 /*
383 ** Generic routines
384 */
385
386
/* memcpy: no 3DNow! -- plain constant/general dispatch. */
#define memcpy(d, s, count) __memcpy(d, s, count)
388
389 #endif /* CONFIG_X86_USE_3DNOW */
390
391
extern void __struct_cpy_bug( void );

/*
 * struct_cpy - copy *y into *x; both must point to objects of the
 * same size.  A size mismatch expands to a call to the deliberately
 * undefined __struct_cpy_bug(), turning the mistake into a link
 * error.  Bug fix: the original read `__struct_cpy_bug;' without
 * parentheses -- a statement with no effect -- so the link-time
 * check could never fire.
 */
#define struct_cpy(x,y) 			\
({						\
	if (sizeof(*(x)) != sizeof(*(y))) 	\
		__struct_cpy_bug();		\
	memcpy(x, y, sizeof(*(x)));		\
})
400
401
#define __HAVE_ARCH_MEMMOVE
/*
 * memmove - copy n bytes from src to dest, handling overlap.
 * Copies forward when dest is below src, otherwise backward from
 * the last byte with the direction flag set (restored with cld).
 * Returns dest.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
int d0, d1, d2;
register void *tmp = (void *)dest;
if (dest<src)
	__asm__ __volatile__ (
		"rep\n\t"
		"movsb"
		:"=&c" (d0), "=&S" (d1), "=&D" (d2)
		:"0" (n), "1" (src), "2" (tmp)
		:"memory");
else
	__asm__ __volatile__ (
		"std\n\t"	/* copy backwards, starting at the end */
		"rep\n\t"
		"movsb\n\t"
		"cld"		/* restore the direction flag */
		:"=&c" (d0), "=&S" (d1), "=&D" (d2)
		:"0" (n), "1" (n-1+(const char *)src), "2" (n-1+(char *)tmp)
		:"memory");
return dest;
}
425
426
#define __HAVE_ARCH_MEMCMP
/*
 * memcmp - compare count bytes of cs and ct as unsigned chars.
 * Returns 0 if equal, 1 if the first differing byte of cs is
 * greater, -1 if smaller (sbb/or derives -1 or 1 from CF).
 */
static inline int memcmp(const void * cs,const void * ct,size_t count)
{
int d0, d1, d2;
register int __res;
/*
 * Bug fix: `repe cmpsb' with %ecx == 0 performs no comparison and
 * leaves the flags untouched, so the `je' below would test stale
 * flags and memcmp(x, y, 0) could return nonzero.  The standard
 * requires 0 for a zero-length compare.
 */
if (!count)
	return 0;
__asm__ __volatile__(
	"repe\n\t"
	"cmpsb\n\t"
	"je 1f\n\t"
	"sbbl %0,%0\n\t"	/* CF set (*cs < *ct) -> -1, else 0 */
	"orb $1,%b0\n"		/* -1 stays -1, 0 becomes 1 */
	"1:"
	:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
	:"0" (0), "1" (cs), "2" (ct), "3" (count));
return __res;
}
443
444
#define __HAVE_ARCH_MEMCHR
/*
 * memchr - return a pointer to the first occurrence of (char)c in
 * the count bytes at cs, or NULL if it does not occur.
 */
static inline void * memchr(const void * cs,int c,size_t count)
{
int d0;
register void * __res;
if (!count)
	return NULL;
__asm__ __volatile__(
	"repne\n\t"
	"scasb\n\t"
	"je 1f\n\t"
	"movl $1,%0\n"		/* not found: 1 - 1 == NULL below */
	"1:\tdecl %0"		/* %edi stopped one past the match */
	:"=D" (__res), "=&c" (d0)
	:"a" (c), "0" (cs), "1" (count));
return __res;
}
462
/*
 * __memset_cc: fill byte and count both compile-time constants --
 * pick the widest store loop the count's alignment allows.
 */
#define __memset_cc(s,c,count) \
((count%4==0) ? \
__memset_cc_by4((s),(c),(count)) : \
((count%2==0) ? \
__memset_cc_by2((s),(c),(count)) : \
__memset_cg((s),(c),(count))))

/*
 * __memset_gc: runtime fill byte, compile-time-constant count.
 */
#define __memset_gc(s,c,count) \
((count%4==0) ? \
__memset_gc_by4((s),(c),(count)) : \
((count%2==0) ? \
__memset_gc_by2((s),(c),(count)) : \
__memset_gg((s),(c),(count))))

#define __HAVE_ARCH_MEMSET
/*
 * memset: dispatch on which of the fill byte / count gcc can
 * prove constant at compile time.
 */
#define memset(s,c,count) \
(__builtin_constant_p(c) ? \
(__builtin_constant_p(count) ? \
__memset_cc((s),(c),(count)) : \
__memset_cg((s),(c),(count))) : \
(__builtin_constant_p(count) ? \
__memset_gc((s),(c),(count)) : \
__memset_gg((s),(c),(count))))
486
__memset_cc_by4(void * s,char c,size_t count)487 static inline void * __memset_cc_by4(void * s, char c, size_t count)
488 {
489 /*
490 * register char *tmp = s;
491 */
492 register char *tmp = (char *)s;
493 register int dummy;
494 __asm__ __volatile__ (
495 "\n1:\tmovl %2,(%0)\n\t"
496 "addl $4,%0\n\t"
497 "decl %1\n\t"
498 "jnz 1b"
499 :"=r" (tmp), "=r" (dummy)
500 :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/4)
501 :"memory");
502 return s;
503 }
504
/*
 * __memset_cc_by2 - fill count bytes (count even, not a multiple
 * of 4) with the constant byte c: dword stores plus a trailing
 * word store.  Returns s.
 * NOTE(review): the final movw always runs, so count == 0 would
 * still store 2 bytes -- the __memset_cc dispatcher only routes
 * counts that are even but not multiples of 4 here, i.e. >= 2.
 */
static inline void * __memset_cc_by2(void * s, char c, size_t count)
{
register void *tmp = (void *)s;
register int dummy;
__asm__ __volatile__ (
	"shrl $1,%1\n\t" /* may be divisible also by 4 */
	"jz 2f\n"
	"\n1:\tmovl %2,(%0)\n\t"
	"addl $4,%0\n\t"
	"decl %1\n\t"
	"jnz 1b\n"
	"2:\tmovw %w2,(%0)"	/* store the last 2 bytes */
	:"=r" (tmp), "=r" (dummy)
	:"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/2)
	:"memory");
return s;
}
522
__memset_gc_by4(void * s,char c,size_t count)523 static inline void * __memset_gc_by4(void * s, char c, size_t count)
524 {
525 register void *tmp = (void *)s;
526 register int dummy;
527 __asm__ __volatile__ (
528 "movb %b0,%h0\n"
529 "pushw %w0\n\t"
530 "shll $16,%0\n\t"
531 "popw %w0\n"
532 "1:\tmovl %0,(%1)\n\t"
533 "addl $4,%1\n\t"
534 "decl %2\n\t"
535 "jnz 1b\n"
536 :"=q" (c), "=r" (tmp), "=r" (dummy)
537 :"0" ((unsigned) c), "1" (tmp), "2" (count/4)
538 :"memory");
539 return s;
540 }
541
/*
 * __memset_gc_by2 - fill count bytes (count even, not a multiple
 * of 4) with a runtime byte c: dword stores plus a trailing word
 * store.  Returns s.
 * NOTE(review): like __memset_cc_by2, the trailing movw always
 * runs, so the dispatcher must never pass count == 0 here.
 */
static inline void * __memset_gc_by2(void * s, char c, size_t count)
{
register void *tmp = (void *)s;
register int dummy1,dummy2;
__asm__ __volatile__ (
	"movb %b0,%h0\n\t"	/* replicate c into both byte lanes */
	"shrl $1,%2\n\t" /* may be divisible also by 4 */
	"jz 2f\n\t"
	"pushw %w0\n\t"
	"shll $16,%0\n\t"
	"popw %w0\n"		/* all 4 bytes of %0 now hold c */
	"1:\tmovl %0,(%1)\n\t"
	"addl $4,%1\n\t"
	"decl %2\n\t"
	"jnz 1b\n"
	"2:\tmovw %w0,(%1)"	/* store the last 2 bytes */
	:"=q" (dummy1), "=r" (tmp), "=r" (dummy2)
	:"0" ((unsigned) c), "1" (tmp), "2" (count/2)
	:"memory");
return s;
}
563
/*
 * __memset_cg - fill count bytes with a constant byte c for a
 * runtime count: rep stosw for the pairs, plus one extra byte
 * when count is odd (the CF from shrl survives rep stosw, which
 * does not modify flags).  Returns s.
 */
static inline void * __memset_cg(void * s, char c, size_t count)
{
int d0, d1;
register void *tmp = (void *)s;
__asm__ __volatile__ (
	"shrl $1,%%ecx\n\t"	/* CF = odd byte pending */
	"rep\n\t"
	"stosw\n\t"
	"jnc 1f\n\t"
	"movb %%al,(%%edi)\n"
	"1:"
	:"=&c" (d0), "=&D" (d1)
	:"a" (0x0101U * (unsigned char) c), "0" (count), "1" (tmp)
	:"memory");
return s;
}
580
__memset_gg(void * s,char c,size_t count)581 static inline void * __memset_gg(void * s,char c,size_t count)
582 {
583 int d0, d1, d2;
584 register void *tmp = (void *)s;
585 __asm__ __volatile__ (
586 "movb %%al,%%ah\n\t"
587 "shrl $1,%%ecx\n\t"
588 "rep\n\t"
589 "stosw\n\t"
590 "jnc 1f\n\t"
591 "movb %%al,(%%edi)\n"
592 "1:"
593 :"=&c" (d0), "=&D" (d1), "=&D" (d2)
594 :"0" (count), "1" (tmp), "2" (c)
595 :"memory");
596 return s;
597 }
598
599
600 /*
601 * find the first occurrence of byte 'c', or 1 past the area if none
602 */
603 #define __HAVE_ARCH_MEMSCAN
memscan(void * addr,int c,size_t size)604 static inline void * memscan(void * addr, int c, size_t size)
605 {
606 if (!size)
607 return addr;
608 __asm__("repnz; scasb
609 jnz 1f
610 dec %%edi
611 1: "
612 : "=D" (addr), "=c" (size)
613 : "0" (addr), "1" (size), "a" (c));
614 return addr;
615 }
616
617 #endif
618