/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)
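
/*
 * Illustrative C equivalent of one COPY_16_BYTES step (a sketch, not
 * the build source).  The callers pre-bias r4/r6 by -4, so the copy
 * runs at offsets 4..16 and the lwzu/stwu update forms leave both
 * pointers advanced by 16 for the next iteration:
 *
 *	u32 a = src[1], b = src[2], c = src[3], d = src[4];
 *	dst[1] = a; dst[2] = b; dst[3] = c; dst[4] = d;
 *	src += 4; dst += 4;
 */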

#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text
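
/*
 * Note on the two macros above: COPY_16_BYTES_WITHEX(n) labels its
 * eight instructions 8n0..8n7, and COPY_16_BYTES_EXCODE(n) provides
 * the matching fixups: 9n0 for a fault in one of the loads, 9n1 for
 * a fault in one of the stores.  Each fixup subtracts from r5 the
 * 16*n bytes that earlier macro instances already copied within the
 * current cache line, then branches to the common read-fault (104)
 * or write-fault (105) handler in __copy_tofrom_user below.
 */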

	.text
	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
	.stabs	"string.S",N_SO,0,0,0f

CACHELINE_BYTES = L1_CACHE_LINE_SIZE
LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)

_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/* This clears out any unused part of the destination buffer,
   just as the libc version does.  -- paulus */
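/*
 * Roughly equivalent C, for reference (a sketch, not what is built):
 *
 *	char *strncpy(char *dest, const char *src, size_t n)
 *	{
 *		size_t i = 0;
 *
 *		while (i < n && (dest[i] = src[i]) != '\0')
 *			i++;
 *		while (++i < n)
 *			dest[i] = '\0';		// zero-pad the remainder
 *		return dest;
 *	}
 */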
_GLOBAL(strncpy)
	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr			/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0		/* any space left in destination buffer? */
	beqlr			/* we know r0 == 0 here */
2:	stbu	r0,1(r6)	/* clear it out if so */
	bdnz	2b
	blr

_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0		/* cr1: end of first string? */
	lbzu	r0,1(r4)
	subf.	r3,r0,r3	/* r3 = *s1 - *s2 */
	beqlr	1		/* return if *s1 was a null */
	beq	1b		/* loop while the bytes are equal */
	blr

_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4
	blr

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
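/*
 * A C-level sketch of the strategy (illustrative only; the code below
 * also word-aligns first and uses word stores for the head and tail):
 *
 *	void zero_cacheable(char *p, unsigned long n)
 *	{
 *		while (n && ((unsigned long)p & (L1_CACHE_LINE_SIZE - 1))) {
 *			*p++ = 0;
 *			n--;
 *		}
 *		while (n >= L1_CACHE_LINE_SIZE) {
 *			__asm__ __volatile__("dcbz 0,%0" : : "r" (p) : "memory");
 *			p += L1_CACHE_LINE_SIZE;
 *			n -= L1_CACHE_LINE_SIZE;
 *		}
 *		while (n--)
 *			*p++ = 0;
 *	}
 */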
_GLOBAL(cacheable_memzero)
	mr	r5,r4
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6
#else
10:	stw	r4, 4(r6)
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23	/* replicate the fill byte into bits 16-23... */
	rlwimi	r4,r4,16,0,15	/* ...then the top half: r4 = byte * 0x01010101 */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f
	stwu	r4,4(r6)
	beqlr
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

_GLOBAL(bcopy)
	mr	r6,r3
	mr	r3,r4
	mr	r4,r6
	b	memcpy

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
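/*
 * The overlap test below, in C (a sketch):
 *
 *	if (src < dst + n && dst < src + n)
 *		return memcpy(dst, src, n);	// regions overlap
 *	// otherwise it is safe to dcbz the destination lines
 */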
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6
#endif
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

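/*
 * Plain memcpy: once the destination is word-aligned (label 5 below
 * aligns it a byte at a time first), the main loop at label 1 moves
 * 8 bytes per iteration; labels 2-4 then handle a trailing word and
 * up to 3 trailing bytes.
 */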
_GLOBAL(memcpy)
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f			/* if less than 8 bytes to do */
	andi.	r0,r6,3			/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b
	blr
2:	li	r3,0
	blr

_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b
	beqlr
2:	li	r3,0
	blr

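/*
 * Copy between user and kernel space, with faulting accesses
 * recovered through the __ex_table fixups below.  Returns 0 on
 * success, else the number of bytes NOT copied.  A hedged usage
 * sketch from C (assuming the usual kernel-side declaration):
 *
 *	if (__copy_tofrom_user(to, from, n) != 0)
 *		return -EFAULT;		// part of the copy faulted
 */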
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
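
/*
 * The prefetch-depth choice above, restated in C (a sketch):
 *
 *	depth = (lines <= 1)			? 0
 *	      : (lines <= MAX_COPY_PREFETCH)	? 1
 *	      :					  MAX_COPY_PREFETCH;
 *
 * where "lines" is the number of complete cache lines to copy (r0)
 * and "depth" ends up in r7.
 */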

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	b	99f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
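/*
 * For example, a fault in the final word loop (labels 30/31) set
 * r3 = 2 above, so the uncopied count is r5 + 4*ctr; in the byte
 * loops r3 = 0 and it is simply r5 + ctr.
 */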
99:	mfctr	r0
	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f			/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text

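/*
 * Zero a block of user memory.  r3 = address, r4 = byte count.
 * Returns 0 on success, else the number of bytes NOT cleared
 * (reconstructed by the 90/91/92 fixups below).
 */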
_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0
	li	r5,0
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
90:	mr	r3,r4
	blr
91:	mfctr	r3
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
92:	mfctr	r3
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text

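/*
 * Copy a null-terminated string from user space.  r3 = dest,
 * r4 = src, r5 = maximum number of bytes to copy.  Returns the
 * number of bytes copied (not counting a trailing null), or
 * -EFAULT if the source faults.
 */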
_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f
2:	addi	r6,r6,1
3:	subf	r3,r3,r6
	blr
99:	li	r3,-EFAULT
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
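/*
 * Returns the number of bytes scanned including the null (strlen + 1)
 * if a null is found within len, len + 1 if no null is found, and 0
 * on a fault or if the string runs past `top'.
 */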
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b