/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

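/*
 * __copy_tofrom_user(to, from, n)
 * r3 = destination, r4 = source, r5 = byte count.
 * Returns in r3 the number of bytes NOT copied (0 on success); the
 * exception handlers further down arrange this when a load or store faults.
 */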
	.align	7
_GLOBAL(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF	0x01,r5
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
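/*
 * Here the destination is 8-byte aligned (or the CPU handles unaligned
 * ld/std well, in which case the branch above was nopped out); the main
 * loop copies 32 bytes per iteration with paired doubleword loads/stores.
 */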
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
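/*
 * Copy the final 1-15 bytes; cr7 was loaded with the low four bits of
 * the byte count, selecting 8-, 4-, 2- and 1-byte tail copies.
 */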
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

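/*
 * Source is not 8-byte aligned: round r4 down to a doubleword boundary
 * and rebuild aligned doublewords for the destination by shifting each
 * pair of loads together (sld/srd by r10/r11 bits).
 */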
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
94:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
95:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
96:	stb	r9,0(r3)
3:	li	r3,0
	blr

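/*
 * Destination is not 8-byte aligned: copy 1, 2 and/or 4 bytes (selected
 * by cr7, loaded from the distance to the next 8-byte boundary) and then
 * rejoin the aligned path.
 */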
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

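/* Copies of fewer than 16 bytes: 8/4/2/1-byte pieces selected by cr7. */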
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
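/*
 * (Each numbered fault label NN in the code above has its fixup at
 * label NN+100 here, as recorded by the __ex_table entries below.)
 */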

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
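/*
 * The loop below uses callee-saved registers r20-r31 (saved below the
 * stack pointer) and software-pipelines loads ahead of stores, reading
 * from six streams spaced 128 bytes apart, presumably to keep several
 * cache lines in flight at once.
 */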
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b