1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 *  PARISC TLB and cache flushing support
4 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
5 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
6 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
7 */
8
9/*
10 * NOTE: fdc,fic, and pdc instructions that use base register modification
11 *       should only use index and base registers that are not shadowed,
12 *       so that the fast path emulation in the non access miss handler
13 *       can be used.
14 */
15
16#ifdef CONFIG_64BIT
17	.level	2.0w
18#else
19	.level	2.0
20#endif
21
22#include <asm/psw.h>
23#include <asm/assembly.h>
24#include <asm/cache.h>
25#include <asm/ldcw.h>
26#include <asm/alternative.h>
27#include <linux/linkage.h>
28#include <linux/init.h>
29#include <linux/pgtable.h>
30
31	.section .text.hot
32	.align	16
33
ENTRY_CFI(flush_tlb_all_local)
	/*
	 * Purge the local instruction TLB and then the data TLB.
	 *
	 * pitlbe/pdtlbe should only be used to flush the entire tlb, and
	 * no other tlb operation (e.g. a tlb miss) may intervene.  The
	 * purge loops therefore run in real mode with all interruptions
	 * disabled; each mode switch loads the IIA queues and IPSW and
	 * then issues rfi.
	 *
	 * Both loops take their parameters from cache_info:
	 *	%r20 / %r21 / %r22	space id base / stride / count
	 *	%arg0 / %arg1 / %arg2	offset base / stride / count
	 *	%arg3			LOOP, decremented once up front
	 *	%r28, %r29, %r31	running address, middle and inner counter
	 *	%r19			value returned by the first rsm (I-bit state)
	 *	%r2			return address
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* keep the I-bit state for the exit path */
	load32		PA(1f), %r1		/* physical address of the real-mode part */
	.rept	5
	nop
	.endr

	rsm		PSW_SM_Q, %r0		/* Q off: the IIA queues get loaded next */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head = 1f */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail = 1f + 4 */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

	/* ---- real mode from here on ---- */
1:	load32		PA(cache_info), %r1

	/* Instruction TLB */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* LOOP was 1: short variant */
	movb,<,n	%arg3, %r31, fitdone	/* adjusted LOOP negative: skip */
	copy		%arg0, %r28		/* address starts at the offset base */

fitmanyloop:					/* LOOP >= 2: one pass per space id */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* next space id */
	copy		%arg2, %r29		/* reload middle counter */

fitmanymiddle:					/* LOOP >= 2: inner + middle loops */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* inner count down */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* final pitlbe also steps the address */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* middle count down */
	copy		%arg3, %r31		/* reload inner counter */

	movb,tr		%arg0, %r28, fitmanyloop /* address back to the offset base */
	addib,COND(<=),n	-1, %r22, fitdone	/* outer count down */

fitoneloop:					/* LOOP == 1: one pass per space id */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* address starts at the offset base */
	copy		%arg2, %r29		/* reload middle counter */

fitonemiddle:					/* LOOP == 1: middle loop */
	addib,COND(>)		-1, %r29, fitonemiddle	/* middle count down */
	pitlbe,m	%arg1(%sr1, %r28)	/* purge and step the address */

	addib,COND(>)		-1, %r22, fitoneloop	/* outer count down */
	add		%r21, %r20, %r20		/* next space id */

fitdone:
	/* 88b..fitdone is handed to ALTERNATIVE for ALT_COND_NO_SPLIT_TLB */
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)

	/* Data TLB */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* LOOP was 1: short variant */
	movb,<,n	%arg3, %r31, fdtdone	/* adjusted LOOP negative: skip */
	copy		%arg0, %r28		/* address starts at the offset base */

fdtmanyloop:					/* LOOP >= 2: one pass per space id */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* next space id */
	copy		%arg2, %r29		/* reload middle counter */

fdtmanymiddle:					/* LOOP >= 2: inner + middle loops */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* inner count down */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* final pdtlbe also steps the address */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* middle count down */
	copy		%arg3, %r31		/* reload inner counter */

	movb,tr		%arg0, %r28, fdtmanyloop /* address back to the offset base */
	addib,COND(<=),n	-1, %r22, fdtdone	/* outer count down */

fdtoneloop:					/* LOOP == 1: one pass per space id */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* address starts at the offset base */
	copy		%arg2, %r29		/* reload middle counter */

fdtonemiddle:					/* LOOP == 1: middle loop */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* middle count down */
	pdtlbe,m	%arg1(%sr1, %r28)	/* purge and step the address */

	addib,COND(>)		-1, %r22, fdtoneloop	/* outer count down */
	add		%r21, %r20, %r20	/* next space id */


fdtdone:
	/*
	 * Back to virtual mode: same IIA queue / IPSW / rfi sequence as
	 * on entry, this time targeting label 2 with KERNEL_PSW.
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	.rept	5
	nop
	.endr

	rsm		PSW_SM_Q, %r0		/* Q off: the IIA queues get loaded next */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head = 2f */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail = 2f + 4 */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* merge in the I-bit state seen on entry */
	mtctl		%r1, %ipsw	/* whole PSW, I-bit included, comes from here */
	rfi
	nop

2:	bv		%r0(%r2)
	nop

	/*
	 * When running in qemu, drop whole flush_tlb_all_local function and
	 * replace by one pdtlbe instruction, for which QEMU will drop all
	 * local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
ENDPROC_CFI(flush_tlb_all_local)
187
188	.import cache_info,data
189
ENTRY_CFI(flush_instruction_cache_local)
	/*
	 * Flush the local instruction cache with fice, driven by the
	 * ICACHE_* parameters in cache_info:
	 *	%arg0	current flush address (starts at ICACHE_BASE)
	 *	%arg1	ICACHE_STRIDE
	 *	%arg2	ICACHE_COUNT, counted down
	 *	%arg3	ICACHE_LOOP, decremented once up front
	 *	%r31	inner loop counter (LOOP >= 2 case)
	 *	%r22	system mask returned by rsm, restored with mtsm
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_ICACHE.
	 */
88:	load32		cache_info, %r1

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* LOOP was 1: unrolled variant */
	movb,<,n	%arg3, %r31, fisync	/* adjusted LOOP negative: just sync */

fimanyloop:					/* LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* inner count down */
	fice            %r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* final fice also steps the address */
	movb,tr		%arg3, %r31, fimanyloop	/* reload inner counter */
	addib,COND(<=),n	-1, %arg2, fisync	/* outer count down */

fioneloop:					/* LOOP == 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2	/* count <= 15: one at a time */

fioneloop1:					/* sixteen flushes per pass */
	.rept	15
	fice,m		%arg1(%sr1, %arg0)
	.endr
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)	/* sixteenth flush, in the delay slot */

	/* Nothing left over? */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:					/* remainder, one flush per pass */
	addib,COND(>)	-1, %arg2, fioneloop2	/* count down */
	fice,m		%arg1(%sr1, %arg0)

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)
248
249
250	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
	/*
	 * Flush the local data cache with fdce, driven by the DCACHE_*
	 * parameters in cache_info:
	 *	%arg0	current flush address (starts at DCACHE_BASE)
	 *	%arg1	DCACHE_STRIDE
	 *	%arg2	DCACHE_COUNT, counted down
	 *	%arg3	DCACHE_LOOP, decremented once up front
	 *	%r31	inner loop counter (LOOP >= 2 case)
	 *	%r22	system mask returned by rsm, restored with mtsm
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
88:	load32		cache_info, %r1

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* LOOP was 1: unrolled variant */
	movb,<,n	%arg3, %r31, fdsync	/* adjusted LOOP negative: just sync */

fdmanyloop:					/* LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* inner count down */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* final fdce also steps the address */
	movb,tr		%arg3, %r31, fdmanyloop	/* reload inner counter */
	addib,COND(<=),n	-1, %arg2, fdsync	/* outer count down */

fdoneloop:					/* LOOP == 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2	/* count <= 15: one at a time */

fdoneloop1:					/* sixteen flushes per pass */
	.rept	15
	fdce,m		%arg1(%sr1, %arg0)
	.endr
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)	/* sixteenth flush, in the delay slot */

	/* Nothing left over? */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:					/* remainder, one flush per pass */
	addib,COND(>)	-1, %arg2, fdoneloop2	/* count down */
	fdce,m		%arg1(%sr1, %arg0)

fdsync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)
309
310/* Clear page using kernel mapping.  */
311
ENTRY_CFI(clear_page_asm)
	/*
	 * Zero PAGE_SIZE bytes starting at %r26.
	 *	%r26	store pointer, advanced one chunk per pass
	 *	%r1	number of chunks still to clear
	 *	%r2	return address
	 */
#ifdef CONFIG_64BIT

	/* 128-byte chunks: sixteen doubleword stores per pass. */
	ldi		(PAGE_SIZE / 128), %r1

1:
	.irp	off, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
	std		%r0, \off(%r26)
	.endr

	/*
	 * Note reverse branch hint for addib is taken.  With ,n on a
	 * backward branch the ldo is nullified only when we fall through.
	 */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 * 64-byte chunks: sixteen word stores per pass.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	.irp	off, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
	stw		%r0, \off(%r26)
	.endr

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)
372
373/* Copy page using kernel mapping.  */
374
ENTRY_CFI(copy_page_asm)
	/*
	 * Copy PAGE_SIZE bytes from %r25 to %r26.
	 *	%r25		load pointer
	 *	%r26		store pointer
	 *	%r19-%r22	data in flight
	 *	%r1		number of chunks still to copy
	 *	%r2		return address
	 */
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

	/* One pass moves 128 bytes; loads run one register pair ahead. */
1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25		/* source done for this chunk */
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26		/* skipped on the final fall-through */

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* first word is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 64), %r1

	/* One pass moves 64 bytes in four load/store groups. */
1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)

	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)

	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)

	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* next chunk's first word; only if looping */
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)
483
484/*
485 * NOTE: Code in clear_user_page has a hard coded dependency on the
486 *       maximum alias boundary being 4 Mb. We've been assured by the
487 *       parisc chip designers that there will not ever be a parisc
488 *       chip with a larger alias boundary (Never say never :-) ).
489 *
490 *       Yah, what about the PA8800 and PA8900 processors?
491 *
492 *       Subtle: the dtlb miss handlers support the temp alias region by
493 *       "knowing" that if a dtlb miss happens within the temp alias
494 *       region it must have occurred while in clear_user_page. Since
495 *       this routine makes use of processor local translations, we
496 *       don't want to insert them into the kernel page table. Instead,
497 *       we load up some general registers (they need to be registers
498 *       which aren't shadowed) with the physical page numbers (preshifted
499 *       for tlb insertion) needed to insert the translations. When we
500 *       miss on the translation, the dtlb miss handler inserts the
501 *       translation into the tlb using these values:
502 *
503 *          %r26 physical address of "to" translation
504 *          %r23 physical address of "from" translation
505 */
506
507	/*
508	 * copy_user_page_asm() performs a page copy using mappings
509	 * equivalent to the user page mappings.  It can be used to
510	 * implement copy_user_page() but unfortunately both the `from'
511	 * and `to' pages need to be flushed through mappings equivalent
512	 * to the user mappings after the copy because the kernel accesses
513	 * the `from' page through the kmap kernel mapping and the `to'
514	 * page needs to be flushed since code can be copied.  As a
515	 * result, this implementation is less efficient than the simpler
516	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
518	 * the flushes needed for the kernel mapping.
519	 *
520	 * I'm still keeping this around because it may be possible to
521	 * use it if more information is passed into copy_user_page().
522	 * Have to do some measurements to see if it is worthwhile to
523	 * lobby for such a change.
524	 *
525	 */
526
ENTRY_CFI(copy_user_page_asm)
	/*
	 * Copy one page through temporary alias mappings.
	 *	%r26	in: `to' address; converted to physical below
	 *	%r25	in: `from' address
	 *	%r24	in: supplies the low TMPALIAS_SIZE_BITS bits of the alias
	 *	%r23	physical `from' address (non shadowed register)
	 *	%r28	aliased `to' address, store pointer
	 *	%r29	aliased `from' address, load pointer
	 *	%r19-%r22 data in flight, %r1 scratch / chunk counter
	 */

	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r24, 31,TMPALIAS_SIZE_BITS, %r28	/* aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* drop the in-page offset bits */
	copy		%r28, %r29
	depi_safe	1, 31-TMPALIAS_SIZE_BITS,1, %r29	/* aliased virtual address 'from' */

	/* Drop stale translations for both alias addresses */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* first doubleword is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 128), %r1

	/* One pass moves 128 bytes. */
1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

	/* One pass moves 64 bytes in four load/store groups. */
1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)

	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)

	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)

	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r29), %r29		/* delay slot: step the load pointer */
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)
663
ENTRY_CFI(clear_user_page_asm)
	/*
	 * Zero one page through a temporary alias mapping.
	 *	%r26	in: page address; run through tophys_r1 below
	 *	%r25	in: supplies the low TMPALIAS_SIZE_BITS bits of the alias
	 *	%r28	aliased address, store pointer
	 *	%r1	chunk counter
	 */
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* drop the in-page offset bits */

	/* Drop a stale translation for the alias address */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

	/* 128-byte chunks: sixteen doubleword stores per pass. */
1:
	.irp	off, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120
	std		%r0, \off(%r28)
	.endr
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28		/* delay slot: next chunk */

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

	/* 64-byte chunks: sixteen word stores per pass. */
1:
	.irp	off, 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
	stw		%r0, \off(%r28)
	.endr
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28		/* delay slot: next chunk */
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)
731
ENTRY_CFI(flush_dcache_page_asm)
	/*
	 * Flush one page from the data cache (fdc) through a temporary
	 * alias mapping.
	 *	%r25	in: supplies the low TMPALIAS_SIZE_BITS bits of the
	 *		alias; reused afterwards as the loop limit
	 *	%r28	aliased address, stepped by the stride on every fdc,m
	 *	%r31	dcache_stride (written as %r31 like the rest of the
	 *		file, rather than relying on a bare r31 alias)
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	fdc,m		%r31(%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)
780
ENTRY_CFI(purge_dcache_page_asm)
	/*
	 * Purge one page from the data cache (pdc) through a temporary
	 * alias mapping.
	 *	%r25	in: supplies the low TMPALIAS_SIZE_BITS bits of the
	 *		alias; reused afterwards as the loop limit
	 *	%r28	aliased address, stepped by the stride on every pdc,m
	 *	%r31	dcache_stride (written as %r31 like the rest of the
	 *		file, rather than relying on a bare r31 alias)
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	pdc,m		%r31(%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)
829
ENTRY_CFI(flush_icache_page_asm)
	/*
	 * Flush one page from the instruction cache (fic) through a
	 * temporary alias mapping.
	 *	%r25	in: supplies the low TMPALIAS_SIZE_BITS bits of the
	 *		alias; reused afterwards as the loop limit
	 *	%r28	aliased address, stepped by the stride on every fic,m
	 *	%r31	icache_stride
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_ICACHE.
	 */
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* drop the in-page offset bits */

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l         %r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb           %r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

	/* %r25 = alias address + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4.
	 * Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	fic,m		%r31(%sr4,%r28)
	.endr
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)
888
ENTRY_CFI(flush_kernel_dcache_page_asm)
	/*
	 * Flush the page starting at %r26 from the data cache (fdc).
	 *	%r26	in: page address; stepped by the stride on every fdc,m
	 *	%r23	dcache_stride
	 *	%r25	loop limit (overwritten here)
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	fdc,m		%r23(%r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)
924
ENTRY_CFI(purge_kernel_dcache_page_asm)
	/*
	 * Purge the page starting at %r26 from the data cache (pdc).
	 *	%r26	in: page address; stepped by the stride on every pdc,m
	 *	%r23	dcache_stride
	 *	%r25	loop limit (overwritten here)
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	pdc,m		%r23(%r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)
960
ENTRY_CFI(flush_user_dcache_range_asm)
	/*
	 * Flush the address range [%r26, %r25) from the data cache,
	 * addressing through %sr3.
	 *	%r26	in: start; rounded down to a stride multiple, then
	 *		stepped by the stride on every fdc,m
	 *	%r25	in: end of the range
	 *	%r23	dcache_stride
	 *	%r21	stride - 1 as mask, then sixteen strides
	 *	%r22	look-ahead address for the unrolled loop
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

	/* %r21 = stride shifted left by four = one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/* Bulk loop: sixteen lines per pass while a full pass still fits. */
1:	add		%r22, %r21, %r22
	.rept	15
	fdc,m		%r23(%sr3, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

	/* Tail loop: one line per pass until %r26 reaches %r25. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)
1001
ENTRY_CFI(flush_kernel_dcache_range_asm)
	/*
	 * Flush the address range [%r26, %r25) from the data cache.
	 *	%r26	in: start; rounded down to a stride multiple, then
	 *		stepped by the stride on every fdc,m
	 *	%r25	in: end of the range
	 *	%r23	dcache_stride
	 *	%r21	stride - 1 as mask, then sixteen strides
	 *	%r22	look-ahead address for the unrolled loop
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE;
	 * in this routine the sync lies inside that range.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

	/* %r21 = stride shifted left by four = one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/* Bulk loop: sixteen lines per pass while a full pass still fits. */
1:	add		%r22, %r21, %r22
	.rept	15
	fdc,m		%r23(%r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

	/* Tail loop: one line per pass until %r26 reaches %r25. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)
1042
ENTRY_CFI(purge_kernel_dcache_range_asm)
	/*
	 * Purge the address range [%r26, %r25) from the data cache.
	 *	%r26	in: start; rounded down to a stride multiple, then
	 *		stepped by the stride on every pdc,m
	 *	%r25	in: end of the range
	 *	%r23	dcache_stride
	 *	%r21	stride - 1 as mask, then sixteen strides
	 *	%r22	look-ahead address for the unrolled loop
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_DCACHE;
	 * in this routine the sync lies inside that range.
	 */
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

	/* %r21 = stride shifted left by four = one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/* Bulk loop: sixteen lines per pass while a full pass still fits. */
1:	add		%r22, %r21, %r22
	.rept	15
	pdc,m		%r23(%r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

	/* Tail loop: one line per pass until %r26 reaches %r25. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)
1083
ENTRY_CFI(flush_user_icache_range_asm)
	/*
	 * Flush the address range [%r26, %r25) from the instruction
	 * cache, addressing through %sr3.
	 *	%r26	in: start; rounded down to a stride multiple, then
	 *		stepped by the stride on every fic,m
	 *	%r25	in: end of the range
	 *	%r23	icache_stride
	 *	%r21	stride - 1 as mask, then sixteen strides
	 *	%r22	look-ahead address for the unrolled loop
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_ICACHE.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26	/* align start down to the stride */

	/* %r21 = stride shifted left by four = one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */

	/* Bulk loop: sixteen lines per pass while a full pass still fits. */
1:	add		%r22, %r21, %r22
	.rept	15
	fic,m		%r23(%sr3, %r26)
	.endr
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

	/* Tail loop: one line per pass until %r26 reaches %r25. */
2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)
1124
ENTRY_CFI(flush_kernel_icache_page)
	/*
	 * Flush the page starting at %r26 from the instruction cache,
	 * addressing through %sr4.
	 *	%r26	in: page address; stepped by the stride on every fic,m
	 *	%r23	icache_stride
	 *	%r25	loop limit (overwritten here)
	 *	%r1	scratch
	 * The 88..89 range is handed to ALTERNATIVE for ALT_COND_NO_ICACHE.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride: address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

	/* Sixteen lines per pass; the sixteenth sits in the delay slot. */
1:
	.rept	15
	fic,m		%r23(%sr4, %r26)
	.endr
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)
1161
ENTRY_CFI(flush_kernel_icache_range_asm)
	/*
	 * Flush instruction-cache lines through space register %sr4 for
	 * every line address below %arg1, starting at %arg0.
	 *
	 * In:       %arg0 (%r26) = first address; rounded down to a multiple
	 *                          of icache_stride below (this assumes the
	 *                          stride is a power of two)
	 *           %arg1 (%r25) = upper bound; lines at or above it are
	 *                          not touched
	 *           %r2          = return address
	 * Clobbers: %r1, %r21, %r22, %r23; %arg0 is left at or past %arg1.
	 *
	 * Labels 88 and 89 delimit the range handed to ALTERNATIVE() with
	 * ALT_COND_NO_ICACHE / INSN_NOP, so the instruction count between
	 * them must stay as it is.
	 */
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23	/* %r23 = icache_stride */
	ldo		-1(%r23), %r21			/* %r21 = stride - 1 */
	ANDCM		%arg0, %r21, %arg0		/* align start down */

	/* %r21 = stride << 4, the bytes covered by one unrolled pass */
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%arg0, %r21, %r22	/* %r22 = end of first pass */

	/*
	 * Skip the unrolled loop when a full pass would overshoot.
	 * Forward branch with ,n: the add at 1: is nullified when the
	 * branch is taken and executes on fall-through.
	 */
	cmpb,COND(>>),n	%r22, %arg1, 2f		/* predict not taken */

	/* Unrolled loop: 16 lines per pass (15 here + 1 in the delay slot) */
1:	add		%r22, %r21, %r22	/* look one pass ahead */
	.rept		15
	fic,m		%r23(%sr4, %arg0)
	.endr
	cmpb,COND(<<=)	%r22, %arg1, 1b		/* predict taken */
	fic,m		%r23(%sr4, %arg0)	/* delay slot, always runs */

	/*
	 * Remainder, one line at a time.  Backward branch with ,n: the
	 * fic in the delay slot runs only while the branch is taken,
	 * i.e. while %arg1 > %arg0 (unsigned).
	 */
2:	cmpb,COND(>>),n	%arg1, %arg0, 2b	/* predict taken */
	fic,m		%r23(%sr4, %arg0)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)		/* return to caller */
	nop					/* branch delay slot */
ENDPROC_CFI(flush_kernel_icache_range_asm)
1202
1203	.text
1204
1205	/* align should cover use of rfi in disable_sr_hashing_asm and
1206	 * srdis_done.
1207	 */
1208	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * disable_sr_hashing_asm - turn off space-register hashing.
	 *
	 * In:       %r26 = CPU family selector.  It is compared against
	 *                  SRHASH_PCXST, SRHASH_PCXL and SRHASH_PA20; any
	 *                  other value skips the diagnose-register update
	 *                  and only performs the mode round trip.
	 *           %r2  = return address.
	 * Clobbers: %r1 always, %r28 on the three matching paths.
	 *
	 * The routine drops to real mode (rfi with REAL_MODE_PSW to the
	 * physical address of 1:), rewrites the diagnose register, then
	 * returns to virtual mode (rfi with KERNEL_PSW to 2:).  The
	 * caller's I-bit is not saved: both rsm PSW_SM_I instructions
	 * discard the old mask into %r0, and the PSW on return is
	 * whatever KERNEL_PSW specifies.
	 *
	 * mfdiag/mtdiag are emitted as raw .word encodings (presumably
	 * because the assembler has no mnemonic for them - confirm).
	 */

	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0		/* mask external interrupts */
	load32		PA(1f), %r1		/* physical address to resume at */
	/* padding that goes with the pcxt_ssm_bug note above */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1		/* address of the following insn */
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw		/* PSW to be installed by rfi */
	rfi					/* continue at 1: in real mode */
	nop

	/*
	 * Dispatch on the selector.  Each cmpib is a forward branch with
	 * ,n, so the instruction after it is nullified when the branch is
	 * taken and executes (the next test) when it is not.
	 */
1:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
	b,n		srdis_done		/* no match: nothing to change */

srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */

	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

	/* srdis_pa20 falls through into srdis_done */

srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* mask external interrupts */
	load32 	   	2f, %r1			/* virtual address to resume at */
	/* same padding as after the first rsm PSW_SM_I above */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1		/* address of the following insn */
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw		/* PSW to be installed by rfi */
	rfi					/* continue at 2: with KERNEL_PSW */
	nop

2:      bv		%r0(%r2)		/* return to caller */
	nop					/* branch delay slot */
ENDPROC_CFI(disable_sr_hashing_asm)
1292
1293	.end
1294