1/*
2 *  PowerPC version
3 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
5 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
6 *  Adapted for Power Macintosh by Paul Mackerras.
7 *  Low-level exception handlers and MMU support
8 *  rewritten by Paul Mackerras.
9 *    Copyright (C) 1996 Paul Mackerras.
10 *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
11 *
12 *  This file contains the system call entry code, context switch
13 *  code, and exception/interrupt return code for PowerPC.
14 *
15 *  This program is free software; you can redistribute it and/or
16 *  modify it under the terms of the GNU General Public License
17 *  as published by the Free Software Foundation; either version
18 *  2 of the License, or (at your option) any later version.
19 */
20
21#include <linux/errno.h>
22#include <asm/unistd.h>
23#include <asm/processor.h>
24#include <asm/page.h>
25#include <asm/mmu.h>
26#include <asm/thread_info.h>
27#include <asm/ppc_asm.h>
28#include <asm/asm-offsets.h>
29#include <asm/cputable.h>
30#include <asm/firmware.h>
31#include <asm/bug.h>
32#include <asm/ptrace.h>
33#include <asm/irqflags.h>
34#include <asm/ftrace.h>
35#include <asm/hw_irq.h>
36
37/*
38 * System calls.
39 */
/*
 * TOC entries used by the system call path.  .SYS_CALL_TABLE holds the
 * address of .sys_call_table and is loaded with "@toc(2)" in system_call.
 */
40	.section	".toc","aw"
41.SYS_CALL_TABLE:
42	.tc .sys_call_table[TC],.sys_call_table
43
44/* This value is used to mark exception frames on the stack. */
/* system_call_common stores it at r1 + STACK_FRAME_OVERHEAD - 16. */
45exception_marker:
46	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
47
48	.section	".text"
49	.align 7
50
51#undef SHOW_SYSCALLS
52
/*
 * system_call_common - common system call entry path.
 *
 * Register usage on entry, as consumed by the code below:
 *   r0     system call number (range-checked against NR_syscalls)
 *   r3-r8  system call arguments (saved to GPR3..GPR8 in the frame)
 *   r9     value saved as GPR13 in the frame
 *   r11    value saved as _NIP
 *   r12    value saved as _MSR; its MSR_PR bit is tested to select the stack
 *   r13    base register for all PACA* accesses
 *
 * Builds an INT_FRAME_SIZE frame, hard-enables interrupts, diverts to
 * syscall_dotrace when any _TIF_SYSCALL_T_OR_A flag is set, then calls the
 * handler fetched from the system call table.  When the handler returns,
 * execution falls through into syscall_exit.
 */
53	.globl system_call_common
54system_call_common:
55	andi.	r10,r12,MSR_PR
56	mr	r10,r1
57	addi	r1,r1,-INT_FRAME_SIZE
	/* cr0.eq (MSR_PR clear): keep the frame just carved below the old r1;
	 * otherwise use the stack pointer stored in PACAKSAVE instead. */
58	beq-	1f
59	ld	r1,PACAKSAVE(r13)
	/* r10 = r1 as it was on entry: store it as back chain and as GPR1 */
601:	std	r10,0(r1)
61	std	r11,_NIP(r1)
62	std	r12,_MSR(r1)
63	std	r0,GPR0(r1)
64	std	r10,GPR1(r1)
65	ACCOUNT_CPU_USER_ENTRY(r10, r11)
66	/*
67	 * This "crclr so" clears CR0.SO, which is the error indication on
68	 * return from this system call.  There must be no cmp instruction
69	 * between it and the "mfcr r9" below, otherwise if XER.SO is set,
70	 * CR0.SO will get set, causing all system calls to appear to fail.
71	 */
72	crclr	so
73	std	r2,GPR2(r1)
74	std	r3,GPR3(r1)
75	std	r4,GPR4(r1)
76	std	r5,GPR5(r1)
77	std	r6,GPR6(r1)
78	std	r7,GPR7(r1)
79	std	r8,GPR8(r1)
	/* The GPR9..GPR12 slots are zeroed rather than saved */
80	li	r11,0
81	std	r11,GPR9(r1)
82	std	r11,GPR10(r1)
83	std	r11,GPR11(r1)
84	std	r11,GPR12(r1)
85	std	r9,GPR13(r1)
86	mfcr	r9
87	mflr	r10
	/* Trap value 0xc01: bit 0 is set, which save_nvgprs tests to decide
	 * whether the non-volatile GPRs still have to be saved. */
88	li	r11,0xc01
89	std	r9,_CCR(r1)
90	std	r10,_LINK(r1)
91	std	r11,_TRAP(r1)
92	mfxer	r9
93	mfctr	r10
94	std	r9,_XER(r1)
95	std	r10,_CTR(r1)
96	std	r3,ORIG_GPR3(r1)
	/* Switch to the TOC pointer kept in the PACA */
97	ld	r2,PACATOC(r13)
98	addi	r9,r1,STACK_FRAME_OVERHEAD
99	ld	r11,exception_marker@toc(r2)
100	std	r11,-16(r9)		/* "regshere" marker */
101#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
102BEGIN_FW_FTR_SECTION
	/* NOTE(review): this beq relies on cr0 still holding the MSR_PR test
	 * from the first instruction, i.e. on ACCOUNT_CPU_USER_ENTRY not
	 * touching cr0 - confirm against the macro definition. */
103	beq	33f
104	/* if from user, see if there are any DTL entries to process */
105	ld	r10,PACALPPACAPTR(r13)	/* get ptr to VPA */
106	ld	r11,PACA_DTL_RIDX(r13)	/* get log read index */
107	ld	r10,LPPACA_DTLIDX(r10)	/* get log write index */
108	cmpd	cr1,r11,r10
109	beq+	cr1,33f
110	bl	.accumulate_stolen_time
	/* Reload r0, r3-r8 from the frame and recompute r9 after the call */
111	REST_GPR(0,r1)
112	REST_4GPRS(3,r1)
113	REST_2GPRS(7,r1)
114	addi	r9,r1,STACK_FRAME_OVERHEAD
11533:
116END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
117#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
118
119	/*
120	 * A syscall should always be called with interrupts enabled
121	 * so we just unconditionally hard-enable here. When some kind
122	 * of irq tracing is used, we additionally check that condition
123	 * is correct
124	 */
125#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
	/* Warn (trap + BUGFLAG_WARNING entry) unless PACASOFTIRQEN == 1 */
126	lbz	r10,PACASOFTIRQEN(r13)
127	xori	r10,r10,1
1281:	tdnei	r10,0
129	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
130#endif
131
132#ifdef CONFIG_PPC_BOOK3E
133	wrteei	1
134#else
	/* MSR = PACAKMSR | MSR_EE */
135	ld	r11,PACAKMSR(r13)
136	ori	r11,r11,MSR_EE
137	mtmsrd	r11,1
138#endif /* CONFIG_PPC_BOOK3E */
139
140	/* We do need to set SOFTE in the stack frame or the return
141	 * from interrupt will be painful
142	 */
143	li	r10,1
144	std	r10,SOFTE(r1)
145
146#ifdef SHOW_SYSCALLS
147	bl	.do_show_syscall
148	REST_GPR(0,r1)
149	REST_4GPRS(3,r1)
150	REST_2GPRS(7,r1)
151	addi	r9,r1,STACK_FRAME_OVERHEAD
152#endif
	/* r11 = r1 rounded down to a 2^THREAD_SHIFT boundary (the base used
	 * for TI_FLAGS); r10 = the flags word, kept live for system_call. */
153	clrrdi	r11,r1,THREAD_SHIFT
154	ld	r10,TI_FLAGS(r11)
155	andi.	r11,r10,_TIF_SYSCALL_T_OR_A
156	bne-	syscall_dotrace
	/* Both paths arrive here with r0 = call number and r10 = TI_FLAGS */
157syscall_dotrace_cont:
158	cmpldi	0,r0,NR_syscalls
159	bge-	syscall_enosys
160
161system_call:			/* label this so stack traces look sane */
162/*
163 * Need to vector to 32 Bit or default sys_call_table here,
164 * based on caller's run-mode / personality.
165 */
166	ld	r11,.SYS_CALL_TABLE@toc(2)
167	andi.	r10,r10,_TIF_32BIT
168	beq	15f
169	addi	r11,r11,8	/* use 32-bit syscall entries */
	/* _TIF_32BIT set: clear the upper 32 bits of each argument register */
170	clrldi	r3,r3,32
171	clrldi	r4,r4,32
172	clrldi	r5,r5,32
173	clrldi	r6,r6,32
174	clrldi	r7,r7,32
175	clrldi	r8,r8,32
17615:
	/* Table slots are 16 bytes apart (call number << 4); the 32-bit
	 * variant sits 8 bytes into the slot, hence the addi above. */
177	slwi	r0,r0,4
178	ldx	r10,r11,r0	/* Fetch system call handler [ptr] */
179	mtctr   r10
180	bctrl			/* Call handler */
181
/*
 * syscall_exit - return from a system call.
 *
 * In: r3 = handler return value, r1 = frame built by system_call_common.
 *
 * Stores r3 in RESULT, hard-disables interrupts, and branches to
 * syscall_exit_work if any of the thread-info flags tested below is set.
 * Otherwise a return value that is unsigned >= -_LAST_ERRNO goes through
 * syscall_error, and the saved NIP/MSR are loaded into SRR0/SRR1 for RFI.
 */
182syscall_exit:
183	std	r3,RESULT(r1)
184#ifdef SHOW_SYSCALLS
185	bl	.do_show_syscall_exit
186	ld	r3,RESULT(r1)
187#endif
	/* r12 = r1 rounded down to a 2^THREAD_SHIFT boundary (TI_FLAGS base) */
188	clrrdi	r12,r1,THREAD_SHIFT
189
190	ld	r8,_MSR(r1)
191#ifdef CONFIG_PPC_BOOK3S
192	/* No MSR:RI on BookE */
193	andi.	r10,r8,MSR_RI
194	beq-	unrecov_restore
195#endif
196	/*
197	 * Disable interrupts so current_thread_info()->flags can't change,
198	 * and so that we don't get interrupted after loading SRR0/1.
199	 */
200#ifdef CONFIG_PPC_BOOK3E
201	wrteei	0
202#else
	/* r10 = PACAKMSR stays live: it is reused below to clear MSR_RI */
203	ld	r10,PACAKMSR(r13)
204	mtmsrd	r10,1
205#endif /* CONFIG_PPC_BOOK3E */
206
207	ld	r9,TI_FLAGS(r12)
208	li	r11,-_LAST_ERRNO
209	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
210	bne-	syscall_exit_work
	/* Unsigned compare: r3 >= -_LAST_ERRNO is treated as an error return */
211	cmpld	r3,r11
212	ld	r5,_CCR(r1)
213	bge-	syscall_error
214syscall_error_cont:
215	ld	r7,_NIP(r1)
216BEGIN_FTR_SECTION
217	stdcx.	r0,0,r1			/* to clear the reservation */
218END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
	/* cr0 from this test is consumed by the "beq- 1f" further down;
	 * the instructions in between do not write cr0. */
219	andi.	r6,r8,MSR_PR
220	ld	r4,_LINK(r1)
221	/*
222	 * Clear RI before restoring r13.  If we are returning to
223	 * userspace and we take an exception after restoring r13,
224	 * we end up corrupting the userspace r13 value.
225	 */
226#ifdef CONFIG_PPC_BOOK3S
227	/* No MSR:RI on BookE */
228	li	r12,MSR_RI
229	andc	r11,r10,r12
230	mtmsrd	r11,1			/* clear MSR.RI */
231#endif /* CONFIG_PPC_BOOK3S */
232
233	beq-	1f
234	ACCOUNT_CPU_USER_EXIT(r11, r12)
235	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
2361:	ld	r2,GPR2(r1)
237	ld	r1,GPR1(r1)
	/* r4 = saved LR, r5 = saved CR, r7 = saved NIP, r8 = saved MSR */
238	mtlr	r4
239	mtcr	r5
240	mtspr	SPRN_SRR0,r7
241	mtspr	SPRN_SRR1,r8
242	RFI
243	b	.	/* prevent speculative execution */
244
/*
 * syscall_error - reached from syscall_exit when r3 (unsigned) is
 * >= -_LAST_ERRNO.  r5 holds the saved CR loaded by syscall_exit.
 * Sets the SO bit in that CR image, writes it back to _CCR, turns r3 into
 * the positive value and resumes at syscall_error_cont.
 */
245syscall_error:
246	oris	r5,r5,0x1000	/* Set SO bit in CR */
247	neg	r3,r3
248	std	r5,_CCR(r1)
249	b	syscall_error_cont
250
251/* Traced system call support */
/*
 * Reached from system_call_common when a _TIF_SYSCALL_T_OR_A flag is set.
 * Saves the non-volatile GPRs, calls do_syscall_trace_enter with
 * r3 = r1 + STACK_FRAME_OVERHEAD, then rebuilds the register state that
 * syscall_dotrace_cont expects (r0, r3-r8, r9, r10) before rejoining it.
 */
252syscall_dotrace:
253	bl	.save_nvgprs
254	addi	r3,r1,STACK_FRAME_OVERHEAD
255	bl	.do_syscall_trace_enter
256	/*
257	 * Restore argument registers possibly just changed.
258	 * We use the return value of do_syscall_trace_enter
259	 * for the call number to look up in the table (r0).
260	 */
261	mr	r0,r3
262	ld	r3,GPR3(r1)
263	ld	r4,GPR4(r1)
264	ld	r5,GPR5(r1)
265	ld	r6,GPR6(r1)
266	ld	r7,GPR7(r1)
267	ld	r8,GPR8(r1)
268	addi	r9,r1,STACK_FRAME_OVERHEAD
	/* Reload TI_FLAGS into r10: system_call tests _TIF_32BIT in it */
269	clrrdi	r10,r1,THREAD_SHIFT
270	ld	r10,TI_FLAGS(r10)
271	b	syscall_dotrace_cont
272
/*
 * Reached when the call number in r0 is (unsigned) >= NR_syscalls:
 * return -ENOSYS through the normal exit path.
 */
273syscall_enosys:
274	li	r3,-ENOSYS
275	b	syscall_exit
276
/*
 * syscall_exit_work - slow path of syscall_exit, taken when any of the
 * thread-info flags tested there is set.
 *
 * State inherited from syscall_exit: r3 = return value, r9 = TI_FLAGS,
 * r11 = -_LAST_ERRNO, r12 = base used for TI_FLAGS, interrupts
 * hard-disabled.
 */
277syscall_exit_work:
278	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
279	 If TIF_NOERROR is set, just save r3 as it is. */
280
281	andi.	r0,r9,_TIF_RESTOREALL
282	beq+	0f
283	REST_NVGPRS(r1)
284	b	2f
2850:	cmpld	r3,r11		/* r11 is -LAST_ERRNO */
286	blt+	1f
287	andi.	r0,r9,_TIF_NOERROR
288	bne-	1f
	/* Error return: flag it in the saved CR and make r3 positive */
289	ld	r5,_CCR(r1)
290	neg	r3,r3
291	oris	r5,r5,0x1000	/* Set SO bit in CR */
292	std	r5,_CCR(r1)
2931:	std	r3,GPR3(r1)
2942:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
295	beq	4f
296
297	/* Clear per-syscall TIF flags if any are set.  */
298
299	li	r11,_TIF_PERSYSCALL_MASK
	/* r12 is advanced to the flags word for the ldarx/stdcx. retry loop
	 * and moved back afterwards. */
300	addi	r12,r12,TI_FLAGS
3013:	ldarx	r10,0,r12
302	andc	r10,r10,r11
303	stdcx.	r10,0,r12
304	bne-	3b
305	subi	r12,r12,TI_FLAGS
306
3074:	/* Anything else left to do? */
308	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
309	beq	.ret_from_except_lite
310
311	/* Re-enable interrupts */
312#ifdef CONFIG_PPC_BOOK3E
313	wrteei	1
314#else
315	ld	r10,PACAKMSR(r13)
316	ori	r10,r10,MSR_EE
317	mtmsrd	r10,1
318#endif /* CONFIG_PPC_BOOK3E */
319
	/* Full register state is saved before calling the trace-leave hook */
320	bl	.save_nvgprs
321	addi	r3,r1,STACK_FRAME_OVERHEAD
322	bl	.do_syscall_trace_leave
323	b	.ret_from_except
324
325/* Save non-volatile GPRs, if not already saved. */
/*
 * In:  r1 = exception frame.
 * Bit 0 of the frame's _TRAP word is the "not yet saved" flag: if it is
 * clear the routine returns immediately; otherwise SAVE_NVGPRS runs and the
 * bit is cleared in _TRAP.  Uses r0 and r11 as scratch and writes cr0.
 */
326_GLOBAL(save_nvgprs)
327	ld	r11,_TRAP(r1)
328	andi.	r0,r11,1
329	beqlr-
330	SAVE_NVGPRS(r1)
331	clrrdi	r0,r11,1
332	std	r0,_TRAP(r1)
333	blr
334
335
336/*
337 * The sigsuspend and rt_sigsuspend system calls can call do_signal
338 * and thus put the process into the stopped state where we might
339 * want to examine its user state with ptrace.  Therefore we need
340 * to save all the nonvolatile registers (r14 - r31) before calling
341 * the C code.  Similarly, fork, vfork and clone need the full
342 * register state on the stack so that it can be copied to the child.
343 */
344
/* Wrapper: save the non-volatile GPRs, call sys_fork, and leave through
 * syscall_exit (it branches there instead of returning to its caller). */
345_GLOBAL(ppc_fork)
346	bl	.save_nvgprs
347	bl	.sys_fork
348	b	syscall_exit
349
/* Wrapper: save the non-volatile GPRs, call sys_vfork, and leave through
 * syscall_exit (it branches there instead of returning to its caller). */
350_GLOBAL(ppc_vfork)
351	bl	.save_nvgprs
352	bl	.sys_vfork
353	b	syscall_exit
354
/* Wrapper: save the non-volatile GPRs, call sys_clone, and leave through
 * syscall_exit (it branches there instead of returning to its caller). */
355_GLOBAL(ppc_clone)
356	bl	.save_nvgprs
357	bl	.sys_clone
358	b	syscall_exit
359
/* Wrapper: save the non-volatile GPRs, call compat_sys_swapcontext, and
 * leave through syscall_exit. */
360_GLOBAL(ppc32_swapcontext)
361	bl	.save_nvgprs
362	bl	.compat_sys_swapcontext
363	b	syscall_exit
364
/* Wrapper: save the non-volatile GPRs, call sys_swapcontext, and leave
 * through syscall_exit. */
365_GLOBAL(ppc64_swapcontext)
366	bl	.save_nvgprs
367	bl	.sys_swapcontext
368	b	syscall_exit
369
/*
 * ret_from_fork - calls schedule_tail, reloads the non-volatile GPRs from
 * the frame at r1, and leaves through syscall_exit with r3 = 0.
 * NOTE(review): presumably the first code a newly forked task runs (see the
 * _switch header comment about the fork path) - confirm in copy_thread.
 */
370_GLOBAL(ret_from_fork)
371	bl	.schedule_tail
372	REST_NVGPRS(r1)
373	li	r3,0
374	b	syscall_exit
375
376	.section	".toc","aw"
/* TOC entry holding the address of dscr_default; _switch loads it with
 * "@toc(2)" and dereferences it when THREAD_DSCR_INHERIT is zero. */
377DSCR_DEFAULT:
378	.tc dscr_default[TC],dscr_default
379
380	.section	".text"
381
382/*
383 * This routine switches between two different tasks.  The process
384 * state of one is saved on its kernel stack.  Then the state
385 * of the other is restored from its kernel stack.  The memory
386 * management hardware is updated to the second process's state.
387 * Finally, we can return to the second process, via ret_from_except.
388 * On entry, r3 points to the THREAD for the current task, r4
389 * points to the THREAD for the new task.
390 *
391 * Note: there are two ways to get to the "going out" portion
392 * of this code; either by coming in via the entry (_switch)
393 * or via "fork" which must set up an environment equivalent
394 * to the "_switch" path.  If you change this you'll have to change
395 * the fork code also.
396 *
397 * The code which creates the new task context is in 'copy_thread'
398 * in arch/powerpc/kernel/process.c
399 */
400	.align	7
/*
 * _switch(r3 = THREAD of outgoing task, r4 = THREAD of incoming task)
 * Returns (in the incoming task's context) r3 = r3 - THREAD.
 *
 * Scratch usage in the body: r20 = LR at entry, r22 = MSR, r23 = CR,
 * r24 = VRSAVE, r25 = outgoing DSCR, r8 = incoming task's saved stack
 * pointer.
 */
401_GLOBAL(_switch)
402	mflr	r0
403	std	r0,16(r1)
404	stdu	r1,-SWITCH_FRAME_SIZE(r1)
405	/* r3-r13 are caller saved -- Cort */
406	SAVE_8GPRS(14, r1)
407	SAVE_10GPRS(22, r1)
408	mflr	r20		/* Return to switch caller */
409	mfmsr	r22
	/* r0 accumulates the facility-enable bits (FP, VSX, VEC) that get
	 * cleared from the MSR below if any of them is currently set. */
410	li	r0, MSR_FP
411#ifdef CONFIG_VSX
412BEGIN_FTR_SECTION
413	oris	r0,r0,MSR_VSX@h	/* Disable VSX */
414END_FTR_SECTION_IFSET(CPU_FTR_VSX)
415#endif /* CONFIG_VSX */
416#ifdef CONFIG_ALTIVEC
417BEGIN_FTR_SECTION
418	oris	r0,r0,MSR_VEC@h	/* Disable altivec */
419	mfspr	r24,SPRN_VRSAVE	/* save vrsave register value */
420	std	r24,THREAD_VRSAVE(r3)
421END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
422#endif /* CONFIG_ALTIVEC */
423#ifdef CONFIG_PPC64
424BEGIN_FTR_SECTION
	/* r25 is kept until the DSCR comparison after the stack switch */
425	mfspr	r25,SPRN_DSCR
426	std	r25,THREAD_DSCR(r3)
427END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
428#endif
429	and.	r0,r0,r22
430	beq+	1f
431	andc	r22,r22,r0
432	MTMSRD(r22)
433	isync
	/* Record the resume address (caller's LR) and CR in the switch frame */
4341:	std	r20,_NIP(r1)
435	mfcr	r23
436	std	r23,_CCR(r1)
437	std	r1,KSP(r3)	/* Set old stack pointer */
438
439#ifdef CONFIG_SMP
440	/* We need a sync somewhere here to make sure that if the
441	 * previous task gets rescheduled on another CPU, it sees all
442	 * stores it has performed on this one.
443	 */
444	sync
445#endif /* CONFIG_SMP */
446
447	/*
448	 * If we optimise away the clear of the reservation in system
449	 * calls because we know the CPU tracks the address of the
450	 * reservation, then we need to clear it here to cover the
451	 * case that the kernel context switch path has no larx
452	 * instructions.
453	 */
454BEGIN_FTR_SECTION
455	ldarx	r6,0,r1
456END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
457
458	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
459	std	r6,PACACURRENT(r13)	/* Set new 'current' */
460
461	ld	r8,KSP(r4)	/* new stack pointer */
462#ifdef CONFIG_PPC_BOOK3S
463BEGIN_FTR_SECTION
464  BEGIN_FTR_SECTION_NESTED(95)
465	clrrdi	r6,r8,28	/* get its ESID */
466	clrrdi	r9,r1,28	/* get current sp ESID */
467  FTR_SECTION_ELSE_NESTED(95)
468	clrrdi	r6,r8,40	/* get its 1T ESID */
469	clrrdi	r9,r1,40	/* get current sp 1T ESID */
470  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
471FTR_SECTION_ELSE
472	b	2f
473ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
	/* cr0.eq := (ESID with top two bits stripped == 0) OR (new ESID ==
	 * current ESID); in either case the SLB update is skipped. */
474	clrldi.	r0,r6,2		/* is new ESID c00000000? */
475	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
476	cror	eq,4*cr1+eq,eq
477	beq	2f		/* if yes, don't slbie it */
478
479	/* Bolt in the new stack SLB entry */
480	ld	r7,KSP_VSID(r4)	/* Get new stack's VSID */
481	oris	r0,r6,(SLB_ESID_V)@h
482	ori	r0,r0,(SLB_NUM_BOLTED-1)@l
483BEGIN_FTR_SECTION
484	li	r9,MMU_SEGSIZE_1T	/* insert B field */
485	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
486	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
487END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
488
489	/* Update the last bolted SLB.  No write barriers are needed
490	 * here, provided we only update the current CPU's SLB shadow
491	 * buffer.
492	 */
493	ld	r9,PACA_SLBSHADOWPTR(r13)
494	li	r12,0
	/* Store order: ESID zeroed first, then VSID, then the new ESID.
	 * NOTE(review): presumably so the shadow entry never looks valid
	 * with a mismatched VSID - confirm. */
495	std	r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
496	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
497	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
498
499	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
500	 * we have 1TB segments, the only CPUs known to have the errata
501	 * only support less than 1TB of system memory and we'll never
502	 * actually hit this code path.
503	 */
504
505	slbie	r6
506	slbie	r6		/* Workaround POWER5 < DD2.1 issue */
507	slbmte	r7,r0
508	isync
5092:
510#endif /* CONFIG_PPC_BOOK3S */
511
512	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */
513	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
514	   because we don't need to leave the 288-byte ABI gap at the
515	   top of the kernel stack. */
516	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
517
518	mr	r1,r8		/* start using new stack pointer */
	/* PACAKSAVE is what system_call_common loads into r1 on entry from
	 * user mode */
519	std	r7,PACAKSAVE(r13)
520
521#ifdef CONFIG_ALTIVEC
522BEGIN_FTR_SECTION
523	ld	r0,THREAD_VRSAVE(r4)
524	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
525END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
526#endif /* CONFIG_ALTIVEC */
527#ifdef CONFIG_PPC64
528BEGIN_FTR_SECTION
	/* r0 = THREAD_DSCR of the new task if its THREAD_DSCR_INHERIT word is
	 * non-zero, else the value at dscr_default; SPRN_DSCR is written only
	 * when that differs from the outgoing value still held in r25. */
529	lwz	r6,THREAD_DSCR_INHERIT(r4)
530	ld	r7,DSCR_DEFAULT@toc(2)
531	ld	r0,THREAD_DSCR(r4)
532	cmpwi	r6,0
533	bne	1f
534	ld	r0,0(r7)
5351:	cmpd	r0,r25
536	beq	2f
537	mtspr	SPRN_DSCR,r0
5382:
539END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
540#endif
541
542	ld	r6,_CCR(r1)
543	mtcrf	0xFF,r6
544
545	/* r3-r13 are destroyed -- Cort */
546	REST_8GPRS(14, r1)
547	REST_10GPRS(22, r1)
548
549	/* convert old thread to its task_struct for return value */
550	addi	r3,r3,-THREAD
551	ld	r7,_NIP(r1)	/* Return to _switch caller in new task */
552	mtlr	r7
553	addi	r1,r1,SWITCH_FRAME_SIZE
554	blr
555
556	.align	7
/*
 * ret_from_except - exception return for frames that may hold saved
 * non-volatile GPRs.  If bit 0 of _TRAP is set (not saved, see
 * save_nvgprs) it goes straight to ret_from_except_lite; otherwise the
 * non-volatile GPRs are reloaded first and execution falls through into
 * ret_from_except_lite.
 */
557_GLOBAL(ret_from_except)
558	ld	r11,_TRAP(r1)
559	andi.	r0,r11,1
560	bne	.ret_from_except_lite
561	REST_NVGPRS(r1)
562
/*
 * ret_from_except_lite - exception return without reloading the
 * non-volatile GPRs.  Hard-disables interrupts, then branches to do_work
 * if the thread-info flags require it; otherwise falls through into
 * restore.  do_work is entered with r3 = saved MSR (CONFIG_PREEMPT) or
 * saved MSR & MSR_PR (otherwise), r4 = TI_FLAGS, and r9 = the TI_FLAGS base.
 */
563_GLOBAL(ret_from_except_lite)
564	/*
565	 * Disable interrupts so that current_thread_info()->flags
566	 * can't change between when we test it and when we return
567	 * from the interrupt.
568	 */
569#ifdef CONFIG_PPC_BOOK3E
570	wrteei	0
571#else
572	ld	r10,PACAKMSR(r13) /* Get kernel MSR without EE */
573	mtmsrd	r10,1		  /* Update machine state */
574#endif /* CONFIG_PPC_BOOK3E */
575
576#ifdef CONFIG_PREEMPT
577	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
578	li	r0,_TIF_NEED_RESCHED	/* bits to check */
579	ld	r3,_MSR(r1)
580	ld	r4,TI_FLAGS(r9)
581	/* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
582	rlwimi	r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
583	and.	r0,r4,r0	/* check NEED_RESCHED and maybe SIGPENDING */
584	bne	do_work
585
586#else /* !CONFIG_PREEMPT */
587	ld	r3,_MSR(r1)	/* Returning to user mode? */
588	andi.	r3,r3,MSR_PR
589	beq	restore		/* if not, just restore regs and return */
590
591	/* Check current_thread_info()->flags */
592	clrrdi	r9,r1,THREAD_SHIFT
593	ld	r4,TI_FLAGS(r9)
594	andi.	r0,r4,_TIF_USER_WORK_MASK
595	bne	do_work
596#endif /* !CONFIG_PREEMPT */
597
/*
 * restore / fast_exc_return_irq - reconcile the soft interrupt state
 * before the final register restore.
 *   r5 = SOFTE saved in the frame (the state being returned to)
 *   r6 = current PACASOFTIRQEN
 * SOFTE == 0            -> restore_irq_off
 * SOFTE != 0 and r6 == 1 -> do_restore directly
 * otherwise             -> check PACAIRQHAPPENED, replay if non-zero, else
 *                          set PACASOFTIRQEN = 1 and fall into do_restore.
 */
598	.globl	fast_exc_return_irq
599fast_exc_return_irq:
600restore:
601	/*
602	 * This is the main kernel exit path. First we check if we
603	 * are about to re-enable interrupts
604	 */
605	ld	r5,SOFTE(r1)
606	lbz	r6,PACASOFTIRQEN(r13)
607	cmpwi	cr0,r5,0
608	beq	restore_irq_off
609
610	/* We are enabling, were we already enabled ? Yes, just return */
611	cmpwi	cr0,r6,1
612	beq	cr0,do_restore
613
614	/*
615	 * We are about to soft-enable interrupts (we are hard disabled
616	 * at this point). We check if there's anything that needs to
617	 * be replayed first.
618	 */
619	lbz	r0,PACAIRQHAPPENED(r13)
620	cmpwi	cr0,r0,0
621	bne-	restore_check_irq_replay
622
623	/*
624	 * Get here when nothing happened while soft-disabled, just
625	 * soft-enable and move-on. We will hard-enable as a side
626	 * effect of rfi
627	 */
628restore_no_replay:
629	TRACE_ENABLE_INTS
630	li	r0,1
631	stb	r0,PACASOFTIRQEN(r13);
632
633	/*
634	 * Final return path. BookE is handled in a different file
635	 */
/*
 * do_restore / fast_exception_return - reload the register state from the
 * exception frame at r1 and return with rfid.  On Book3E this just
 * branches to exception_return_book3e.
 * Branches to unrecov_restore if the saved MSR has MSR_RI clear.
 */
636do_restore:
637#ifdef CONFIG_PPC_BOOK3E
638	b	.exception_return_book3e
639#else
640	/*
641	 * Clear the reservation. If we know the CPU tracks the address of
642	 * the reservation then we can potentially save some cycles and use
643	 * a larx. On POWER6 and POWER7 this is significantly faster.
644	 */
645BEGIN_FTR_SECTION
646	stdcx.	r0,0,r1		/* to clear the reservation */
647FTR_SECTION_ELSE
648	ldarx	r4,0,r1
649ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
650
651	/*
652	 * Some code path such as load_up_fpu or altivec return directly
653	 * here. They run entirely hard disabled and do not alter the
654	 * interrupt state. They also don't use lwarx/stwcx. and thus
655	 * are known not to leave dangling reservations.
656	 */
657	.globl	fast_exception_return
658fast_exception_return:
	/* r3 holds the saved MSR until it is written to SRR1 below */
659	ld	r3,_MSR(r1)
660	ld	r4,_CTR(r1)
661	ld	r0,_LINK(r1)
662	mtctr	r4
663	mtlr	r0
664	ld	r4,_XER(r1)
665	mtspr	SPRN_XER,r4
666
667	REST_8GPRS(5, r1)
668
669	andi.	r0,r3,MSR_RI
670	beq-	unrecov_restore
671
672	/*
673	 * Clear RI before restoring r13.  If we are returning to
674	 * userspace and we take an exception after restoring r13,
675	 * we end up corrupting the userspace r13 value.
676	 */
677	ld	r4,PACAKMSR(r13) /* Get kernel MSR without EE */
678	andc	r4,r4,r0	 /* r0 contains MSR_RI here */
679	mtmsrd	r4,1
680
681	/*
682	 * r13 is our per cpu area, only restore it if we are returning to
683	 * userspace the value stored in the stack frame may belong to
684	 * another CPU.
685	 */
686	andi.	r0,r3,MSR_PR
687	beq	1f
688	ACCOUNT_CPU_USER_EXIT(r2, r4)
689	REST_GPR(13, r1)
6901:
691	mtspr	SPRN_SRR1,r3
692
	/* r2 is used as scratch for CR and NIP before its own value is
	 * reloaded from the frame */
693	ld	r2,_CCR(r1)
694	mtcrf	0xFF,r2
695	ld	r2,_NIP(r1)
696	mtspr	SPRN_SRR0,r2
697
	/* r1 is reloaded last because every other load is r1-relative */
698	ld	r0,GPR0(r1)
699	ld	r2,GPR2(r1)
700	ld	r3,GPR3(r1)
701	ld	r4,GPR4(r1)
702	ld	r1,GPR1(r1)
703
704	rfid
705	b	.	/* prevent speculative execution */
706
707#endif /* CONFIG_PPC_BOOK3E */
708
709	/*
710	 * We are returning to a context with interrupts soft disabled.
711	 *
712	 * However, we may also about to hard enable, so we need to
713	 * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
714	 * or that bit can get out of sync and bad things will happen
715	 */
/*
 * restore_irq_off - reached from restore when the frame's SOFTE is 0.
 * If the saved MSR has MSR_EE set, PACA_IRQ_HARD_DIS is cleared in
 * PACAIRQHAPPENED; PACASOFTIRQEN is then set to 0 before do_restore.
 */
716restore_irq_off:
717	ld	r3,_MSR(r1)
718	lbz	r7,PACAIRQHAPPENED(r13)
719	andi.	r0,r3,MSR_EE
720	beq	1f
721	rlwinm	r7,r7,0,~PACA_IRQ_HARD_DIS
722	stb	r7,PACAIRQHAPPENED(r13)
7231:	li	r0,0
724	stb	r0,PACASOFTIRQEN(r13);
725	TRACE_DISABLE_INTS
726	b	do_restore
727
728	/*
729	 * Something did happen, check if a re-emit is needed
730	 * (this also clears paca->irq_happened)
731	 */
/*
 * restore_check_irq_replay - reached from restore when PACAIRQHAPPENED is
 * non-zero.  __check_irq_replay returns in r3 either 0 (nothing to replay,
 * go back to restore_no_replay) or a trap number, which is merged into the
 * frame's _TRAP and dispatched: 0x500 -> do_IRQ, 0x900 -> timer_interrupt,
 * 0x280 -> doorbell_exception (Book3E only).  Every path ends in
 * ret_from_except.
 */
732restore_check_irq_replay:
733	/* XXX: We could implement a fast path here where we check
734	 * for irq_happened being just 0x01, in which case we can
735	 * clear it and return. That means that we would potentially
736	 * miss a decrementer having wrapped all the way around.
737	 *
738	 * Still, this might be useful for things like hash_page
739	 */
740	bl	.__check_irq_replay
741	cmpwi	cr0,r3,0
742 	beq	restore_no_replay
743
744	/*
745	 * We need to re-emit an interrupt. We do so by re-using our
746	 * existing exception frame. We first change the trap value,
747	 * but we need to ensure we preserve the low nibble of it
748	 */
749	ld	r4,_TRAP(r1)
	/* keep only the low 4 bits of the old trap word, then OR in r3 */
750	clrldi	r4,r4,60
751	or	r4,r4,r3
752	std	r4,_TRAP(r1)
753
754	/*
755	 * Then find the right handler and call it. Interrupts are
756	 * still soft-disabled and we keep them that way.
757	*/
758	cmpwi	cr0,r3,0x500
759	bne	1f
760	addi	r3,r1,STACK_FRAME_OVERHEAD;
761 	bl	.do_IRQ
762	b	.ret_from_except
7631:	cmpwi	cr0,r3,0x900
764	bne	1f
765	addi	r3,r1,STACK_FRAME_OVERHEAD;
766	bl	.timer_interrupt
767	b	.ret_from_except
768#ifdef CONFIG_PPC_BOOK3E
7691:	cmpwi	cr0,r3,0x280
770	bne	1f
771	addi	r3,r1,STACK_FRAME_OVERHEAD;
772	bl	.doorbell_exception
773	b	.ret_from_except
774#endif /* CONFIG_PPC_BOOK3E */
7751:	b	.ret_from_except /* What else to do here ? */
776
777
778
/*
 * do_work - reached from ret_from_except_lite when thread-info flags need
 * attention.  In: r3 = saved MSR (or saved MSR & MSR_PR without
 * CONFIG_PREEMPT), r4 = TI_FLAGS, r9 = base used for TI_FLAGS/TI_PREEMPT.
 *
 * With CONFIG_PREEMPT a return to kernel mode may call
 * preempt_schedule_irq; a return to user mode goes to user_work, which
 * either schedules or calls do_notify_resume.
 * (No branch to the local label "3:" below is visible in this file.)
 */
7793:
780do_work:
781#ifdef CONFIG_PREEMPT
782	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
783	bne	user_work
784	/* Check that preempt_count() == 0 and interrupts are enabled */
785	lwz	r8,TI_PREEMPT(r9)
786	cmpwi	cr1,r8,0
787	ld	r0,SOFTE(r1)
788	cmpdi	r0,0
	/* cr0.eq := (TI_PREEMPT == 0) AND (SOFTE != 0); preempt only then */
789	crandc	eq,cr1*4+eq,eq
790	bne	restore
791
792	/*
793	 * Here we are preempting the current task. We want to make
794	 * sure we are soft-disabled first
795	 */
796	SOFT_DISABLE_INTS(r3,r4)
7971:	bl	.preempt_schedule_irq
798
799	/* Re-test flags and eventually loop */
800	clrrdi	r9,r1,THREAD_SHIFT
801	ld	r4,TI_FLAGS(r9)
802	andi.	r0,r4,_TIF_NEED_RESCHED
803	bne	1b
804	b	restore
805
806user_work:
807#endif /* CONFIG_PREEMPT */
808
809	andi.	r0,r4,_TIF_NEED_RESCHED
810	beq	1f
811	bl	.restore_interrupts
812	bl	.schedule
813	b	.ret_from_except_lite
814
	/* Not a reschedule: save the full register set, then call
	 * do_notify_resume with r3 = r1 + STACK_FRAME_OVERHEAD. */
8151:	bl	.save_nvgprs
816	bl	.restore_interrupts
817	addi	r3,r1,STACK_FRAME_OVERHEAD
818	bl	.do_notify_resume
819	b	.ret_from_except
820
/*
 * unrecov_restore - reached when the saved MSR has MSR_RI clear.  Calls
 * unrecoverable_exception with r3 = r1 + STACK_FRAME_OVERHEAD, and repeats
 * the call should it ever return.
 */
821unrecov_restore:
822	addi	r3,r1,STACK_FRAME_OVERHEAD
823	bl	.unrecoverable_exception
824	b	unrecov_restore
825
826#ifdef CONFIG_PPC_RTAS
827/*
828 * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
829 * called with the MMU off.
830 *
831 * In addition, we need to be in 32b mode, at least for now.
832 *
833 * Note: r3 is an input parameter to rtas, so don't trash it...
834 */
/*
 * enter_rtas - call the RTAS entry point.
 * In:  r3 = argument passed through to RTAS untouched (see the note above).
 * Saves r2, r13, r14-r31, CR, CTR, XER, DAR and DSISR in an
 * RTAS_FRAME_SIZE frame, stashes r1 and the MSR in the PACA, sets LR to the
 * real-mode address of rtas_return_loc, and rfid's to rtas->entry with
 * r4 = rtas->base.  Control comes back via rtas_return_loc.
 */
835_GLOBAL(enter_rtas)
836	mflr	r0
837	std	r0,16(r1)
838        stdu	r1,-RTAS_FRAME_SIZE(r1)	/* Save SP and create stack space. */
839
840	/* Because RTAS is running in 32b mode, it clobbers the high order half
841	 * of all registers that it saves.  We therefore save those registers
842	 * RTAS might touch to the stack.  (r0, r3-r13 are caller saved)
843   	 */
844	SAVE_GPR(2, r1)			/* Save the TOC */
845	SAVE_GPR(13, r1)		/* Save paca */
846	SAVE_8GPRS(14, r1)		/* Save the non-volatiles */
847	SAVE_10GPRS(22, r1)		/* ditto */
848
849	mfcr	r4
850	std	r4,_CCR(r1)
851	mfctr	r5
852	std	r5,_CTR(r1)
853	mfspr	r6,SPRN_XER
854	std	r6,_XER(r1)
855	mfdar	r7
856	std	r7,_DAR(r1)
857	mfdsisr	r8
858	std	r8,_DSISR(r1)
859
860	/* Temporary workaround to clear CR until RTAS can be modified to
861	 * ignore all bits.
862	 */
863	li	r0,0
864	mtcr	r0
865
866#ifdef CONFIG_BUG
867	/* There is no way it is acceptable to get here with interrupts enabled,
868	 * check it with the asm equivalent of WARN_ON
869	 */
870	lbz	r0,PACASOFTIRQEN(r13)
8711:	tdnei	r0,0
872	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
873#endif
874
875	/* Hard-disable interrupts */
876	mfmsr	r6
	/* Rotate left 48, clear the top bit, rotate left 16: the net effect
	 * is to clear the bit of value 0x8000 in the MSR copy in r7, while
	 * r6 keeps the unmodified MSR. */
877	rldicl	r7,r6,48,1
878	rotldi	r7,r7,16
879	mtmsrd	r7,1
880
881	/* Unfortunately, the stack pointer and the MSR are also clobbered,
882	 * so they are saved in the PACA which allows us to restore
883	 * our original state after RTAS returns.
884         */
885	std	r1,PACAR1(r13)
886        std	r6,PACASAVEDMSR(r13)
887
888	/* Setup our real return addr */
889	LOAD_REG_ADDR(r4,.rtas_return_loc)
890	clrldi	r4,r4,2			/* convert to realmode address */
891       	mtlr	r4
892
	/* r0 = MSR with EE, SE, BE and RI cleared: used while SRR0/1 are live */
893	li	r0,0
894	ori	r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
895	andc	r0,r6,r0
896
	/* r6 = r0 with SF, IR, DR, FE0, FE1, FP and RI cleared: the MSR that
	 * RTAS is entered with (loaded into SRR1 below) */
897        li      r9,1
898        rldicr  r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
899	ori	r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI
900	andc	r6,r0,r9
901	sync				/* disable interrupts so SRR0/1 */
902	mtmsrd	r0			/* don't get trashed */
903
904	LOAD_REG_ADDR(r4, rtas)
905	ld	r5,RTASENTRY(r4)	/* get the rtas->entry value */
906	ld	r4,RTASBASE(r4)		/* get the rtas->base value */
907
908	mtspr	SPRN_SRR0,r5
909	mtspr	SPRN_SRR1,r6
910	rfid
911	b	.	/* prevent speculative execution */
912
/*
 * rtas_return_loc - where RTAS returns to (enter_rtas put its real-mode
 * address in LR).  Clears MSR_RI, reloads r1 and the saved MSR from the
 * PACA, and rfid's to rtas_restore_regs with that MSR.
 */
913_STATIC(rtas_return_loc)
914	/* relocation is off at this point */
915	GET_PACA(r4)
916	clrldi	r4,r4,2			/* convert to realmode address */
917
	/* Branch-and-link to the next instruction so LR = address of label 0,
	 * then load the 64-bit constant at label 1 relative to it. */
918	bcl	20,31,$+4
9190:	mflr	r3
920	ld	r3,(1f-0b)(r3)		/* get &.rtas_restore_regs */
921
922	mfmsr   r6
923	li	r0,MSR_RI
924	andc	r6,r6,r0
925	sync
926	mtmsrd  r6
927
928        ld	r1,PACAR1(r4)           /* Restore our SP */
929        ld	r4,PACASAVEDMSR(r4)     /* Restore our MSR */
930
931	mtspr	SPRN_SRR0,r3
932	mtspr	SPRN_SRR1,r4
933	rfid
934	b	.	/* prevent speculative execution */
935
936	.align	3
	/* Address of rtas_restore_regs, read by the ld at label 0 above */
9371:	.llong	.rtas_restore_regs
938
/*
 * rtas_restore_regs - final stage of the RTAS return path.  Reloads
 * everything enter_rtas saved in its frame (r2, r13, r14-r31, CR, CTR,
 * XER, DAR, DSISR), pops the frame and returns to enter_rtas's caller.
 */
939_STATIC(rtas_restore_regs)
940	/* relocation is on at this point */
941	REST_GPR(2, r1)			/* Restore the TOC */
942	REST_GPR(13, r1)		/* Restore paca */
943	REST_8GPRS(14, r1)		/* Restore the non-volatiles */
944	REST_10GPRS(22, r1)		/* ditto */
945
	/* r13 is set again via GET_PACA right after being reloaded from the
	 * frame */
946	GET_PACA(r13)
947
948	ld	r4,_CCR(r1)
949	mtcr	r4
950	ld	r5,_CTR(r1)
951	mtctr	r5
952	ld	r6,_XER(r1)
953	mtspr	SPRN_XER,r6
954	ld	r7,_DAR(r1)
955	mtdar	r7
956	ld	r8,_DSISR(r1)
957	mtdsisr	r8
958
959        addi	r1,r1,RTAS_FRAME_SIZE	/* Unstack our frame */
960	ld	r0,16(r1)		/* get return address */
961
962	mtlr    r0
963        blr				/* return to caller */
964
965#endif /* CONFIG_PPC_RTAS */
966
/*
 * enter_prom - call the PROM entry point in 32-bit mode.
 * In:  r4 = PROM entry address (moved to LR and called with blrl).
 * Saves r2, r13, r14-r31, CR and the MSR in a PROM_FRAME_SIZE frame, clears
 * the 64-bit mode bit(s) in the MSR, calls the entry point, then restores
 * the saved MSR and registers and returns to the caller.
 */
967_GLOBAL(enter_prom)
968	mflr	r0
969	std	r0,16(r1)
970        stdu	r1,-PROM_FRAME_SIZE(r1)	/* Save SP and create stack space */
971
972	/* Because PROM is running in 32b mode, it clobbers the high order half
973	 * of all registers that it saves.  We therefore save those registers
974	 * PROM might touch to the stack.  (r0, r3-r13 are caller saved)
975   	 */
976	SAVE_GPR(2, r1)
977	SAVE_GPR(13, r1)
978	SAVE_8GPRS(14, r1)
979	SAVE_10GPRS(22, r1)
980	mfcr	r10
981	mfmsr	r11
982	std	r10,_CCR(r1)
983	std	r11,_MSR(r1)
984
985	/* Get the PROM entrypoint */
986	mtlr	r4
987
988	/* Switch MSR to 32 bits mode
989	 */
990#ifdef CONFIG_PPC_BOOK3E
991	rlwinm	r11,r11,0,1,31
992	mtmsr	r11
993#else /* CONFIG_PPC_BOOK3E */
	/* Clear the single bits (1 << MSR_SF_LG) and (1 << MSR_ISF_LG) */
994        mfmsr   r11
995        li      r12,1
996        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
997        andc    r11,r11,r12
998        li      r12,1
999        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
1000        andc    r11,r11,r12
1001        mtmsrd  r11
1002#endif /* CONFIG_PPC_BOOK3E */
1003        isync
1004
1005	/* Enter PROM here... */
1006	blrl
1007
1008	/* Just make sure that r1 top 32 bits didn't get
1009	 * corrupt by OF
1010	 */
1011	rldicl	r1,r1,0,32
1012
1013	/* Restore the MSR (back to 64 bits) */
1014	ld	r0,_MSR(r1)
1015	MTMSRD(r0)
1016        isync
1017
1018	/* Restore other registers */
1019	REST_GPR(2, r1)
1020	REST_GPR(13, r1)
1021	REST_8GPRS(14, r1)
1022	REST_10GPRS(22, r1)
1023	ld	r4,_CCR(r1)
1024	mtcr	r4
1025
1026        addi	r1,r1,PROM_FRAME_SIZE
1027	ld	r0,16(r1)
1028	mtlr    r0
1029        blr
1030
1031#ifdef CONFIG_FUNCTION_TRACER
1032#ifdef CONFIG_DYNAMIC_FTRACE
/* CONFIG_DYNAMIC_FTRACE: mcount and _mcount are the same immediate
 * return. */
1033_GLOBAL(mcount)
1034_GLOBAL(_mcount)
1035	blr
1036
/*
 * ftrace_caller (CONFIG_DYNAMIC_FTRACE) - tracing trampoline.
 * Sets up r3 = LR - MCOUNT_INSN_SIZE and r4 = the doubleword at offset 16
 * of the frame that 0(r1) pointed to on entry, then executes the
 * "bl ftrace_stub" at ftrace_call.
 * NOTE(review): ftrace_call and ftrace_graph_call are presumably patched at
 * run time to target the active tracer - confirm in the ftrace C code.
 */
1037_GLOBAL(ftrace_caller)
1038	/* Taken from output of objdump from lib64/glibc */
1039	mflr	r3
1040	ld	r11, 0(r1)
1041	stdu	r1, -112(r1)
	/* 128(r1) after the stdu is offset 16 from the r1 we entered with */
1042	std	r3, 128(r1)
1043	ld	r4, 16(r11)
1044	subi	r3, r3, MCOUNT_INSN_SIZE
1045.globl ftrace_call
1046ftrace_call:
1047	bl	ftrace_stub
1048	nop
1049#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1050.globl ftrace_graph_call
1051ftrace_graph_call:
1052	b	ftrace_graph_stub
1053_GLOBAL(ftrace_graph_stub)
1054#endif
	/* Reload LR from where it was stored above and pop the 112-byte frame */
1055	ld	r0, 128(r1)
1056	mtlr	r0
1057	addi	r1, r1, 112
1058_GLOBAL(ftrace_stub)
1059	blr
1060#else
/* !CONFIG_DYNAMIC_FTRACE: mcount is an immediate return; the tracing work
 * is done by _mcount. */
1061_GLOBAL(mcount)
1062	blr
1063
/*
 * _mcount (!CONFIG_DYNAMIC_FTRACE) - sets r3 = LR - MCOUNT_INSN_SIZE and
 * r4 = the doubleword at offset 16 of the frame that 0(r1) pointed to on
 * entry, then makes an indirect call through ftrace_trace_function.
 */
1064_GLOBAL(_mcount)
1065	/* Taken from output of objdump from lib64/glibc */
1066	mflr	r3
1067	ld	r11, 0(r1)
1068	stdu	r1, -112(r1)
1069	std	r3, 128(r1)
1070	ld	r4, 16(r11)
1071
1072	subi	r3, r3, MCOUNT_INSN_SIZE
1073	LOAD_REG_ADDR(r5,ftrace_trace_function)
	/* Two dependent loads: first the value stored in ftrace_trace_function,
	 * then the doubleword it points at, which becomes the call target.
	 * NOTE(review): presumably a function-descriptor dereference - confirm
	 * against the ABI in use. */
1074	ld	r5,0(r5)
1075	ld	r5,0(r5)
1076	mtctr	r5
1077	bctrl
1078	nop
1079
1080
1081#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1082	b	ftrace_graph_caller
1083#endif
1084	ld	r0, 128(r1)
1085	mtlr	r0
1086	addi	r1, r1, 112
1087_GLOBAL(ftrace_stub)
1088	blr
1089
1090#endif /* CONFIG_DYNAMIC_FTRACE */
1091
1092#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * ftrace_graph_caller - runs on the 112-byte frame created by _mcount or
 * ftrace_caller.  Calls prepare_ftrace_return with
 *   r3 = (value at 112(r1)) + 16
 *   r4 = (value at 128(r1)) - MCOUNT_INSN_SIZE
 * then reloads LR from 128(r1), pops the frame and returns.
 */
1093_GLOBAL(ftrace_graph_caller)
1094	/* load r4 with local address */
1095	ld	r4, 128(r1)
1096	subi	r4, r4, MCOUNT_INSN_SIZE
1097
1098	/* get the parent address */
1099	ld	r11, 112(r1)
1100	addi	r3, r11, 16
1101
1102	bl	.prepare_ftrace_return
1103	nop
1104
1105	ld	r0, 128(r1)
1106	mtlr	r0
1107	addi	r1, r1, 112
1108	blr
1109
/*
 * return_to_handler - preserves r3, r4 and r31 in the 24 bytes below the
 * incoming r1, calls ftrace_return_to_handler, and branches to the address
 * it returns in r3 with the original r3/r4/r31 values reloaded.
 */
1110_GLOBAL(return_to_handler)
1111	/* need to save return values */
1112	std	r4,  -24(r1)
1113	std	r3,  -16(r1)
1114	std	r31, -8(r1)
1115	mr	r31, r1
1116	stdu	r1, -112(r1)
1117
1118	bl	.ftrace_return_to_handler
1119	nop
1120
1121	/* return value has real return address */
1122	mtlr	r3
1123
	/* Pop the frame via its back chain, then reload the saved values */
1124	ld	r1, 0(r1)
1125	ld	r4,  -24(r1)
1126	ld	r3,  -16(r1)
1127	ld	r31, -8(r1)
1128
1129	/* Jump back to real return address */
1130	blr
1131
/*
 * mod_return_to_handler - same as return_to_handler, but additionally
 * saves r2, loads r2 from PACATOC(r13) for the duration of the call to
 * ftrace_return_to_handler, and reloads the saved r2 afterwards.
 */
1132_GLOBAL(mod_return_to_handler)
1133	/* need to save return values */
1134	std	r4,  -32(r1)
1135	std	r3,  -24(r1)
1136	/* save TOC */
1137	std	r2,  -16(r1)
1138	std	r31, -8(r1)
1139	mr	r31, r1
1140	stdu	r1, -112(r1)
1141
1142	/*
1143	 * We are in a module using the module's TOC.
1144	 * Switch to our TOC to run inside the core kernel.
1145	 */
1146	ld	r2, PACATOC(r13)
1147
1148	bl	.ftrace_return_to_handler
1149	nop
1150
1151	/* return value has real return address */
1152	mtlr	r3
1153
	/* Pop the frame via its back chain, then reload the saved values */
1154	ld	r1, 0(r1)
1155	ld	r4,  -32(r1)
1156	ld	r3,  -24(r1)
1157	ld	r2,  -16(r1)
1158	ld	r31, -8(r1)
1159
1160	/* Jump back to real return address */
1161	blr
1162#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1163#endif /* CONFIG_FUNCTION_TRACER */
1164