1 /*
2  *  arch/ppc/mm/fault.c
3  *
4  *  PowerPC version
5  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6  *
7  *  Derived from "arch/i386/mm/fault.c"
8  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
9  *
10  *  Modified by Cort Dougan and Paul Mackerras.
11  *
12  *  This program is free software; you can redistribute it and/or
13  *  modify it under the terms of the GNU General Public License
14  *  as published by the Free Software Foundation; either version
15  *  2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <linux/signal.h>
20 #include <linux/sched.h>
21 #include <linux/kernel.h>
22 #include <linux/errno.h>
23 #include <linux/string.h>
24 #include <linux/types.h>
25 #include <linux/ptrace.h>
26 #include <linux/mman.h>
27 #include <linux/mm.h>
28 #include <linux/interrupt.h>
29 
30 #include <asm/page.h>
31 #include <asm/pgtable.h>
32 #include <asm/mmu.h>
33 #include <asm/mmu_context.h>
34 #include <asm/system.h>
35 #include <asm/uaccess.h>
36 
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
/* Hooks installed by the xmon/kgdb debuggers; consulted before the
 * normal fault path in do_page_fault() and bad_page_fault(). */
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);
/* Non-zero => hand kernel-mode faults to the debugger before dying. */
int debugger_kernel_faults = 1;
#endif

/* Fault/hash-table statistics counters. */
unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts; 	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault() */
unsigned long pte_errors;	/* updated by do_page_fault() */
unsigned int probingmem;	/* NOTE(review): not referenced in this file;
				 * presumably set by memory-probe code elsewhere */

extern void die_if_kernel(char *, struct pt_regs *, long);
void bad_page_fault(struct pt_regs *, unsigned long, int sig);
void do_page_fault(struct pt_regs *, unsigned long, unsigned long);
54 
55 /*
56  * Check whether the instruction at regs->nip is a store using
57  * an update addressing form which will update r1.
58  */
store_updates_sp(struct pt_regs * regs)59 static int store_updates_sp(struct pt_regs *regs)
60 {
61 	unsigned int inst;
62 
63 	if (get_user(inst, (unsigned int *)regs->nip))
64 		return 0;
65 	/* check for 1 in the rA field */
66 	if (((inst >> 16) & 0x1f) != 1)
67 		return 0;
68 	/* check major opcode */
69 	switch (inst >> 26) {
70 	case 37:	/* stwu */
71 	case 39:	/* stbu */
72 	case 45:	/* sthu */
73 	case 53:	/* stfsu */
74 	case 55:	/* stfdu */
75 		return 1;
76 	case 31:
77 		/* check minor opcode */
78 		switch ((inst >> 1) & 0x3ff) {
79 		case 183:	/* stwux */
80 		case 247:	/* stbux */
81 		case 439:	/* sthux */
82 		case 695:	/* stfsux */
83 		case 759:	/* stfdux */
84 			return 1;
85 		}
86 	}
87 	return 0;
88 }
89 
90 /*
91  * For 600- and 800-family processors, the error_code parameter is DSISR
92  * for a data fault, SRR1 for an instruction fault. For 400-family processors
93  * the error_code parameter is ESR for a data fault, 0 for an instruction
94  * fault.
95  */
do_page_fault(struct pt_regs * regs,unsigned long address,unsigned long error_code)96 void do_page_fault(struct pt_regs *regs, unsigned long address,
97 		   unsigned long error_code)
98 {
99 	struct vm_area_struct * vma;
100 	struct mm_struct *mm = current->mm;
101 	siginfo_t info;
102 	int code = SEGV_MAPERR;
103 #if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
104 	int is_write = error_code & ESR_DST;
105 #else
106 	int is_write = 0;
107 
108 	/*
109 	 * Fortunately the bit assignments in SRR1 for an instruction
110 	 * fault and DSISR for a data fault are mostly the same for the
111 	 * bits we are interested in.  But there are some bits which
112 	 * indicate errors in DSISR but can validly be set in SRR1.
113 	 */
114 	if (regs->trap == 0x400)
115 		error_code &= 0x48200000;
116 	else
117 		is_write = error_code & 0x02000000;
118 #endif /* CONFIG_4xx || CONFIG_BOOKE */
119 
120 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
121 	if (debugger_fault_handler && regs->trap == 0x300) {
122 		debugger_fault_handler(regs);
123 		return;
124 	}
125 #ifndef CONFIG_4xx
126 	if (error_code & 0x00400000) {
127 		/* DABR match */
128 		if (debugger_dabr_match(regs))
129 			return;
130 	}
131 #endif /* !CONFIG_4xx */
132 #endif /* CONFIG_XMON || CONFIG_KGDB */
133 
134 	if (in_interrupt() || mm == NULL) {
135 		bad_page_fault(regs, address, SIGSEGV);
136 		return;
137 	}
138 	down_read(&mm->mmap_sem);
139 	vma = find_vma(mm, address);
140 	if (!vma)
141 		goto bad_area;
142 	if (vma->vm_start <= address)
143 		goto good_area;
144 	if (!(vma->vm_flags & VM_GROWSDOWN))
145 		goto bad_area;
146 	if (!is_write)
147                 goto bad_area;
148 
149 	/*
150 	 * N.B. The rs6000/xcoff ABI allows programs to access up to
151 	 * a few hundred bytes below the stack pointer.
152 	 * The kernel signal delivery code writes up to about 1.5kB
153 	 * below the stack pointer (r1) before decrementing it.
154 	 * The exec code can write slightly over 640kB to the stack
155 	 * before setting the user r1.  Thus we allow the stack to
156 	 * expand to 1MB without further checks.
157 	 */
158 	if (address + 0x100000 < vma->vm_end) {
159 		/* get user regs even if this fault is in kernel mode */
160 		struct pt_regs *uregs = current->thread.regs;
161 		if (uregs == NULL)
162 			goto bad_area;
163 
164 		/*
165 		 * A user-mode access to an address a long way below
166 		 * the stack pointer is only valid if the instruction
167 		 * is one which would update the stack pointer to the
168 		 * address accessed if the instruction completed,
169 		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
170 		 * (or the byte, halfword, float or double forms).
171 		 *
172 		 * If we don't check this then any write to the area
173 		 * between the last mapped region and the stack will
174 		 * expand the stack rather than segfaulting.
175 		 */
176 		if (address + 2048 < uregs->gpr[1]
177 		    && (!user_mode(regs) || !store_updates_sp(regs)))
178 			goto bad_area;
179 	}
180 	if (expand_stack(vma, address))
181 		goto bad_area;
182 
183 good_area:
184 	code = SEGV_ACCERR;
185 #if defined(CONFIG_6xx)
186 	if (error_code & 0x95700000)
187 		/* an error such as lwarx to I/O controller space,
188 		   address matching DABR, eciwx, etc. */
189 		goto bad_area;
190 #endif /* CONFIG_6xx */
191 #if defined(CONFIG_8xx)
192         /* The MPC8xx seems to always set 0x80000000, which is
193          * "undefined".  Of those that can be set, this is the only
194          * one which seems bad.
195          */
196 	if (error_code & 0x10000000)
197                 /* Guarded storage error. */
198 		goto bad_area;
199 #endif /* CONFIG_8xx */
200 
201 	/* a write */
202 	if (is_write) {
203 		if (!(vma->vm_flags & VM_WRITE))
204 			goto bad_area;
205 	/* a read */
206 	} else {
207 		/* protection fault */
208 		if (error_code & 0x08000000)
209 			goto bad_area;
210 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
211 			goto bad_area;
212 	}
213 
214 	/*
215 	 * If for any reason at all we couldn't handle the fault,
216 	 * make sure we exit gracefully rather than endlessly redo
217 	 * the fault.
218 	 */
219  survive:
220         switch (handle_mm_fault(mm, vma, address, is_write)) {
221         case 1:
222                 current->min_flt++;
223                 break;
224         case 2:
225                 current->maj_flt++;
226                 break;
227         case 0:
228                 goto do_sigbus;
229         default:
230                 goto out_of_memory;
231 	}
232 
233 	up_read(&mm->mmap_sem);
234 	/*
235 	 * keep track of tlb+htab misses that are good addrs but
236 	 * just need pte's created via handle_mm_fault()
237 	 * -- Cort
238 	 */
239 	pte_misses++;
240 	return;
241 
242 bad_area:
243 	up_read(&mm->mmap_sem);
244 	pte_errors++;
245 
246 	/* User mode accesses cause a SIGSEGV */
247 	if (user_mode(regs)) {
248 		info.si_signo = SIGSEGV;
249 		info.si_errno = 0;
250 		info.si_code = code;
251 		info.si_addr = (void *) address;
252 		force_sig_info(SIGSEGV, &info, current);
253 		return;
254 	}
255 
256 	bad_page_fault(regs, address, SIGSEGV);
257 	return;
258 
259 /*
260  * We ran out of memory, or some other thing happened to us that made
261  * us unable to handle the page fault gracefully.
262  */
263 out_of_memory:
264 	if (current->pid == 1) {
265 		yield();
266 		goto survive;
267 	}
268 	up_read(&mm->mmap_sem);
269 	printk("VM: killing process %s\n", current->comm);
270 	if (user_mode(regs))
271 		do_exit(SIGKILL);
272 	bad_page_fault(regs, address, SIGKILL);
273 	return;
274 
275 do_sigbus:
276 	up_read(&mm->mmap_sem);
277 	info.si_signo = SIGBUS;
278 	info.si_errno = 0;
279 	info.si_code = BUS_ADRERR;
280 	info.si_addr = (void *)address;
281 	force_sig_info (SIGBUS, &info, current);
282 	if (!user_mode(regs))
283 		bad_page_fault(regs, address, SIGBUS);
284 }
285 
286 /*
287  * bad_page_fault is called when we have a bad access from the kernel.
288  * It is called from do_page_fault above and from some of the procedures
289  * in traps.c.
290  */
291 void
bad_page_fault(struct pt_regs * regs,unsigned long address,int sig)292 bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
293 {
294 	extern void die(const char *,struct pt_regs *,long);
295 
296 	unsigned long fixup;
297 
298 	/* Are we prepared to handle this fault?  */
299 	if ((fixup = search_exception_table(regs->nip)) != 0) {
300 		regs->nip = fixup;
301 		return;
302 	}
303 
304 	/* kernel has accessed a bad area */
305 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
306 	if (debugger_kernel_faults)
307 		debugger(regs);
308 #endif
309 	die("kernel access of bad area", regs, sig);
310 }
311 
312 #ifdef CONFIG_8xx
313 
314 /* The pgtable.h claims some functions generically exist, but I
315  * can't find them......
316  */
va_to_pte(unsigned long address)317 pte_t *va_to_pte(unsigned long address)
318 {
319 	pgd_t *dir;
320 	pmd_t *pmd;
321 	pte_t *pte;
322 	struct mm_struct *mm;
323 
324 	if (address < TASK_SIZE)
325 		mm = current->mm;
326 	else
327 		mm = &init_mm;
328 
329 	dir = pgd_offset(mm, address & PAGE_MASK);
330 	if (dir) {
331 		pmd = pmd_offset(dir, address & PAGE_MASK);
332 		if (pmd && pmd_present(*pmd)) {
333 			pte = pte_offset(pmd, address & PAGE_MASK);
334 			if (pte && pte_present(*pte)) {
335 				return(pte);
336 			}
337 		}
338 		else {
339 			return (0);
340 		}
341 	}
342 	else {
343 		return (0);
344 	}
345 	return (0);
346 }
347 
va_to_phys(unsigned long address)348 unsigned long va_to_phys(unsigned long address)
349 {
350 	pte_t *pte;
351 
352 	pte = va_to_pte(address);
353 	if (pte)
354 		return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
355 	return (0);
356 }
357 
358 void
print_8xx_pte(struct mm_struct * mm,unsigned long addr)359 print_8xx_pte(struct mm_struct *mm, unsigned long addr)
360 {
361         pgd_t * pgd;
362         pmd_t * pmd;
363         pte_t * pte;
364 
365         printk(" pte @ 0x%8lx: ", addr);
366         pgd = pgd_offset(mm, addr & PAGE_MASK);
367         if (pgd) {
368                 pmd = pmd_offset(pgd, addr & PAGE_MASK);
369                 if (pmd && pmd_present(*pmd)) {
370                         pte = pte_offset(pmd, addr & PAGE_MASK);
371                         if (pte) {
372                                 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
373                                         (long)pgd, (long)pte, (long)pte_val(*pte));
374 #define pp ((long)pte_val(*pte))
375 				printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
376 				       "CI: %lx v: %lx\n",
377 				       pp>>12,    /* rpn */
378 				       (pp>>10)&3, /* pp */
379 				       (pp>>3)&1, /* small */
380 				       (pp>>2)&1, /* shared */
381 				       (pp>>1)&1, /* cache inhibit */
382 				       pp&1       /* valid */
383 				       );
384 #undef pp
385                         }
386                         else {
387                                 printk("no pte\n");
388                         }
389                 }
390                 else {
391                         printk("no pmd\n");
392                 }
393         }
394         else {
395                 printk("no pgd\n");
396         }
397 }
398 
399 int
get_8xx_pte(struct mm_struct * mm,unsigned long addr)400 get_8xx_pte(struct mm_struct *mm, unsigned long addr)
401 {
402         pgd_t * pgd;
403         pmd_t * pmd;
404         pte_t * pte;
405         int     retval = 0;
406 
407         pgd = pgd_offset(mm, addr & PAGE_MASK);
408         if (pgd) {
409                 pmd = pmd_offset(pgd, addr & PAGE_MASK);
410                 if (pmd && pmd_present(*pmd)) {
411                         pte = pte_offset(pmd, addr & PAGE_MASK);
412                         if (pte) {
413                                         retval = (int)pte_val(*pte);
414                         }
415                 }
416         }
417         return(retval);
418 }
419 #endif /* CONFIG_8xx */
420