1 /*
2 * arch/ppc/mm/fault.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
9 *
10 * Modified by Cort Dougan and Paul Mackerras.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <linux/config.h>
19 #include <linux/signal.h>
20 #include <linux/sched.h>
21 #include <linux/kernel.h>
22 #include <linux/errno.h>
23 #include <linux/string.h>
24 #include <linux/types.h>
25 #include <linux/ptrace.h>
26 #include <linux/mman.h>
27 #include <linux/mm.h>
28 #include <linux/interrupt.h>
29
30 #include <asm/page.h>
31 #include <asm/pgtable.h>
32 #include <asm/mmu.h>
33 #include <asm/mmu_context.h>
34 #include <asm/system.h>
35 #include <asm/uaccess.h>
36
37 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
38 extern void (*debugger)(struct pt_regs *);
39 extern void (*debugger_fault_handler)(struct pt_regs *);
40 extern int (*debugger_dabr_match)(struct pt_regs *);
41 int debugger_kernel_faults = 1;
42 #endif
43
44 unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
45 unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
46 unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
47 unsigned long pte_misses; /* updated by do_page_fault() */
48 unsigned long pte_errors; /* updated by do_page_fault() */
49 unsigned int probingmem;
50
51 extern void die_if_kernel(char *, struct pt_regs *, long);
52 void bad_page_fault(struct pt_regs *, unsigned long, int sig);
53 void do_page_fault(struct pt_regs *, unsigned long, unsigned long);
54
55 /*
56 * Check whether the instruction at regs->nip is a store using
57 * an update addressing form which will update r1.
58 */
store_updates_sp(struct pt_regs * regs)59 static int store_updates_sp(struct pt_regs *regs)
60 {
61 unsigned int inst;
62
63 if (get_user(inst, (unsigned int *)regs->nip))
64 return 0;
65 /* check for 1 in the rA field */
66 if (((inst >> 16) & 0x1f) != 1)
67 return 0;
68 /* check major opcode */
69 switch (inst >> 26) {
70 case 37: /* stwu */
71 case 39: /* stbu */
72 case 45: /* sthu */
73 case 53: /* stfsu */
74 case 55: /* stfdu */
75 return 1;
76 case 31:
77 /* check minor opcode */
78 switch ((inst >> 1) & 0x3ff) {
79 case 183: /* stwux */
80 case 247: /* stbux */
81 case 439: /* sthux */
82 case 695: /* stfsux */
83 case 759: /* stfdux */
84 return 1;
85 }
86 }
87 return 0;
88 }
89
90 /*
91 * For 600- and 800-family processors, the error_code parameter is DSISR
92 * for a data fault, SRR1 for an instruction fault. For 400-family processors
93 * the error_code parameter is ESR for a data fault, 0 for an instruction
94 * fault.
95 */
void do_page_fault(struct pt_regs *regs, unsigned long address,
		   unsigned long error_code)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	siginfo_t info;
	int code = SEGV_MAPERR;
#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
	/* On 4xx/Book-E the ESR "data store" bit identifies a write fault. */
	int is_write = error_code & ESR_DST;
#else
	int is_write = 0;

	/*
	 * Fortunately the bit assignments in SRR1 for an instruction
	 * fault and DSISR for a data fault are mostly the same for the
	 * bits we are interested in.  But there are some bits which
	 * indicate errors in DSISR but can validly be set in SRR1.
	 */
	if (regs->trap == 0x400)
		/* instruction fault: keep only the bits shared with DSISR */
		error_code &= 0x48200000;
	else
		/* data fault: DSISR bit 6 (0x02000000) means a store */
		is_write = error_code & 0x02000000;
#endif /* CONFIG_4xx || CONFIG_BOOKE */

#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
	/* Give an attached debugger first crack at data faults (trap 0x300). */
	if (debugger_fault_handler && regs->trap == 0x300) {
		debugger_fault_handler(regs);
		return;
	}
#ifndef CONFIG_4xx
	if (error_code & 0x00400000) {
		/* DABR match */
		/* NOTE(review): debugger_dabr_match is called without a NULL
		 * check, unlike debugger_fault_handler above — presumably it
		 * is always set when XMON/KGDB is configured; verify. */
		if (debugger_dabr_match(regs))
			return;
	}
#endif /* !CONFIG_4xx */
#endif /* CONFIG_XMON || CONFIG_KGDB */

	/* Faults in interrupt context or with no mm (kernel thread)
	 * cannot take mmap_sem and cannot be handled here. */
	if (in_interrupt() || mm == NULL) {
		bad_page_fault(regs, address, SIGSEGV);
		return;
	}
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	/* Address is below the nearest vma: only valid if that vma is a
	 * downward-growing stack and this is a store. */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (!is_write)
		goto bad_area;

	/*
	 * N.B. The rs6000/xcoff ABI allows programs to access up to
	 * a few hundred bytes below the stack pointer.
	 * The kernel signal delivery code writes up to about 1.5kB
	 * below the stack pointer (r1) before decrementing it.
	 * The exec code can write slightly over 640kB to the stack
	 * before setting the user r1.  Thus we allow the stack to
	 * expand to 1MB without further checks.
	 */
	if (address + 0x100000 < vma->vm_end) {
		/* get user regs even if this fault is in kernel mode */
		struct pt_regs *uregs = current->thread.regs;
		if (uregs == NULL)
			goto bad_area;

		/*
		 * A user-mode access to an address a long way below
		 * the stack pointer is only valid if the instruction
		 * is one which would update the stack pointer to the
		 * address accessed if the instruction completed,
		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
		 * (or the byte, halfword, float or double forms).
		 *
		 * If we don't check this then any write to the area
		 * between the last mapped region and the stack will
		 * expand the stack rather than segfaulting.
		 */
		if (address + 2048 < uregs->gpr[1]
		    && (!user_mode(regs) || !store_updates_sp(regs)))
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

good_area:
	/* From here on, a failure is an access-rights problem,
	 * not a missing mapping. */
	code = SEGV_ACCERR;
#if defined(CONFIG_6xx)
	if (error_code & 0x95700000)
		/* an error such as lwarx to I/O controller space,
		   address matching DABR, eciwx, etc. */
		goto bad_area;
#endif /* CONFIG_6xx */
#if defined(CONFIG_8xx)
	/* The MPC8xx seems to always set 0x80000000, which is
	 * "undefined".  Of those that can be set, this is the only
	 * one which seems bad.
	 */
	if (error_code & 0x10000000)
		/* Guarded storage error. */
		goto bad_area;
#endif /* CONFIG_8xx */

	/* a write */
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	/* a read */
	} else {
		/* protection fault */
		if (error_code & 0x08000000)
			goto bad_area;
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
survive:
	/* handle_mm_fault: 1 = minor fault, 2 = major fault,
	 * 0 = SIGBUS, anything else = out of memory. */
	switch (handle_mm_fault(mm, vma, address, is_write)) {
	case 1:
		current->min_flt++;
		break;
	case 2:
		current->maj_flt++;
		break;
	case 0:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

	up_read(&mm->mmap_sem);
	/*
	 * keep track of tlb+htab misses that are good addrs but
	 * just need pte's created via handle_mm_fault()
	 * -- Cort
	 */
	pte_misses++;
	return;

bad_area:
	up_read(&mm->mmap_sem);
	pte_errors++;

	/* User mode accesses cause a SIGSEGV */
	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = code;
		info.si_addr = (void *) address;
		force_sig_info(SIGSEGV, &info, current);
		return;
	}

	/* Kernel-mode bad access: try exception-table fixup, else die. */
	bad_page_fault(regs, address, SIGSEGV);
	return;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	if (current->pid == 1) {
		/* Never OOM-kill init: yield the CPU and retry the fault. */
		yield();
		goto survive;
	}
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	bad_page_fault(regs, address, SIGKILL);
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info (SIGBUS, &info, current);
	/* A kernel-mode SIGBUS still needs fixup/die handling. */
	if (!user_mode(regs))
		bad_page_fault(regs, address, SIGBUS);
}
285
286 /*
287 * bad_page_fault is called when we have a bad access from the kernel.
288 * It is called from do_page_fault above and from some of the procedures
289 * in traps.c.
290 */
291 void
bad_page_fault(struct pt_regs * regs,unsigned long address,int sig)292 bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
293 {
294 extern void die(const char *,struct pt_regs *,long);
295
296 unsigned long fixup;
297
298 /* Are we prepared to handle this fault? */
299 if ((fixup = search_exception_table(regs->nip)) != 0) {
300 regs->nip = fixup;
301 return;
302 }
303
304 /* kernel has accessed a bad area */
305 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
306 if (debugger_kernel_faults)
307 debugger(regs);
308 #endif
309 die("kernel access of bad area", regs, sig);
310 }
311
312 #ifdef CONFIG_8xx
313
314 /* The pgtable.h claims some functions generically exist, but I
315 * can't find them......
316 */
va_to_pte(unsigned long address)317 pte_t *va_to_pte(unsigned long address)
318 {
319 pgd_t *dir;
320 pmd_t *pmd;
321 pte_t *pte;
322 struct mm_struct *mm;
323
324 if (address < TASK_SIZE)
325 mm = current->mm;
326 else
327 mm = &init_mm;
328
329 dir = pgd_offset(mm, address & PAGE_MASK);
330 if (dir) {
331 pmd = pmd_offset(dir, address & PAGE_MASK);
332 if (pmd && pmd_present(*pmd)) {
333 pte = pte_offset(pmd, address & PAGE_MASK);
334 if (pte && pte_present(*pte)) {
335 return(pte);
336 }
337 }
338 else {
339 return (0);
340 }
341 }
342 else {
343 return (0);
344 }
345 return (0);
346 }
347
va_to_phys(unsigned long address)348 unsigned long va_to_phys(unsigned long address)
349 {
350 pte_t *pte;
351
352 pte = va_to_pte(address);
353 if (pte)
354 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
355 return (0);
356 }
357
358 void
print_8xx_pte(struct mm_struct * mm,unsigned long addr)359 print_8xx_pte(struct mm_struct *mm, unsigned long addr)
360 {
361 pgd_t * pgd;
362 pmd_t * pmd;
363 pte_t * pte;
364
365 printk(" pte @ 0x%8lx: ", addr);
366 pgd = pgd_offset(mm, addr & PAGE_MASK);
367 if (pgd) {
368 pmd = pmd_offset(pgd, addr & PAGE_MASK);
369 if (pmd && pmd_present(*pmd)) {
370 pte = pte_offset(pmd, addr & PAGE_MASK);
371 if (pte) {
372 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
373 (long)pgd, (long)pte, (long)pte_val(*pte));
374 #define pp ((long)pte_val(*pte))
375 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
376 "CI: %lx v: %lx\n",
377 pp>>12, /* rpn */
378 (pp>>10)&3, /* pp */
379 (pp>>3)&1, /* small */
380 (pp>>2)&1, /* shared */
381 (pp>>1)&1, /* cache inhibit */
382 pp&1 /* valid */
383 );
384 #undef pp
385 }
386 else {
387 printk("no pte\n");
388 }
389 }
390 else {
391 printk("no pmd\n");
392 }
393 }
394 else {
395 printk("no pgd\n");
396 }
397 }
398
399 int
get_8xx_pte(struct mm_struct * mm,unsigned long addr)400 get_8xx_pte(struct mm_struct *mm, unsigned long addr)
401 {
402 pgd_t * pgd;
403 pmd_t * pmd;
404 pte_t * pte;
405 int retval = 0;
406
407 pgd = pgd_offset(mm, addr & PAGE_MASK);
408 if (pgd) {
409 pmd = pmd_offset(pgd, addr & PAGE_MASK);
410 if (pmd && pmd_present(*pmd)) {
411 pte = pte_offset(pmd, addr & PAGE_MASK);
412 if (pte) {
413 retval = (int)pte_val(*pte);
414 }
415 }
416 }
417 return(retval);
418 }
419 #endif /* CONFIG_8xx */
420