1 /*
2 * linux/arch/cris/mm/fault.c
3 *
4 * Copyright (C) 2000, 2001 Axis Communications AB
5 *
6 * Authors: Bjorn Wesen
7 *
8 * $Log: fault.c,v $
9 * Revision 1.23 2003/10/16 05:32:32 starvik
10 * Only read TLB_SELECT if DEBUG
11 *
12 * Revision 1.22 2003/07/07 09:07:04 johana
13 * Added special CONFIG_ETRAX_DEBUG_INTERRUPT handling here
14 * to deal with a di in entry.S
15 *
16 * Revision 1.21 2002/05/28 14:24:56 bjornw
17 * Corrected typo
18 *
19 * Revision 1.20 2001/11/22 13:34:06 bjornw
20 * * Bug workaround (LX TR89): force a rerun of the whole of an interrupted
21 * unaligned write, because the second half of the write will be corrupted
22 * otherwise. Affected unaligned writes spanning not-yet mapped pages.
23 * * Optimization: use the wr_rd bit in R_MMU_CAUSE to know whether a miss
24 * was due to a read or a write (before we didn't know this until the next
25 * restart of the interrupted instruction, thus wasting one fault-irq)
26 *
27 * Revision 1.19 2001/11/12 19:02:10 pkj
28 * Fixed compiler warnings.
29 *
30 * Revision 1.18 2001/07/18 22:14:32 bjornw
31 * Enable interrupts in the bulk of do_page_fault
32 *
33 * Revision 1.17 2001/07/18 13:07:23 bjornw
34 * Detect non-existent PTEs in vmalloc pmd synchronization
35 * * Remove comment about fast-paths for VMALLOC_START etc, because all that
36 * was totally bogus anyway it turned out :)
37 * * Fix detection of vmalloc-area synchronization
38 * * Add some comments
39 *
40 * Revision 1.16 2001/06/13 00:06:08 bjornw
41 * current_pgd should be volatile
42 *
43 * Revision 1.15 2001/06/13 00:02:23 bjornw
44 * Use a separate variable to store the current pgd to avoid races in schedule
45 *
46 * Revision 1.14 2001/05/16 17:41:07 hp
47 * Last comment tweak further tweaked.
48 *
49 * Revision 1.13 2001/05/15 00:58:44 hp
50 * Expand a bit on the comment why we compare address >= TASK_SIZE rather
51 * than >= VMALLOC_START.
52 *
53 * Revision 1.12 2001/04/04 10:51:14 bjornw
54 * mmap_sem is grabbed for reading
55 *
56 * Revision 1.11 2001/03/23 07:36:07 starvik
57 * Corrected according to review remarks
58 *
59 * Revision 1.10 2001/03/21 16:10:11 bjornw
60 * CRIS_FRAME_FIXUP not needed anymore, use FRAME_NORMAL
61 *
62 * Revision 1.9 2001/03/05 13:22:20 bjornw
63 * Spell-fix and fix in vmalloc_fault handling
64 *
65 * Revision 1.8 2000/11/22 14:45:31 bjornw
66 * * 2.4.0-test10 removed the set_pgdir instantaneous kernel global mapping
67 * into all processes. Instead we fill in the missing PTE entries on demand.
68 *
69 * Revision 1.7 2000/11/21 16:39:09 bjornw
70 * fixup switches frametype
71 *
72 * Revision 1.6 2000/11/17 16:54:08 bjornw
73 * More detailed siginfo reporting
74 *
75 *
76 */
77
78 #include <linux/config.h>
79 #include <linux/signal.h>
80 #include <linux/sched.h>
81 #include <linux/kernel.h>
82 #include <linux/errno.h>
83 #include <linux/string.h>
84 #include <linux/types.h>
85 #include <linux/ptrace.h>
86 #include <linux/mman.h>
87 #include <linux/mm.h>
88 #include <linux/interrupt.h>
89
90 #include <asm/system.h>
91 #include <asm/segment.h>
92 #include <asm/pgtable.h>
93 #include <asm/uaccess.h>
94 #include <asm/svinto.h>
95
96 extern void die_if_kernel(const char *,struct pt_regs *,long);
97
98 asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
99 asmlinkage void do_page_fault(unsigned long address, struct pt_regs *regs,
100 int error_code);
101
102 /* debug of low-level TLB reload */
103 #undef DEBUG
104
105 #ifdef DEBUG
106 #define D(x) x
107 #else
108 #define D(x)
109 #endif
110
111 /* debug of higher-level faults */
112 #define DPG(x)
113
114 /* current active page directory */
115
116 volatile pgd_t *current_pgd;
117
118 /* fast TLB-fill fault handler
119 * this is called from entry.S with interrupts disabled
120 */
121
122 void
handle_mmu_bus_fault(struct pt_regs * regs)123 handle_mmu_bus_fault(struct pt_regs *regs)
124 {
125 int cause;
126 #ifdef DEBUG
127 int select;
128 int index;
129 int page_id;
130 int acc, inv;
131 #endif
132 int miss, we, writeac;
133 pmd_t *pmd;
134 pte_t pte;
135 int errcode;
136 unsigned long address;
137
138 #ifdef CONFIG_ETRAX_DEBUG_INTERRUPT /* The di is actually in entry.S */
139 log_int(rdpc(), regs->dccr, 0);
140 #endif
141 cause = *R_MMU_CAUSE;
142
143 address = cause & PAGE_MASK; /* get faulting address */
144
145 #ifdef DEBUG
146 select = *R_TLB_SELECT;
147 page_id = IO_EXTRACT(R_MMU_CAUSE, page_id, cause);
148 acc = IO_EXTRACT(R_MMU_CAUSE, acc_excp, cause);
149 inv = IO_EXTRACT(R_MMU_CAUSE, inv_excp, cause);
150 index = IO_EXTRACT(R_TLB_SELECT, index, select);
151 #endif
152 miss = IO_EXTRACT(R_MMU_CAUSE, miss_excp, cause);
153 we = IO_EXTRACT(R_MMU_CAUSE, we_excp, cause);
154 writeac = IO_EXTRACT(R_MMU_CAUSE, wr_rd, cause);
155
156 /* ETRAX 100LX TR89 bugfix: if the second half of an unaligned
157 * write causes a MMU-fault, it will not be restarted correctly.
158 * This could happen if a write crosses a page-boundary and the
159 * second page is not yet COW'ed or even loaded. The workaround
160 * is to clear the unaligned bit in the CPU status record, so
161 * that the CPU will rerun both the first and second halves of
162 * the instruction. This will not have any sideeffects unless
163 * the first half goes to any device or memory that can't be
164 * written twice, and which is mapped through the MMU.
165 *
166 * We only need to do this for writes.
167 */
168
169 if(writeac)
170 regs->csrinstr &= ~(1 << 5);
171
172 /* Set errcode's R/W flag according to the mode which caused the
173 * fault
174 */
175
176 errcode = writeac << 1;
177
178 D(printk("bus_fault from IRP 0x%lx: addr 0x%lx, miss %d, inv %d, we %d, acc %d, dx %d pid %d\n",
179 regs->irp, address, miss, inv, we, acc, index, page_id));
180
181 /* for a miss, we need to reload the TLB entry */
182
183 if (miss) {
184 /* see if the pte exists at all
185 * refer through current_pgd, dont use mm->pgd
186 */
187
188 pmd = (pmd_t *)(current_pgd + pgd_index(address));
189 if (pmd_none(*pmd))
190 goto dofault;
191 if (pmd_bad(*pmd)) {
192 printk("bad pgdir entry 0x%lx at 0x%p\n", *(unsigned long*)pmd, pmd);
193 pmd_clear(pmd);
194 return;
195 }
196 pte = *pte_offset(pmd, address);
197 if (!pte_present(pte))
198 goto dofault;
199
200 #ifdef DEBUG
201 printk(" found pte %lx pg %p ", pte_val(pte), pte_page(pte));
202 if (pte_val(pte) & _PAGE_SILENT_WRITE)
203 printk("Silent-W ");
204 if (pte_val(pte) & _PAGE_KERNEL)
205 printk("Kernel ");
206 if (pte_val(pte) & _PAGE_SILENT_READ)
207 printk("Silent-R ");
208 if (pte_val(pte) & _PAGE_GLOBAL)
209 printk("Global ");
210 if (pte_val(pte) & _PAGE_PRESENT)
211 printk("Present ");
212 if (pte_val(pte) & _PAGE_ACCESSED)
213 printk("Accessed ");
214 if (pte_val(pte) & _PAGE_MODIFIED)
215 printk("Modified ");
216 if (pte_val(pte) & _PAGE_READ)
217 printk("Readable ");
218 if (pte_val(pte) & _PAGE_WRITE)
219 printk("Writeable ");
220 printk("\n");
221 #endif
222
223 /* load up the chosen TLB entry
224 * this assumes the pte format is the same as the TLB_LO layout.
225 *
226 * the write to R_TLB_LO also writes the vpn and page_id fields from
227 * R_MMU_CAUSE, which we in this case obviously want to keep
228 */
229
230 *R_TLB_LO = pte_val(pte);
231
232 return;
233 }
234
235 errcode = 1 | (we << 1);
236
237 dofault:
238 /* leave it to the MM system fault handler below */
239 D(printk("do_page_fault %lx errcode %d\n", address, errcode));
240 do_page_fault(address, regs, errcode);
241 }
242
243 /*
244 * This routine handles page faults. It determines the address,
245 * and the problem, and then passes it off to one of the appropriate
246 * routines.
247 *
248 * Notice that the address we're given is aligned to the page the fault
249 * occurred in, since we only get the PFN in R_MMU_CAUSE not the complete
250 * address.
251 *
252 * error_code:
253 * bit 0 == 0 means no page found, 1 means protection fault
254 * bit 1 == 0 means read, 1 means write
255 *
256 * If this routine detects a bad access, it returns 1, otherwise it
257 * returns 0.
258 */
259
260 asmlinkage void
do_page_fault(unsigned long address,struct pt_regs * regs,int error_code)261 do_page_fault(unsigned long address, struct pt_regs *regs,
262 int error_code)
263 {
264 struct task_struct *tsk;
265 struct mm_struct *mm;
266 struct vm_area_struct * vma;
267 int writeaccess;
268 unsigned long fixup;
269 siginfo_t info;
270
271 tsk = current;
272
273 /*
274 * We fault-in kernel-space virtual memory on-demand. The
275 * 'reference' page table is init_mm.pgd.
276 *
277 * NOTE! We MUST NOT take any locks for this case. We may
278 * be in an interrupt or a critical region, and should
279 * only copy the information from the master page table,
280 * nothing more.
281 *
282 * NOTE2: This is done so that, when updating the vmalloc
283 * mappings we don't have to walk all processes pgdirs and
284 * add the high mappings all at once. Instead we do it as they
285 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
286 * bit set so sometimes the TLB can use a lingering entry.
287 *
288 * This verifies that the fault happens in kernel space
289 * and that the fault was not a protection error (error_code & 1).
290 */
291
292 if (address >= VMALLOC_START &&
293 !(error_code & 1) &&
294 !user_mode(regs))
295 goto vmalloc_fault;
296
297 /* we can and should enable interrupts at this point */
298 sti();
299
300 mm = tsk->mm;
301 writeaccess = error_code & 2;
302 info.si_code = SEGV_MAPERR;
303
304 /*
305 * If we're in an interrupt or have no user
306 * context, we must not take the fault..
307 */
308
309 if (in_interrupt() || !mm)
310 goto no_context;
311
312 down_read(&mm->mmap_sem);
313 vma = find_vma(mm, address);
314 if (!vma)
315 goto bad_area;
316 if (vma->vm_start <= address)
317 goto good_area;
318 if (!(vma->vm_flags & VM_GROWSDOWN))
319 goto bad_area;
320 if (user_mode(regs)) {
321 /*
322 * accessing the stack below usp is always a bug.
323 * we get page-aligned addresses so we can only check
324 * if we're within a page from usp, but that might be
325 * enough to catch brutal errors at least.
326 */
327 if (address + PAGE_SIZE < rdusp())
328 goto bad_area;
329 }
330 if (expand_stack(vma, address))
331 goto bad_area;
332
333 /*
334 * Ok, we have a good vm_area for this memory access, so
335 * we can handle it..
336 */
337
338 good_area:
339 info.si_code = SEGV_ACCERR;
340
341 /* first do some preliminary protection checks */
342
343 if (writeaccess) {
344 if (!(vma->vm_flags & VM_WRITE))
345 goto bad_area;
346 } else {
347 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
348 goto bad_area;
349 }
350
351 /*
352 * If for any reason at all we couldn't handle the fault,
353 * make sure we exit gracefully rather than endlessly redo
354 * the fault.
355 */
356
357 switch (handle_mm_fault(mm, vma, address, writeaccess)) {
358 case 1:
359 tsk->min_flt++;
360 break;
361 case 2:
362 tsk->maj_flt++;
363 break;
364 case 0:
365 goto do_sigbus;
366 default:
367 goto out_of_memory;
368 }
369
370 up_read(&mm->mmap_sem);
371 return;
372
373 /*
374 * Something tried to access memory that isn't in our memory map..
375 * Fix it, but check if it's kernel or user first..
376 */
377
378 bad_area:
379 up_read(&mm->mmap_sem);
380
381 bad_area_nosemaphore:
382 DPG(show_registers(regs));
383
384 /* User mode accesses just cause a SIGSEGV */
385
386 if (user_mode(regs)) {
387 info.si_signo = SIGSEGV;
388 info.si_errno = 0;
389 /* info.si_code has been set above */
390 info.si_addr = (void *)address;
391 force_sig_info(SIGSEGV, &info, tsk);
392 return;
393 }
394
395 no_context:
396
397 /* Are we prepared to handle this kernel fault?
398 *
399 * (The kernel has valid exception-points in the source
400 * when it acesses user-memory. When it fails in one
401 * of those points, we find it in a table and do a jump
402 * to some fixup code that loads an appropriate error
403 * code)
404 */
405
406 if ((fixup = search_exception_table(regs->irp)) != 0) {
407 /* Adjust the instruction pointer in the stackframe */
408
409 regs->irp = fixup;
410
411 /* We do not want to return by restoring the CPU-state
412 * anymore, so switch frame-types (see ptrace.h)
413 */
414
415 regs->frametype = CRIS_FRAME_NORMAL;
416
417 D(printk("doing fixup to 0x%lx\n", fixup));
418 return;
419 }
420
421 /*
422 * Oops. The kernel tried to access some bad page. We'll have to
423 * terminate things with extreme prejudice.
424 */
425
426 if ((unsigned long) (address) < PAGE_SIZE)
427 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
428 else
429 printk(KERN_ALERT "Unable to handle kernel access");
430 printk(" at virtual address %08lx\n",address);
431
432 die_if_kernel("Oops", regs, error_code);
433
434 do_exit(SIGKILL);
435
436 /*
437 * We ran out of memory, or some other thing happened to us that made
438 * us unable to handle the page fault gracefully.
439 */
440
441 out_of_memory:
442 up_read(&mm->mmap_sem);
443 printk("VM: killing process %s\n", tsk->comm);
444 if (user_mode(regs))
445 do_exit(SIGKILL);
446 goto no_context;
447
448 do_sigbus:
449 up_read(&mm->mmap_sem);
450
451 /*
452 * Send a sigbus, regardless of whether we were in kernel
453 * or user mode.
454 */
455 info.si_signo = SIGBUS;
456 info.si_errno = 0;
457 info.si_code = BUS_ADRERR;
458 info.si_addr = (void *)address;
459 force_sig_info(SIGBUS, &info, tsk);
460
461 /* Kernel mode? Handle exceptions or die */
462 if (!user_mode(regs))
463 goto no_context;
464 return;
465
466 vmalloc_fault:
467 {
468 /*
469 * Synchronize this task's top level page-table
470 * with the 'reference' page table.
471 *
472 * Use current_pgd instead of tsk->active_mm->pgd
473 * since the latter might be unavailable if this
474 * code is executed in a misfortunately run irq
475 * (like inside schedule() between switch_mm and
476 * switch_to...).
477 */
478
479 int offset = pgd_index(address);
480 pgd_t *pgd, *pgd_k;
481 pmd_t *pmd, *pmd_k;
482 pte_t *pte_k;
483
484 pgd = (pgd_t *)current_pgd + offset;
485 pgd_k = init_mm.pgd + offset;
486
487 /* Since we're two-level, we don't need to do both
488 * set_pgd and set_pmd (they do the same thing). If
489 * we go three-level at some point, do the right thing
490 * with pgd_present and set_pgd here.
491 *
492 * Also, since the vmalloc area is global, we don't
493 * need to copy individual PTE's, it is enough to
494 * copy the pgd pointer into the pte page of the
495 * root task. If that is there, we'll find our pte if
496 * it exists.
497 */
498
499 pmd = pmd_offset(pgd, address);
500 pmd_k = pmd_offset(pgd_k, address);
501
502 if (!pmd_present(*pmd_k))
503 goto bad_area_nosemaphore;
504
505 set_pmd(pmd, *pmd_k);
506
507 /* Make sure the actual PTE exists as well to
508 * catch kernel vmalloc-area accesses to non-mapped
509 * addresses. If we don't do this, this will just
510 * silently loop forever.
511 */
512
513 pte_k = pte_offset(pmd_k, address);
514 if (!pte_present(*pte_k))
515 goto no_context;
516
517 return;
518 }
519 }
520