1 /*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/cache.c
7 *
8 * Original version Copyright (C) 2000, 2001 Paolo Alberelli
9 * Second version Copyright (C) benedict.gaster@superh.com 2002
10 * Third version Copyright Richard.Curnow@superh.com 2003
11 * Hacks to third version Copyright (C) 2003 Paul Mundt
12 */
13
14 /****************************************************************************/
15
16 #include <linux/config.h>
17 #include <linux/init.h>
18 #include <linux/mman.h>
19 #include <linux/mm.h>
20 #include <linux/threads.h>
21 #include <asm/page.h>
22 #include <asm/pgtable.h>
23 #include <asm/processor.h>
24 #include <asm/cache.h>
25 #include <asm/tlb.h>
26 #include <asm/io.h>
27 #include <asm/uaccess.h>
28 #include <asm/mmu_context.h>
29 #include <asm/pgalloc.h> /* for flush_itlb_range */
30
31 #include <linux/proc_fs.h>
32
33 /* This function is in entry.S */
34 extern unsigned long switch_and_save_asid(unsigned long new_asid);
35
36 /* Wired TLB entry for the D-cache */
37 static unsigned long long dtlb_cache_slot;
38
39 /**
40 * sh64_cache_init()
41 *
42 * This is pretty much just a straightforward clone of the SH
43 * detect_cpu_and_cache_system().
44 *
45 * This function is responsible for setting up all of the cache
46 * info dynamically as well as taking care of CPU probing and
47 * setting up the relevant subtype data.
48 *
49 * FIXME: For the time being, we only really support the SH5-101
50 * out of the box, and don't support dynamic probing for things
51 * like the SH5-103 or even cut2 of the SH5-101. Implement this
52 * later!
53 */
54 int __init sh64_cache_init(void)
55 {
56 /*
57 * First, setup some sane values for the I-cache.
58 */
59 cpu_data->icache.ways = 4;
60 cpu_data->icache.sets = 256;
61 cpu_data->icache.linesz = L1_CACHE_BYTES;
62
63 /*
64 * FIXME: This can probably be cleaned up a bit as well.. for example,
65 * do we really need the way shift _and_ the way_step_shift ?? Judging
66 * by the existing code, I would guess no.. is there any valid reason
67 * why we need to be tracking this around?
68 */
69 cpu_data->icache.way_shift = 13;
70 cpu_data->icache.entry_shift = 5;
71 cpu_data->icache.set_shift = 4;
72 cpu_data->icache.way_step_shift = 16;
73 cpu_data->icache.asid_shift = 2;
74
75 /*
76 * way offset = cache size / associativity, so just don't factor in
77 * associativity in the first place..
78 */
79 cpu_data->icache.way_ofs = cpu_data->icache.sets *
80 cpu_data->icache.linesz;
81
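/*
 * For reference: with 4 ways x 256 sets x 32-byte lines (L1_CACHE_BYTES),
 * each way spans 8kB, hence way_shift == 13.  The masks below just encode
 * that geometry: idx_mask selects the set index from address bits [12:5],
 * asid_mask holds the 8-bit ASID in tag bits [9:2], and epn_mask keeps the
 * virtual tag bits [31:13].
 */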
82 cpu_data->icache.asid_mask = 0x3fc;
83 cpu_data->icache.idx_mask = 0x1fe0;
84 cpu_data->icache.epn_mask = 0xffffe000;
85 cpu_data->icache.flags = 0;
86
87 /*
88 * Next, setup some sane values for the D-cache.
89 *
90 * On the SH5, these are pretty consistent with the I-cache settings,
91 * so we just copy over the existing definitions.. these can be fixed
92 * up later, especially if we add runtime CPU probing.
93 *
94 * Though in the meantime it saves us from having to duplicate all of
95 * the above definitions..
96 */
97 cpu_data->dcache = cpu_data->icache;
98
99 /*
100 * Setup any cache-related flags here
101 */
102 #if defined(CONFIG_DCACHE_WRITE_THROUGH)
103 set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
104 #elif defined(CONFIG_DCACHE_WRITE_BACK)
105 set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
106 #endif
107
108 /*
109 * We also need to reserve a slot for the D-cache in the DTLB, so we
110 * do this now ..
111 */
112 dtlb_cache_slot = sh64_get_wired_dtlb_entry();
113
114 return 0;
115 }
116
117 /*##########################################################################*/
118
119 /* From here onwards, a rewrite of the implementation,
120 by Richard.Curnow@superh.com.
121
122 The major changes in this version compared to the old one are:
123 1. use more selective purging through OCBP instead of using ALLOCO to purge
124 by natural replacement. This avoids purging out unrelated cache lines
125 that happen to be in the same set.
126 2. exploit the APIs copy_user_page and clear_user_page better
127 3. be more selective about I-cache purging, in particular use invalidate_all
128 more sparingly.
129
130 NOTE : all this code runs in process context. As long as it's not used in a
131 pre-emptible or SMP kernel, there are no issues with locking, reentrancy
132 etc. When it gets used in such a kernel this will have to be addressed.
133 (Note, there is no SMP realisation of sh64 yet.)
134 */
135
136 /*##########################################################################
137 SUPPORT FUNCTIONS
138 ##########################################################################*/
139
140 /****************************************************************************/
141 /* The following group of functions deals with mapping and unmapping a temporary
142 page into the DTLB slot that has been set aside for our exclusive use. */
143 /* In order to accomplish this, we use the generic interface for adding and
144 removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
145 /****************************************************************************/
146
147 static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
148 {
149 sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
150 }
151
152 static inline void sh64_teardown_dtlb_cache_slot(void)
153 {
154 sh64_teardown_tlb_slot(dtlb_cache_slot);
155 }
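/*
 * Typical usage of the pair above (see sh64_dcache_purge_coloured_phy_page()
 * further down for the real thing): map the physical page at a convenient
 * effective address, operate on the alias with ocbp/ocbwb, then tear the
 * slot down again, e.g.
 *
 *	sh64_setup_dtlb_cache_slot(eaddr, get_asid(), paddr);
 *	for (addr = eaddr; addr < eaddr + PAGE_SIZE; addr += L1_CACHE_BYTES)
 *		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
 *	sh64_teardown_dtlb_cache_slot();
 */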
156
157 /****************************************************************************/
158
159 #ifndef CONFIG_ICACHE_DISABLED
160
161 static void __inline__ sh64_icache_inv_all(void)
162 {
163 unsigned long long addr, flag, data;
164 unsigned int flags;
165
166 addr=ICCR0;
167 flag=ICCR0_ICI;
168 data=0;
169
170 /* TODO : does this really need to be a critical section? */
171 save_and_cli(flags);
172
173 /* Without %1 it gets inexplicably wrong */
174 asm volatile("getcfg %3, 0, %0\n\t"
175 "or %0, %2, %0\n\t"
176 "putcfg %3, 0, %0\n\t"
177 "synci"
178 : "=&r" (data)
179 : "0" (data), "r" (flag), "r" (addr));
180
181 restore_flags(flags);
182 }
183
184 static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
185 {
186 /* Invalidate range of addresses [start,end] from the I-cache, where
187 * the addresses lie in the kernel superpage. */
188
189 unsigned long long ullend, addr, aligned_start;
190 /* FIXME : This sign extension needs to be made generic. */
191 aligned_start = (unsigned long long)(signed long long)(signed long) start;
192 aligned_start &= L1_CACHE_ALIGN_MASK;
193 addr = aligned_start;
194 /* FIXME : likewise this one */
195 ullend = (unsigned long long) (signed long long) (signed long) end;
196 while (addr <= ullend) {
197 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
198 addr += L1_CACHE_BYTES;
199 }
200 }
201
202 static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
203 {
204 /* If we get called, we know that vma->vm_flags contains VM_EXEC.
205 Also, eaddr is page-aligned. */
206
207 /* For SMP/pre-emptible, this will need to be locked to prevent the
208 ASID from changing under us if there's a reschedule. */
209
210 unsigned long long addr, end_addr;
211 addr = eaddr;
212 end_addr = addr + PAGE_SIZE;
213
214
215 /* FIXME : is there any issue using 'current_asid' here? i.e. can this
216 ever get called with 'vma->vm_mm->context.asid' != current_asid?
217 (the normal case would be munmap or a page flush following a COW
218 break etc, and using current_asid is OK for those.). If we need to
219 use vma->vm_mm->context.asid, we'd have to switch ASIDs and run in
220 the context of the victim. */
221 while (addr < end_addr) {
222 /* Worth unrolling a little */
223 asm __volatile__("icbi %0, 0" : : "r" (addr));
224 asm __volatile__("icbi %0, 32" : : "r" (addr));
225 asm __volatile__("icbi %0, 64" : : "r" (addr));
226 asm __volatile__("icbi %0, 96" : : "r" (addr));
227 addr += 128;
228 }
229 return;
230 }
231
232 /****************************************************************************/
233
234 static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
235 unsigned long start, unsigned long end)
236 {
237 /* Used for invalidating big chunks of I-cache, i.e. assume the range
238 is whole pages. If 'start' or 'end' is not page aligned, the code
239 is conservative and invalidates to the ends of the enclosing pages.
240 This is functionally OK, just a performance loss. */
241
242 /* See the comments below in sh64_dcache_purge_user_range() regarding
243 the choice of algorithm. However, for the I-cache option (2) isn't
244 available because there are no physical tags so aliases can't be
245 resolved. The icbi instruction has to be used through the user
246 mapping. Because icbi is cheaper than ocbp on a cache hit, the
247 selective code remains the cheaper option up to a larger range than
248 it does for the D-cache. Just assume 64 pages for now as a working
249 figure.
250 */
251
252 int n_pages;
253
254 if (!mm) return;
255
256 n_pages = ((end - start) >> PAGE_SHIFT);
257 if (n_pages >= 64) {
258 sh64_icache_inv_all();
259 } else {
260 unsigned long aligned_start;
261 unsigned long eaddr;
262 unsigned long after_last_page_start;
263 unsigned long mm_asid, current_asid;
264 unsigned long long flags = 0ULL;
265
266 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
267 current_asid = get_asid();
268
269 if (mm_asid != current_asid) {
270 /* Switch ASID and run the invalidate loop under cli */
271 save_and_cli(flags);
272 switch_and_save_asid(mm_asid);
273 }
274
275 aligned_start = start & PAGE_MASK;
276 after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);
277
278 while (aligned_start < after_last_page_start) {
279 struct vm_area_struct *vma;
280 unsigned long vma_end;
281 vma = find_vma(mm, aligned_start);
282 if (!vma || (aligned_start < vma->vm_start)) {
283 /* Avoid getting stuck in an error condition */
284 aligned_start += PAGE_SIZE;
285 continue;
286 }
287 vma_end = vma->vm_end;
288 if (vma->vm_flags & VM_EXEC) {
289 /* Executable */
290 eaddr = aligned_start;
291 while (eaddr < vma_end) {
292 sh64_icache_inv_user_page(vma, eaddr);
293 eaddr += PAGE_SIZE;
294 }
295 }
296 aligned_start = vma->vm_end; /* Skip to start of next region */
297 }
298 if (mm_asid != current_asid) {
299 switch_and_save_asid(current_asid);
300 restore_flags(flags);
301 }
302 }
303 }
304
305 static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
306 unsigned long start, int len)
307 {
308
309 /* Invalidate a small range of user context I-cache, not necessarily
310 page (or even cache-line) aligned. */
311
312 unsigned long long eaddr = start;
313 unsigned long long eaddr_end = start + len;
314 unsigned long current_asid, mm_asid;
315 unsigned long long flags;
316 unsigned long long epage_start;
317
318 /* Since this is used inside ptrace, the ASID in the mm context
319 typically won't match current_asid. We'll have to switch ASID to do
320 this. For safety, and given that the range will be small, do all
321 this under cli.
322
323 Note, there is a hazard that the ASID in mm->context is no longer
324 actually associated with mm, i.e. if the mm->context has started a
325 new cycle since mm was last active. However, this is just a
326 performance issue: all that happens is that we invalidate lines
327 belonging to another mm, so the owning process has to refill them
328 when that mm goes live again. mm itself can't have any cache
329 entries because there will have been a flush_cache_all when the new
330 mm->context cycle started. */
331
332 /* Align to start of cache line. Otherwise, suppose len==8 and start
333 was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
334 eaddr = start & L1_CACHE_ALIGN_MASK;
335 eaddr_end = start + len;
336
337 save_and_cli(flags);
338 mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
339 current_asid = switch_and_save_asid(mm_asid);
340
341 epage_start = eaddr & PAGE_MASK;
342
343 while (eaddr < eaddr_end)
344 {
345 asm __volatile__("icbi %0, 0" : : "r" (eaddr));
346 eaddr += L1_CACHE_BYTES;
347 }
348 switch_and_save_asid(current_asid);
349 restore_flags(flags);
350 }
351
352 static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
353 {
354 /* The icbi instruction never raises ITLBMISS. i.e. if there's not a
355 cache hit on the virtual tag the instruction ends there, without a
356 TLB lookup. */
357
358 unsigned long long aligned_start;
359 unsigned long long ull_end;
360 unsigned long long addr;
361
362 ull_end = end;
363
364 /* Just invalidate over the range using the natural addresses. TLB
365 miss handling will be OK (TBC). Since it's for the current process,
366 either we're already in the right ASID context, or the ASIDs have
367 been recycled since we were last active in which case we might just
368 invalidate another process's I-cache entries : no worries, just a
369 performance drop for him. */
370 aligned_start = start & L1_CACHE_ALIGN_MASK;
371 addr = aligned_start;
372 while (addr < ull_end) {
373 asm __volatile__ ("icbi %0, 0" : : "r" (addr));
374 asm __volatile__ ("nop");
375 asm __volatile__ ("nop");
376 addr += L1_CACHE_BYTES;
377 }
378 }
379
380 #endif /* !CONFIG_ICACHE_DISABLED */
381
382 /****************************************************************************/
383
384 #ifndef CONFIG_DCACHE_DISABLED
385
386 /* Buffer used as the target of alloco instructions to purge data from cache
387 sets by natural eviction. -- RPC */
388 #define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
389 static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
390
391 /****************************************************************************/
392
393 static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
394 {
395 /* Purge all ways in a particular block of sets, specified by the base
396 set number and number of sets. Can handle wrap-around, if that's
397 needed. */
398
399 int dummy_buffer_base_set;
400 unsigned long long eaddr, eaddr0, eaddr1;
401 int j;
402 int set_offset;
403
404 dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
405 set_offset = sets_to_purge_base - dummy_buffer_base_set;
406
407 for (j=0; j<n_sets; j++, set_offset++) {
408 set_offset &= (cpu_data->dcache.sets - 1);
409 eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);
410
411 /* Do one alloco which hits the required set per cache way. For
412 write-back mode, this will purge the #ways resident lines. There's
413 little point unrolling this loop because the allocos stall more if
414 they're too close together. */
415 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
416 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
417 asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
418 }
419
420 eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
421 for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
422 /* Load from each address. Required because alloco is a NOP if
423 the cache is write-through. Write-through is a config option. */
424 if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
425 *(volatile unsigned char *)(int)eaddr;
426 }
427 }
428
429 /* Don't use OCBI to invalidate the lines. That costs cycles directly.
430 If the dummy block is just left resident, it will naturally get
431 evicted as required. */
432
433 return;
434 }
435
436 /****************************************************************************/
437
438 static void sh64_dcache_purge_all(void)
439 {
440 /* Purge the entire contents of the dcache. The most efficient way to
441 achieve this is to use alloco instructions on a region of unused
442 memory equal in size to the cache, thereby causing the current
443 contents to be discarded by natural eviction. The alternative,
444 namely reading every tag, setting up a mapping for the corresponding
445 page and doing an OCBP for the line, would be much more expensive.
446 */
447
448 sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
449
450 return;
451
452 }
453
454 /****************************************************************************/
455
456 static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
457 {
458 /* Purge the range of addresses [start,end] from the D-cache. The
459 addresses lie in the superpage mapping. There's no harm if we
460 overpurge at either end - just a small performance loss. */
461 unsigned long long ullend, addr, aligned_start;
462 /* FIXME : This sign extension needs to be made generic. */
463 aligned_start = (unsigned long long)(signed long long)(signed long) start;
464 aligned_start &= L1_CACHE_ALIGN_MASK;
465 addr = aligned_start;
466 /* FIXME : likewise this one */
467 ullend = (unsigned long long) (signed long long) (signed long) end;
468 while (addr <= ullend) {
469 asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
470 addr += L1_CACHE_BYTES;
471 }
472 return;
473 }
474
475 /* Assumes that this address and the (2**CACHE_OC_N_SYNBITS) pages above it aren't used for
476 anything else in the kernel */
477 #define MAGIC_PAGE0_START 0xffffffffec000000ULL
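/*
 * The pages from MAGIC_PAGE0_START upwards provide one window per cache
 * colour: sh64_dcache_purge_coloured_phy_page() below uses the single window
 * matching the colour of 'eaddr', while sh64_dcache_purge_phy_page() walks
 * all (1 << CACHE_OC_N_SYNBITS) of them in turn.
 */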
478
479 static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
480 {
481 /* Purge the physical page 'paddr' from the cache. It's known that any
482 cache lines requiring attention have the same page colour as the
483 address 'eaddr'.
484
485 This relies on the fact that the D-cache matches on physical tags
486 when no virtual tag matches. So we create an alias for the original
487 page and purge through that. (Alternatively, we could have done
488 this by switching ASID to match the original mapping and purged
489 through that, but that involves ASID switching cost + probably a
490 TLBMISS + refill anyway.)
491 */
492
493 unsigned long long magic_page_start;
494 unsigned long long magic_eaddr, magic_eaddr_end;
495
496 magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);
497
498 /* As long as the kernel is not pre-emptible, this doesn't need to be
499 under cli/sti. */
500
501 sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);
502
503 magic_eaddr = magic_page_start;
504 magic_eaddr_end = magic_eaddr + PAGE_SIZE;
505 while (magic_eaddr < magic_eaddr_end) {
506 /* Little point in unrolling this loop - the OCBPs are blocking
507 and won't go any quicker (i.e. the loop overhead is parallel
508 to part of the OCBP execution.) */
509 asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
510 magic_eaddr += L1_CACHE_BYTES;
511 }
512
513 sh64_teardown_dtlb_cache_slot();
514 }
515
516 /****************************************************************************/
517
518 static void sh64_dcache_purge_phy_page(unsigned long paddr)
519 {
520 /* Purge a page given its physical start address, by creating a
521 temporary 1 page mapping and purging across that. Even if we know
522 the virtual address (& vma or mm) of the page, the method here is
523 more elegant because it avoids issues of coping with page faults on
524 the purge instructions (i.e. no special-case code required in the
525 critical path in the TLB miss handling). */
526
527 unsigned long long eaddr_start, eaddr, eaddr_end;
528 int i;
529
530 /* As long as the kernel is not pre-emptible, this doesn't need to be
531 under cli/sti. */
532
533 eaddr_start = MAGIC_PAGE0_START;
534 for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
535 sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);
536
537 eaddr = eaddr_start;
538 eaddr_end = eaddr + PAGE_SIZE;
539 while (eaddr < eaddr_end) {
540 asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
541 eaddr += L1_CACHE_BYTES;
542 }
543
544 sh64_teardown_dtlb_cache_slot();
545 eaddr_start += PAGE_SIZE;
546 }
547 }
548
549 static void sh64_dcache_purge_virt_page(struct mm_struct *mm, unsigned long eaddr)
550 {
551 unsigned long phys;
552 pgd_t *pgd;
553 pmd_t *pmd;
554 pte_t *pte;
555 pte_t entry;
556
557 pgd = pgd_offset(mm, eaddr);
558 pmd = pmd_offset(pgd, eaddr);
559
560 if (pmd_none(*pmd) || pmd_bad(*pmd))
561 return;
562
563 pte = pte_offset(pmd, eaddr);
564 entry = *pte;
565
566 if (pte_none(entry) || !pte_present(entry))
567 return;
568
569 phys = pte_val(entry) & PAGE_MASK;
570
571 sh64_dcache_purge_phy_page(phys);
572 }
573
574 static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
575 {
576 pgd_t *pgd;
577 pmd_t *pmd;
578 pte_t *pte;
579 pte_t entry;
580 unsigned long paddr;
581
582 /* NOTE : all the callers of this have mm->page_table_lock held, so the
583 following page table traversal is safe even on SMP/pre-emptible. */
584
585 if (!mm) return; /* No way to find physical address of page */
586 pgd = pgd_offset(mm, eaddr);
587 if (pgd_bad(*pgd)) return;
588
589 pmd = pmd_offset(pgd, eaddr);
590 if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
591
592 pte = pte_offset(pmd, eaddr);
593 entry = *pte;
594 if (pte_none(entry) || !pte_present(entry)) return;
595
596 paddr = pte_val(entry) & PAGE_MASK;
597
598 sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
599
600 }
601 /****************************************************************************/
602
603 static void sh64_dcache_purge_user_range(struct mm_struct *mm,
604 unsigned long start, unsigned long end)
605 {
606 /* There are at least 5 choices for the implementation of this, with
607 pros (+), cons(-), comments(*):
608
609 1. ocbp each line in the range through the original user's ASID
610 + no lines spuriously evicted
611 - tlbmiss handling (must either handle faults on demand => extra
612 special-case code in tlbmiss critical path), or map the page in
613 advance (=> flush_tlb_range in advance to avoid multiple hits)
614 - ASID switching
615 - expensive for large ranges
616
617 2. temporarily map each page in the range to a special effective
618 address and ocbp through the temporary mapping; relies on the
619 fact that SH-5 OCB* always do TLB lookup and match on ptags (they
620 never look at the etags)
621 + no spurious evictions
622 - expensive for large ranges
623 * surely cheaper than (1)
624
625 3. walk all the lines in the cache, check the tags, if a match
626 occurs create a page mapping to ocbp the line through
627 + no spurious evictions
628 - tag inspection overhead
629 - (especially for small ranges)
630 - potential cost of setting up/tearing down page mapping for
631 every line that matches the range
632 * cost partly independent of range size
633
634 4. walk all the lines in the cache, check the tags, if a match
635 occurs use 4 * alloco to purge the line (+3 other probably
636 innocent victims) by natural eviction
637 + no tlb mapping overheads
638 - spurious evictions
639 - tag inspection overhead
640
641 5. implement like flush_cache_all
642 + no tag inspection overhead
643 - spurious evictions
644 - bad for small ranges
645
646 (1) can be ruled out as more expensive than (2). (2) appears best
647 for small ranges. The choice between (3), (4) and (5) for large
648 ranges and the range size for the large/small boundary need
649 benchmarking to determine.
650
651 For now use approach (2) for small ranges and (5) for large ones.
652
653 */
654
655 int n_pages;
656
657 n_pages = ((end - start) >> PAGE_SHIFT);
658 if (n_pages >= 64) {
659 #if 1
660 sh64_dcache_purge_all();
661 #else
662 unsigned long long set, way;
663 unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
664 for (set = 0; set < cpu_data->dcache.sets; set++) {
665 unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
666 for (way = 0; way < cpu_data->dcache.ways; way++) {
667 unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
668 unsigned long long tag0;
669 unsigned long line_valid;
670
671 asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
672 line_valid = tag0 & SH_CACHE_VALID;
673 if (line_valid) {
674 unsigned long cache_asid;
675 unsigned long epn;
676
677 cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
678 /* The next line needs some
679 explanation. The virtual tags
680 encode bits [31:13] of the virtual
681 address, bit [12] of the 'tag' being
682 implied by the cache set index. */
683 epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);
684
685 if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
686 /* TODO : could optimise this
687 call by batching multiple
688 adjacent sets together. */
689 sh64_dcache_purge_sets(set, 1);
690 break; /* Don't waste time inspecting other ways for this set */
691 }
692 }
693 }
694 }
695 #endif
696 } else {
697 /* 'Small' range */
698 unsigned long aligned_start;
699 unsigned long eaddr;
700 unsigned long last_page_start;
701
702 aligned_start = start & PAGE_MASK;
703 /* 'end' is 1 byte beyond the end of the range */
704 last_page_start = (end - 1) & PAGE_MASK;
705
706 eaddr = aligned_start;
707 while (eaddr <= last_page_start) {
708 sh64_dcache_purge_user_page(mm, eaddr);
709 eaddr += PAGE_SIZE;
710 }
711 }
712 return;
713 }
714
715 static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
716 {
717 unsigned long long aligned_start;
718 unsigned long long ull_end;
719 unsigned long long addr;
720
721 ull_end = end;
722
723 /* Just wback over the range using the natural addresses. TLB miss
724 handling will be OK (TBC) : the range has just been written to by
725 the signal frame setup code, so the PTEs must exist.
726
727 TODO : with SMP or pre-emptible, run this under cli to guard against
728 the risk of a reschedule causing a new ASID cycle to begin.
729 */
730 aligned_start = start & L1_CACHE_ALIGN_MASK;
731 addr = aligned_start;
732 while (addr < ull_end) {
733 asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
734 addr += L1_CACHE_BYTES;
735 }
736 }
737
738 #endif /* !CONFIG_DCACHE_DISABLED */
739
740 /****************************************************************************/
741
742 /* These *MUST* lie in an area of virtual address space that's otherwise unused. */
743 #define UNIQUE_EADDR_START 0xe0000000UL
744 #define UNIQUE_EADDR_END 0xe8000000UL
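/*
 * The window above is 128MB. sh64_make_unique_eaddr() below hands out one
 * colour-adjusted mapping per call, stepping current_pointer on by
 * (PAGE_SIZE << CACHE_OC_N_SYNBITS) each time and purging the whole D-cache
 * before wrapping back to UNIQUE_EADDR_START.
 */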
745
746 static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
747 {
748 /* Given a physical address paddr, and a user virtual address
749 user_eaddr which will eventually be mapped to it, create a one-off
750 kernel-private eaddr mapped to the same paddr. This is used for
751 creating special destination pages for copy_user_page and
752 clear_user_page */
753
754 static unsigned long current_pointer = UNIQUE_EADDR_START;
755 unsigned long coloured_pointer;
756
757 if (current_pointer == UNIQUE_EADDR_END) {
758 sh64_dcache_purge_all();
759 current_pointer = UNIQUE_EADDR_START;
760 }
761
762 coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
763 sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
764
765 current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
766
767 return coloured_pointer;
768 }
769
770 /****************************************************************************/
771
772 static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
773 {
774 void *coloured_to;
775
776 /* Discard any existing cache entries of the wrong colour. These are
777 present quite often, if the kernel has recently used the page
778 internally, then given it up, then it's been allocated to the user.
779 */
780 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
781
782 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
783 sh64_page_copy(from, coloured_to);
784
785 sh64_teardown_dtlb_cache_slot();
786 }
787
788 static void sh64_clear_user_page_coloured(void *to, unsigned long address)
789 {
790 void *coloured_to;
791
792 /* Discard any existing kernel-originated lines of the wrong colour (as
793 above) */
794 sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);
795
796 coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
797 sh64_page_clear(coloured_to);
798
799 sh64_teardown_dtlb_cache_slot();
800 }
801
802 /****************************************************************************/
803
804 /*##########################################################################
805 EXTERNALLY CALLABLE API.
806 ##########################################################################*/
807
808 /* These functions are described in Documentation/cachetlb.txt.
809 Each one of these functions varies in behaviour depending on whether the
810 I-cache and/or D-cache are configured out.
811
812 Note that the Linux term 'flush' corresponds to what is termed 'purge' in
813 the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
814 invalidate the cache lines, and 'invalidate' for the I-cache.
815 */
816
817 #undef FLUSH_TRACE
818
819 void flush_cache_all(void)
820 {
821 /* Invalidate the entire contents of both caches, after writing back to
822 memory any dirty data from the D-cache. */
823 sh64_dcache_purge_all();
824 sh64_icache_inv_all();
825 }
826
827 /****************************************************************************/
828
829 void flush_cache_mm(struct mm_struct *mm)
830 {
831 /* Invalidate an entire user-address space from both caches, after
832 writing back dirty data (e.g. for shared mmap etc). */
833
834 /* This could be coded selectively by inspecting all the tags then
835 doing 4*alloco on any set containing a match (as for
836 flush_cache_range), but fork/exit/execve (where this is called from)
837 are expensive anyway. */
838
839 /* Have to do a purge here, despite the comments re I-cache below.
840 There could be odd-coloured dirty data associated with the mm still
841 in the cache - if this gets written out through natural eviction
842 after the kernel has reused the page there will be chaos.
843
844 TODO Perhaps a selective purge is appropriate.
845 */
846 sh64_dcache_purge_all();
847
848 /* The mm being torn down won't ever be active again, so any Icache
849 lines tagged with its ASID won't be visible for the rest of the
850 lifetime of this ASID cycle. Before the ASID gets reused, there
851 will be a flush_cache_all. This is similar to the lack of action
852 needed in flush_tlb_mm - see fault.c. */
853 #if 0
854 sh64_icache_inv_all();
855 #endif
856 }
857
858 /****************************************************************************/
859
860 void flush_cache_range(struct mm_struct *mm, unsigned long start,
861 unsigned long end)
862 {
863 /* Invalidate (from both caches) the range [start,end) of virtual
864 addresses from the user address space specified by mm, after writing
865 back any dirty data.
866
867 Note(1), 'end' is 1 byte beyond the end of the range to flush.
868
869 Note(2), this is called with mm->page_table_lock held.*/
870
871 sh64_dcache_purge_user_range(mm, start, end);
872 sh64_icache_inv_user_page_range(mm, start, end);
873 }
874
875 /****************************************************************************/
876
877 void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr)
878 {
879 /* Invalidate any entries in either cache for the vma within the user
880 address space vma->vm_mm for the page starting at virtual address
881 'eaddr'. This seems to be used primarily in breaking COW. Note,
882 the I-cache must be searched too in case the page in question is
883 both writable and being executed from (e.g. stack trampolines.)
884
885 Note(1), this is called with mm->page_table_lock held.
886 */
887
888 sh64_dcache_purge_virt_page(vma->vm_mm, eaddr);
889
890 if (vma->vm_flags & VM_EXEC) {
891 sh64_icache_inv_user_page(vma, eaddr);
892 }
893 }
894
895 /****************************************************************************/
896
897 void flush_page_to_ram(struct page *page)
898 {
899 /* This is a deprecated API, being replaced by copy/clear_user_page and
900 flush_dcache_page. However, parts of the generic code (e.g. ptrace)
901 still require an implementation of flush_page_to_ram for them to
902 work properly.
903
904 The 'page' argument defines a *physical* page. After the function,
905 no lines in the D-cache must reference that page, and any dirty
906 entries in the page must have been written out. (This is used to
907 make memory coherent with the cache when the I-cache is about to see
908 the data etc) -- RPC */
909
910 /* As an elaboration on the above, flush_page_to_ram() is indeed
911 deprecated, and the majority of users (such as ptrace) are using
912 this erroneously anyway. As such, there are two different things we
913 can do here.. namely, we can either nop this out and hope that some
914 clueless generic code isn't abusing the interface (this is what we
915 do for sh, and it seems to work just fine), or we can play it safe
916 (albeit much slower, since we have the added cost of additional
917 flushing to contend with) and just wrap to the existing
918 flush_dcache_page() behavior. -- PFM */
919
920 flush_dcache_page(page);
921 }
922
923 /****************************************************************************/
924
925 #ifndef CONFIG_DCACHE_DISABLED
926
927 void copy_user_page(void *to, void *from, unsigned long address)
928 {
929 /* 'from' and 'to' are kernel virtual addresses (within the superpage
930 mapping of the physical RAM). 'address' is the user virtual address
931 where the copy 'to' will be mapped after. This allows a custom
932 mapping to be used to ensure that the new copy is placed in the
933 right cache sets for the user to see it without having to bounce it
934 out via memory. Note however : the call to flush_page_to_ram in
935 (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
936 very important case!
937
938 TBD : can we guarantee that on every call, any cache entries for
939 'from' are in the same colour sets as 'address' also? i.e. is this
940 always used just to deal with COW? (I suspect not). */
941
942 /* There are two possibilities here for when the page 'from' was last accessed:
943 * by the kernel : this is OK, no purge required.
944 * by the/a user (e.g. for break_COW) : need to purge.
945
946 If the potential user mapping at 'address' is the same colour as
947 'from' there is no need to purge any cache lines from the 'from'
948 page mapped into cache sets of colour 'address'. (The copy will be
949 accessing the page through 'from').
950 */
951
952 if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
953 sh64_dcache_purge_coloured_phy_page(__pa(from), address);
954 }
955
956 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
957 /* No synonym problem on destination */
958 sh64_page_copy(from, to);
959 } else {
960 sh64_copy_user_page_coloured(to, from, address);
961 }
962
963 /* Note, don't need to flush 'from' page from the cache again - it's
964 done anyway by the generic code */
965 }
966
967 void clear_user_page(void *to, unsigned long address)
968 {
969 /* 'to' is a kernel virtual address (within the superpage
970 mapping of the physical RAM). 'address' is the user virtual address
971 where the 'to' page will be mapped after. This allows a custom
972 mapping to be used to ensure that the new copy is placed in the
973 right cache sets for the user to see it without having to bounce it
974 out via memory.
975 */
976
977 if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
978 /* No synonym problem on destination */
979 sh64_page_clear(to);
980 } else {
981 sh64_clear_user_page_coloured(to, address);
982 }
983 }
984
985 #endif /* !CONFIG_DCACHE_DISABLED */
986
987 /****************************************************************************/
988
989 void flush_dcache_page(struct page *page)
990 {
991 /* The behaviour of this function is equivalent to that of
992 flush_page_to_ram above. */
993
994 sh64_dcache_purge_phy_page(page_to_phys(page));
995 wmb();
996 }
997
998 /****************************************************************************/
999
1000 void flush_icache_range(unsigned long start, unsigned long end)
1001 {
1002 /* Flush the range [start,end] of kernel virtual address space from
1003 the I-cache. The corresponding range must be purged from the
1004 D-cache also because the SH-5 doesn't have cache snooping between
1005 the caches. The addresses will be visible through the superpage
1006 mapping, therefore it's guaranteed that there are no cache entries for
1007 the range in cache sets of the wrong colour.
1008
1009 Primarily used for cohering the I-cache after a module has
1010 been loaded. */
1011
1012 /* We also make sure to purge the same range from the D-cache since
1013 flush_page_to_ram() won't be doing this for us! */
1014
1015 sh64_dcache_purge_kernel_range(start, end);
1016 wmb();
1017 sh64_icache_inv_kernel_range(start, end);
1018 }
1019
1020 /****************************************************************************/
1021
1022 void flush_icache_user_range(struct vm_area_struct *vma,
1023 struct page *page, unsigned long addr, int len)
1024 {
1025 /* Flush the range of user (defined by vma->vm_mm) address space
1026 starting at 'addr' for 'len' bytes from the cache. The range does
1027 not straddle a page boundary, the unique physical page containing
1028 the range is 'page'. This seems to be used mainly for invalidating
1029 an address range following a poke into the program text through the
1030 ptrace() call from another process (e.g. for BRK instruction
1031 insertion). */
1032
1033 sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
1034 wmb();
1035
1036 if (vma->vm_flags & VM_EXEC) {
1037 sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
1038 }
1039 }
1040
1041 /****************************************************************************/
1042
1043 void flush_icache_page(struct vm_area_struct *vma, struct page *page)
1044 {
1045 /* Called when a page-cache page is about to be mapped into a user
1046 address space (defined by vma->vm_mm). Since the SH-5 I-cache
1047 hasn't got physical tags, this doesn't have to do anything. Empirically, this is OK.
1048
1049 Note, SH-5 I-cache entries can legally outlive the ITLB mapping
1050 anyway. So even if an executable page is dropped by the generic VM
1051 to free a page, we can still execute it from the cache. (The cache
1052 invalidation occurs as part of flush_cache_range or flush_cache_mm
1053 when the munmap() or exit() finally happens.) -- RPC */
1054
1055 /* We also want to purge this page from the D-cache, since we can't
1056 rely on flush_page_to_ram() to do the D-cache purge.
1057
1058 Yes, this essentially constitutes misuse of flush_icache_page(), but
1059 we need this specifically for do_swap_page() in mm/memory.c for
1060 purging the page from both the I and D-cache. (This is primarily
1061 a performance hack!).
1062
1063 We can probably do away with this entirely once we get around to
1064 doing selective flushing from update_mmu_cache() once we can
1065 sanely test the page flags for PG_arch_1 (PG_dcache_dirty) to see if
1066 we need to do the D-cache purge. -- PFM */
1067
1068 sh64_dcache_purge_phy_page(page_to_phys(page));
1069 wmb();
1070 }
1071
1072 /*##########################################################################
1073 ARCH/SH64 PRIVATE CALLABLE API.
1074 ##########################################################################*/
1075
1076 void flush_cache_sigtramp(unsigned long start, unsigned long end)
1077 {
1078 /* For the address range [start,end), write back the data from the
1079 D-cache and invalidate the corresponding region of the I-cache for
1080 the current process. Used to flush signal trampolines on the stack
1081 to make them executable. */
1082
1083 sh64_dcache_wback_current_user_range(start, end);
1084 wmb();
1085 sh64_icache_inv_current_user_range(start, end);
1086 }
1087
1088