/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/config.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use invalidate_all
      more sparingly.

   NOTE : all this code runs in process context.  As long as it's not used in a
   pre-emptible or SMP kernel, there are no issues with locking, reentrancy
   etc.  When it gets used in such a kernel this will have to be addressed.
   (Note, there is no SMP realisation of sh64 yet.)
   */

/*##########################################################################
			       SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deals with mapping and unmapping a temporary
   page into the DTLB slot that has been set aside for our exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
}

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned int flags;

	addr=ICCR0;
	flag=ICCR0_ICI;
	data=0;

	/* TODO : does this really need to be a critical section? */
	save_and_cli(flags);

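	/* Invalidate the whole I-cache by setting the ICI bit in ICCR0 via a
	   getcfg/putcfg read-modify-write; the trailing synci makes sure the
	   invalidate has taken effect before we carry on. */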
	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg	%3, 0, %0\n\t"
			"or	%0, %2, %0\n\t"
			"putcfg	%3, 0, %0\n\t"
			"synci"
			: "=&r" (data)
			: "0" (data), "r" (flag), "r" (addr));

	restore_flags(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
	/* FIXME : This sign extension needs to be made generic. */
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	/* FIXME : likewise this one */
	ullend = (unsigned long long) (signed long long) (signed long) end;
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	/* For SMP/pre-emptible, this will need to be locked to prevent the
	   ASID from changing under us if there's a reschedule. */

	unsigned long long addr, end_addr;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* FIXME : is there any issue using 'current_asid' here?  i.e. can this
	   ever get called with 'vma->vm_mm->context.asid' != current_asid?
	   (the normal case would be munmap or a page flush following a COW
	   break etc, and using current_asid is OK for those).  If we need to
	   use vma->vm_mm->context.asid, we'd have to switch ASIDs and run in
	   the context of the victim. */
	while (addr < end_addr) {
		/* Worth unrolling a little */
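		/* The explicit 32-byte offsets and the 128-byte stride assume
		   a 32-byte cache line (L1_CACHE_BYTES == 32), i.e. four
		   lines are invalidated per iteration. */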
		asm __volatile__("icbi %0,  0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	return;
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.   Because icbi is cheaper than ocbp on a cache hit, it
	   would be cheaper to use the selective code for a large range than is
	   possible with the D-cache.  Just assume 64 for now as a working
	   figure.
	   */

	int n_pages;

	if (!mm) return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			save_and_cli(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start < vma->vm_start)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			restore_flags(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to do
	   this.  For safety, and given that the range will be small, do all
	   this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the new
	   mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	save_and_cli(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	restore_flags(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries : no worries, just a
	   performance drop for him. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed.  */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

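	/* Work out which cache set the base of the dummy buffer maps to, so
	   that the alloco addresses computed below can be offset from it to
	   land in the requested sets. */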
	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j=0; j<n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.  For
		   write-back mode, this will purge the #ways resident lines.   There's
		   little point unrolling this loop because the allocos stall more if
		   they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr=eaddr0; eaddr<eaddr1; eaddr+=cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is a NOP if
			   the cache is write-through.  Write-through is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles directly.
	   If the dummy block is just left resident, it will naturally get
	   evicted as required.  */

	return;
}

/****************************************************************************/

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.  The most efficient way to
	   achieve this is to use alloco instructions on a region of unused
	   memory equal in size to the cache, thereby causing the current
	   contents to be discarded by natural eviction.  The alternative,
	   namely reading every tag, setting up a mapping for the corresponding
	   page and doing an OCBP for the line, would be much more expensive.
	   */

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
	/* FIXME : This sign extension needs to be made generic. */
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	/* FIXME : likewise this one */
	ullend = (unsigned long long) (signed long long) (signed long) end;
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address (+ (2**n_synbits) pages up from it) aren't used for
   anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that any
	   cache lines requiring attention have the same page colour as the
	   address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the original
	   page and purge through that.  (Alternatively, we could have done
	   this by switching ASID to match the original mapping and purged
	   through that, but that involves ASID switching cost + probably a
	   TLBMISS + refill anyway.)
	   */

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

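	/* Offset the magic base by the synonym bits of 'eaddr', so the alias
	   has the same cache colour as the original mapping and therefore
	   covers the same cache sets. */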
	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults on
	   the purge instructions (i.e. no special-case code required in the
	   critical path in the TLB miss handling). */

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

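	/* Loop over every possible cache colour: map the page at each coloured
	   alias of the magic address range in turn and purge through it, so
	   that lines of any colour get written back and invalidated. */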
	eaddr_start = MAGIC_PAGE0_START;
	for (i=0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

static void sh64_dcache_purge_virt_page(struct mm_struct *mm, unsigned long eaddr)
{
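	/* Purge from the D-cache the page mapped at user virtual address
	   'eaddr' in 'mm': walk the page tables to find the backing physical
	   page and purge that.  If nothing is mapped there, there is nothing
	   to do. */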
	unsigned long phys;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;

	pgd = pgd_offset(mm, eaddr);
	pmd = pmd_offset(pgd, eaddr);

	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset(pmd, eaddr);
	entry = *pte;

	if (pte_none(entry) || !pte_present(entry))
		return;

	phys = pte_val(entry) & PAGE_MASK;

	sh64_dcache_purge_phy_page(phys);
}

static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	unsigned long paddr;

	/* NOTE : all the callers of this have mm->page_table_lock held, so the
	   following page table traversal is safe even on SMP/pre-emptible. */

	if (!mm) return; /* No way to find physical address of page */
	pgd = pgd_offset(mm, eaddr);
	if (pgd_bad(*pgd)) return;

	pmd = pmd_offset(pgd, eaddr);
	if (pmd_none(*pmd) || pmd_bad(*pmd)) return;

	pte = pte_offset(pmd, eaddr);
	entry = *pte;
	if (pte_none(entry) || !pte_present(entry)) return;

	paddr = pte_val(entry) & PAGE_MASK;

	sh64_dcache_purge_coloured_phy_page(paddr, eaddr);

}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand => extra
		special-case code in tlbmiss critical path), or map the page in
		advance (=> flush_tlb_range in advance to avoid multiple hits)
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.

	   */

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag' being
					   implied by the cache set index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* 'Small' range */
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long last_page_start;

		aligned_start = start & PAGE_MASK;
		/* 'end' is 1 byte beyond the end of the range */
		last_page_start = (end - 1) & PAGE_MASK;

		eaddr = aligned_start;
		while (eaddr <= last_page_start) {
			sh64_dcache_purge_user_page(mm, eaddr);
			eaddr += PAGE_SIZE;
		}
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   TODO : with SMP or pre-emptible, run this under cli to guard against
	   the risk of a reschedule causing a new ASID cycle to begin.
	   */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.  This is used for
	   creating special destination pages for copy_user_page and
	   clear_user_page */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

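	/* Once the private window has been fully consumed, purge the whole
	   D-cache before wrapping: earlier aliases made through these
	   addresses may still have lines resident, and reusing the addresses
	   for different physical pages without a purge could give stale
	   virtual-tag hits. */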
	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   present quite often if the kernel has recently used the page
	   internally, then given it up, and the page has then been allocated
	   to the user.
	   */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour (as
	   above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/

/*##########################################################################
			    EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back to
	   memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called from)
	   are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm still
	   in the cache - if this gets written out through natural eviction
	   after the kernel has reused the page there will be chaos.

	   TODO Perhaps a selective purge is appropriate.
	   */
	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  This is similar to the lack of action
	   needed in flush_tlb_mm - see fault.c. */
#if 0
	sh64_icache_inv_all();
#endif
}

/****************************************************************************/

void flush_cache_range(struct mm_struct *mm, unsigned long start,
		       unsigned long end)
{
	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after writing
	   back any dirty data.

	   Note(1), 'end' is 1 byte beyond the end of the range to flush.

	   Note(2), this is called with mm->page_table_lock held. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.   This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note(1), this is called with mm->page_table_lock held.
	   */

	sh64_dcache_purge_virt_page(vma->vm_mm, eaddr);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

void flush_page_to_ram(struct page *page)
{
	/* This is a deprecated API, being replaced by copy/clear_user_page and
	   flush_dcache_page.  However, parts of the generic code (e.g. ptrace)
	   still require an implementation of flush_page_to_ram for them to
	   work properly.

	   The 'page' argument defines a *physical* page.  After the function,
	   no lines in the D-cache must reference that page, and any dirty
	   entries in the page must have been written out.  (This is used to
	   make memory coherent with the cache when the I-cache is about to see
	   the data etc) -- RPC */

	/* As an elaboration on the above, flush_page_to_ram() is indeed
	   deprecated, and the majority of users (such as ptrace) are using
	   this erroneously anyway.  As such, there are two different things we
	   can do here.. namely, we can either nop this out and hope that some
	   clueless generic code isn't abusing the interface (this is what we
	   do for sh, and it seems to work just fine), or we can play it safe
	   (albeit much slower, since we have the added cost of additional
	   flushing to contend with) and just wrap to the existing
	   flush_dcache_page() behaviour. -- PFM */

	flush_dcache_page(page);
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the copy 'to' will be mapped after.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.  Note however : the call to flush_page_to_ram in
	   (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
	   very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is this
	   always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will be
	   accessing the page through 'from').
	   */

	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
	}

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_copy(from, to);
	} else {
		sh64_copy_user_page_coloured(to, from, address);
	}

	/* Note, don't need to flush 'from' page from the cache again - it's
	   done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address)
{
	/* 'to' is a kernel virtual address (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual address
	   where the 'to' page will be mapped after.  This allows a custom
	   mapping to be used to ensure that the new copy is placed in the
	   right cache sets for the user to see it without having to bounce it
	   out via memory.
	*/

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_clear(to);
	} else {
		sh64_clear_user_page_coloured(to, address);
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
	/* The behaviour of this function is equivalent to that of
	   flush_page_to_ram above. */

	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
	/* Flush the range [start,end] of kernel virtual address space from
	   the I-cache.  The corresponding range must be purged from the
	   D-cache also because the SH-5 doesn't have cache snooping between
	   the caches.  The addresses will be visible through the superpage
	   mapping, therefore it's guaranteed that there are no cache entries
	   for the range in cache sets of the wrong colour.

	   Primarily used for cohering the I-cache after a module has
	   been loaded.  */

	/* We also make sure to purge the same range from the D-cache since
	   flush_page_to_ram() won't be doing this for us! */

	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	/* Flush the range of user (defined by vma->vm_mm) address space
	   starting at 'addr' for 'len' bytes from the cache.  The range does
	   not straddle a page boundary, the unique physical page containing
	   the range is 'page'.  This seems to be used mainly for invalidating
	   an address range following a poke into the program text through the
	   ptrace() call from another process (e.g. for BRK instruction
	   insertion). */

	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	wmb();

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
	}
}

/****************************************************************************/

void flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
	/* Called when a page-cache page is about to be mapped into a user
	   address space (defined by vma->vm_mm).  Since the SH-5 I-cache
	   hasn't got physical tags, this doesn't have to do anything.
	   Empirically, this is OK.

	   Note, SH-5 I-cache entries can legally outlive the ITLB mapping
	   anyway.  So even if an executable page is dropped by the generic VM
	   to free a page, we can still execute it from the cache.  (The cache
	   invalidation occurs as part of flush_cache_range or flush_cache_mm
	   when the munmap() or exit() finally happens.) -- RPC */

	/* We also want to purge this page from the D-cache, since we can't
	   rely on flush_page_to_ram() to do the D-cache purge.

	   Yes, this essentially constitutes misuse of flush_icache_page(), but
	   we need this specifically for do_swap_page() in mm/memory.c for
	   purging the page from both the I and D-cache. (This is primarily
	   a performance hack!).

	   We can probably do away with this entirely once we get around to
	   doing selective flushing from update_mmu_cache(), when we can
	   sanely test the page flags for PG_arch_1 (PG_dcache_dirty) to see if
	   we need to do the D-cache purge. -- PFM */

	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the stack
	   to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}