1/* Written by Richard P. Curnow, SuperH (UK) Ltd.
2
3   Tight version of memset for the case of just clearing a page.  It turns out
4   that having the alloco's spaced out slightly due to the increment/branch
5   pair causes them to contend less for access to the cache.  Similarly,
6   keeping the stores apart from the allocos causes less contention.  => Do two
7   separate loops.  Do multiple stores per loop to amortise the
8   increment/branch cost a little.
9
10   Parameters:
   r2 : effective address of the page to clear (start of page; written, never read)
12
13   Always clears 4096 bytes.
14
15*/
16
17	.section .text..SHmedia32,"ax"
18	.little
19
20	.balign 8
21	.global sh64_page_clear
22sh64_page_clear:
23	pta/l 1f, tr1
24	pta/l 2f, tr2
25	ptabs/l r18, tr0
26
27	movi 4096, r7
28	add  r2, r7, r7
29	add  r2, r63, r6
301:
31	alloco r6, 0
32	addi	r6, 32, r6
33	bgt/l	r7, r6, tr1
34
35	add  r2, r63, r6
362:
37	st.q  r6,   0, r63
38	st.q  r6,   8, r63
39	st.q  r6,  16, r63
40	st.q  r6,  24, r63
41	addi r6, 32, r6
42	bgt/l r7, r6, tr2
43
44	blink tr0, r63
45
46
47