/*
 * blockops.S: Common block zero optimized routines.
 *
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 */

#include <asm/page.h>

	/* BLAST_BLOCK(base, off)
	 *
	 * Clear the 64 bytes starting at (base + off) with eight
	 * doubleword stores, issued from the highest address down.
	 * Every std writes the %g0/%g1 register pair, so %g1 must
	 * already hold zero when this macro is expanded.
	 */
#define BLAST_BLOCK(base, off) \
	std	%g0, [base + off + 0x38]; \
	std	%g0, [base + off + 0x30]; \
	std	%g0, [base + off + 0x28]; \
	std	%g0, [base + off + 0x20]; \
	std	%g0, [base + off + 0x18]; \
	std	%g0, [base + off + 0x10]; \
	std	%g0, [base + off + 0x08]; \
	std	%g0, [base + off + 0x00];

	/* MIRROR_BLOCK(to, from, off, <eight scratch registers>)
	 *
	 * Copy the 32 bytes at (from + off) to (to + off).  All four
	 * doublewords are loaded before any of them is stored.
	 *
	 * The scratch registers are given as four consecutive pairs.
	 * Only the first register of each pair is named in the body;
	 * ldd/std implicitly use its partner as well, so all eight
	 * registers are overwritten.
	 */
#define MIRROR_BLOCK(to, from, off, a_even, a_odd, b_even, b_odd, c_even, c_odd, d_even, d_odd) \
	ldd	[from + off + 0x18], a_even; \
	ldd	[from + off + 0x10], b_even; \
	ldd	[from + off + 0x08], c_even; \
	ldd	[from + off + 0x00], d_even; \
	std	a_even, [to + off + 0x18]; \
	std	b_even, [to + off + 0x10]; \
	std	c_even, [to + off + 0x08]; \
	std	d_even, [to + off + 0x00];

	/* Profiling shows that memset() is most often called with a
	 * fill value of zero and a length of PAGE_SIZE, or of
	 * (2 * PAGE_SIZE) for kernel stacks.  In all of these cases
	 * the buffer is assumed to be aligned on at least an 8 byte
	 * boundary.
	 *
	 * Those cases are therefore special-cased here to make them
	 * as fast as possible.
	 */

	.text
	.align	4
	.globl	bzero_1page, __copy_1page

/*
 * bzero_1page: zero PAGE_SIZE bytes.
 *
 * In:       %o0 = buf (assumed at least 8-byte aligned, see above)
 * Clobbers: %g1, %g2, %o0, %o1, integer condition codes
 *
 * Leaf routine (returns with retl).  Each loop pass clears 0x100
 * bytes and the loop runs (PAGE_SIZE >> 8) times.
 */
bzero_1page:
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	clr	%g1				/* low half of the %g0/%g1 store pair */
	/* NOTE(review): %o1 is not read anywhere below.  The instruction is
	 * left exactly as it was so the instruction count stays the same. */
	or	%o0, %g0, %o1
	mov	(PAGE_SIZE >> 8), %g2		/* passes left, 0x100 bytes each */
1:
	BLAST_BLOCK(%o0, 0x00)
	BLAST_BLOCK(%o0, 0x40)
	BLAST_BLOCK(%o0, 0x80)
	BLAST_BLOCK(%o0, 0xc0)
	deccc	%g2
	bnz	1b
	 add	%o0, 0x100, %o0			/* delay slot: step to next 0x100 bytes */

	retl
	 nop

/*
 * __copy_1page: copy PAGE_SIZE bytes.
 *
 * In:       %o0 = dst, %o1 = src
 * Clobbers: %g1-%g5, %o0-%o5, integer condition codes
 *
 * Leaf routine (returns with retl).  Each loop pass copies 0x100
 * bytes as eight 32-byte MIRROR_BLOCKs and the loop runs
 * (PAGE_SIZE >> 8) times.
 */
__copy_1page:
/* NOTE: If you change the number of insns of this routine, please check
 * arch/sparc/mm/hypersparc.S */
	mov	(PAGE_SIZE >> 8), %g1		/* passes left, 0x100 bytes each */
1:
	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
	deccc	%g1
	add	%o0, 0x100, %o0			/* advance dst */
	bnz	1b
	 add	%o1, 0x100, %o1			/* delay slot: advance src */

	retl
	 nop