/* Copyright 2002 Andi Kleen, SuSE Labs */

/*
 * ISO C memset - set a memory block to a byte value.
 *
 * Syntax: GNU as, AT&T operand order.  ABI: System V AMD64 (leaf, no stack use).
 *
 * In:
 *	rdi	destination
 *	rsi	value (only the low byte is used)
 *	rdx	count (bytes, full 64-bit value is honoured)
 * Out:
 *	rax	original destination
 * Clobbers:
 *	rcx, rdx, rdi, r8, r9, r10, r11, flags
 *
 * Register roles inside the routine:
 *	rax	fill pattern (value byte replicated 8 times)
 *	rdi	current write pointer
 *	r10	saved original destination (return value)
 *	r11	bytes remaining after the optional alignment fixup
 *	rcx	loop counter
 */
	.globl __memset
	.globl memset
	.p2align 4
memset:
__memset:
	movq	%rdi,%r10		/* keep dst for the return value */
	movq	%rdx,%r11		/* keep count: mul below clobbers rdx */

	/* expand byte value into all eight bytes of rax */
	movzbl	%sil,%ecx
	movabs	$0x0101010101010101,%rax
	mul	%rcx			/* with rax, clobbers rdx */

	/* align dst */
	movl	%edi,%r9d
	andl	$7,%r9d			/* r9 = dst & 7 */
	jnz	.Lbad_alignment
.Lafter_bad_alignment:

	/*
	 * Number of 64-byte blocks.  This must be a 64-bit computation:
	 * a 32-bit counter would drop the upper bits of the count and
	 * under-fill requests of 4 GiB and above.
	 */
	movq	%r11,%rcx
	shrq	$6,%rcx
	jz	.Lhandle_tail

	.p2align 4
.Lloop_64:
	decq	%rcx			/* sets ZF; mov/lea below leave flags alone */
	movq	%rax,(%rdi)
	movq	%rax,8(%rdi)
	movq	%rax,16(%rdi)
	movq	%rax,24(%rdi)
	movq	%rax,32(%rdi)
	movq	%rax,40(%rdi)
	movq	%rax,48(%rdi)
	movq	%rax,56(%rdi)
	leaq	64(%rdi),%rdi
	jnz	.Lloop_64

	/*
	 * Handle the tail with loops; they should be faster than
	 * hard-to-predict jump tables.  Only the low six bits of the
	 * count matter from here on, so 32-bit operations are sufficient.
	 */
	.p2align 4
.Lhandle_tail:
	movl	%r11d,%ecx
	andl	$63&(~7),%ecx		/* bytes left in whole 8-byte words */
	jz	.Lhandle_7
	shrl	$3,%ecx			/* ecx = number of 8-byte words (1..7) */
	.p2align 4
.Lloop_8:
	decl	%ecx
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8

.Lhandle_7:
	movl	%r11d,%ecx
	andl	$7,%ecx			/* ecx = trailing bytes (0..7) */
	jz	.Lende
	.p2align 4
.Lloop_1:
	decl	%ecx
	movb	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz	.Lloop_1

.Lende:
	movq	%r10,%rax		/* return the original destination */
	ret

	/*
	 * dst is not 8-byte aligned (r9 = dst & 7, non-zero).
	 * With fewer than 8 bytes to set, the byte loop does all the work.
	 * Otherwise one unaligned 8-byte store covers the head, and
	 * dst/count are then advanced by (8 - r9) so that dst becomes
	 * 8-byte aligned; the bytes stored past that point are simply
	 * written again with the same pattern by the following loops.
	 */
.Lbad_alignment:
	cmpq	$7,%r11
	jbe	.Lhandle_7
	movq	%rax,(%rdi)		/* unaligned store */
	movq	$8,%r8
	subq	%r9,%r8			/* r8 = 8 - (dst & 7) */
	addq	%r8,%rdi
	subq	%r8,%r11
	jmp	.Lafter_bad_alignment