/* Optimized memset for PowerPC476 (128-byte cacheline).
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* memset

   r3:  destination address on entry; return value on exit
   r4:  fill value (int c); only its low byte is stored
   r5:  byte count
   r11: fill byte replicated into all four bytes of the register
   r12: saved copy of the original destination, moved back to r3 on return

   Also written: r6-r10, CTR, XER, CR0.

   Save the destination in r12.
   If the count is greater than 255 bytes and the fill byte is zero,
   advance to a 128-byte boundary and clear 128 bytes at a time with
   dcbz.
   If the count is greater than 255 bytes and the fill byte is nonzero,
   set 1-3 bytes to make the destination word aligned.
   Then, while at least 16 bytes remain, store 16 bytes (four words)
   per loop iteration.
   Finally set the remaining 0-15 bytes with a string store.  */

EALIGN (memset, 5, 0)
	/* r11 = (c & 0xff) replicated into every byte.  */
	rlwinm	r11,r4,0,24,31
	rlwimi	r11,r4,8,16,23
	rlwimi	r11,r11,16,0,15
	/* Keep the original destination for the return value.  */
	addi	r12,r3,0
	/* The count is an unsigned size_t, so compare it as unsigned;
	   a signed compare would treat counts >= 2^31 as small.  */
	cmplwi	r5,0x00FF
	ble	L(preword8_count_loop)
	/* The stored value is c converted to a byte, so test the
	   replicated byte rather than the whole int in r4.  */
	cmpwi	r11,0
	beq	L(use_dcbz)
	/* r6 = number of bytes (0-3) up to the next word boundary.  */
	neg	r6,r3
	clrlwi.	r6,r6,30
	beq	L(preword8_count_loop)
	/* r8 = 1, the per-byte decrement for the count.  */
	addi	r8,0,1
	mtctr	r6
	/* Bias r3 by -1 because stbu updates the address before use.  */
	subi	r3,r3,1

L(unaligned_bytecopy_loop):
	stbu	r11,0x1(r3)
	subf.	r5,r8,r5
	beq	L(end_memset)
	bdnz	L(unaligned_bytecopy_loop)
	/* Undo the -1 bias.  */
	addi	r3,r3,1

L(preword8_count_loop):
	/* r6 = number of 16-byte groups left.  */
	srwi.	r6,r5,4
	beq	L(preword2_count_loop)
	mtctr	r6
	/* Bias r3 by -4 because stwu updates the address before use.  */
	addi	r3,r3,-4
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11

	/* Store four words (16 bytes) per iteration.  */
L(word8_count_loop_no_dcbt):
	stwu	r8,4(r3)
	stwu	r9,4(r3)
	subi	r5,r5,0x10
	stwu	r10,4(r3)
	stwu	r11,4(r3)
	bdnz	L(word8_count_loop_no_dcbt)
	/* Undo the -4 bias.  */
	addi	r3,r3,4

L(preword2_count_loop):
	/* r7 = remaining byte count (0-15).  */
	clrlwi.	r7,r5,28
	beq	L(end_memset)
	/* stswx stores the number of bytes given by XER, taking them
	   from r8 and the registers that follow it (r9, r10, r11).  */
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	mtxer	r7
	stswx	r8,0,r3

L(end_memset):
	/* Return the original destination.  */
	addi	r3,r12,0
	blr

	/* Zero fill of more than 255 bytes.  r11 is zero here.  */
L(use_dcbz):
	/* r6 = -destination; r7 = bytes (0-15) up to the next 16-byte
	   boundary.  */
	neg	r6,r3
	clrlwi.	r7,r6,28
	beq	L(skip_string_loop)
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	subf	r5,r7,r5
	mtxer	r7
	stswx	r8,0,r3
	add	r3,r3,r7

L(skip_string_loop):
	/* r8 = number of 16-byte groups up to the next 128-byte
	   boundary, computed from the original destination in r6.  */
	clrlwi	r8,r6,25
	srwi.	r8,r8,4
	beq	L(dcbz_pre_loop)
	mtctr	r8

L(word_loop):
	stw	r11,0(r3)
	subi	r5,r5,0x10
	stw	r11,4(r3)
	stw	r11,8(r3)
	stw	r11,12(r3)
	addi	r3,r3,0x10
	bdnz	L(word_loop)

L(dcbz_pre_loop):
	/* r6 = number of 128-byte blocks left.  This is at least 1:
	   the count was above 255 on entry to this path and at most
	   127 bytes were consumed reaching the 128-byte boundary.  */
	srwi	r6,r5,7
	mtctr	r6
	/* r7 = 0, used as the index register for dcbz.  */
	addi	r7,0,0

L(dcbz_loop):
	dcbz	r3,r7
	addi	r3,r3,0x80
	subi	r5,r5,0x80
	bdnz	L(dcbz_loop)
	/* r6 = number of 16-byte groups left after the dcbz blocks.  */
	srwi.	r6,r5,4
	beq	L(postword2_count_loop)
	mtctr	r6

L(postword8_count_loop):
	stw	r11,0(r3)
	subi	r5,r5,0x10
	stw	r11,4(r3)
	stw	r11,8(r3)
	stw	r11,12(r3)
	addi	r3,r3,0x10
	bdnz	L(postword8_count_loop)

L(postword2_count_loop):
	/* r7 = remaining byte count (0-15), written with a string
	   store from r8-r11 as in the nonzero path.  */
	clrlwi.	r7,r5,28
	beq	L(end_memset)
	mr	r8,r11
	mr	r9,r11
	mr	r10,r11
	mtxer	r7
	stswx	r8,0,r3
	b	L(end_memset)
END (memset)
libc_hidden_builtin_def (memset)