/* Optimized strncmp implementation for PowerPC64.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

#ifndef STRNCMP
# define STRNCMP strncmp
#endif

/* See strlen.s for comments on how the end-of-string testing works.  */

/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])

   Overview of the code below:
     - If either pointer is not doubleword (8-byte) aligned, or SIZE is
       smaller than 8, everything is done by the byte loop at L(unaligned).
     - Otherwise SIZE / 8 doublewords are compared in the L(g0)/L(g1) loop
       and the remaining SIZE % 8 bytes are handled by the byte loop.
   The doubleword paths return -1, 0 or 1; the byte loop returns the
   difference of the two bytes at which it stopped; a zero byte count
   returns 0.  */

ENTRY_TOCLESS (STRNCMP, 4)
	CALL_MCOUNT 3

#define rTMP2	r0
#define rRTN	r3
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
#define rWORD1	r6	/* current word in s1 */
#define rWORD2	r7	/* current word in s2 */
/* rWORD3/rWORD4 share r10/r11 with rNEG/rBITDIF below; the former are
   only used by the byte loop, the latter only by the doubleword code.  */
#define rWORD3	r10
#define rWORD4	r11
#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
#define rTMP	r12

	dcbt	0,rSTR1
	or	rTMP, rSTR2, rSTR1
	lis	r7F7F, 0x7f7f
	dcbt	0,rSTR2
	/* Keep only the low three bits of (s1 | s2): non-zero means at least
	   one of the pointers is not doubleword aligned.  */
	clrldi.	rTMP, rTMP, 61
	/* cr1 = (size compared with 0); consumed at L(unaligned).  */
	cmpldi	cr1, rN, 0
	lis	rFEFE, -0x101
	bne	L(unaligned)
/* We are doubleword aligned so set up for two loops.  First a doubleword
   loop, then fall into the byte loop if any residual.  */
	srdi.	rTMP, rN, 3	/* rTMP = number of whole doublewords.  */
	clrldi	rN, rN, 61	/* rN = residual byte count (size % 8).  */
	addi	rFEFE, rFEFE, -0x101
	addi	r7F7F, r7F7F, 0x7f7f
	cmpldi	cr1, rN, 0	/* cr1 = (residual compared with 0).  */
	/* cr0 is still from the srdi. above: no whole doubleword to compare,
	   so let the byte loop handle the (fewer than 8) bytes.  */
	beq	L(unaligned)

	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
	ld	rWORD1, 0(rSTR1)
	ld	rWORD2, 0(rSTR2)
	/* Widen the two 32-bit constants built above to 64 bits.  */
	sldi	rTMP, rFEFE, 32
	insrdi	r7F7F, r7F7F, 32, 0
	add	rFEFE, rFEFE, rTMP
	b	L(g1)

/* Doubleword loop.  cr1 holds the comparison of the previous pair of
   doublewords; cr0 (set at the bottom of the loop) says whether the
   previous s1 doubleword contained a zero byte.  */
L(g0):
	ldu	rWORD1, 8(rSTR1)
	bne-	cr1, L(different)
	ldu	rWORD2, 8(rSTR2)
L(g1):	add	rTMP, rFEFE, rWORD1
	nor	rNEG, r7F7F, rWORD1
	/* CTR counts doublewords; the last one is finished in L(tail).  */
	bdz	L(tail)
	and.	rTMP, rTMP, rNEG	/* Non-zero if rWORD1 has a zero byte.  */
	cmpd	cr1, rWORD1, rWORD2
	beq+	L(g0)

/* OK.  We've hit the end of the string.  We need to be careful that
   we don't compare two strings as different because of gunk beyond
   the end of the strings...  */

#ifdef __LITTLE_ENDIAN__
L(endstring):
	addi    rTMP2, rTMP, -1
	/* The doublewords are identical and contain the terminator.  */
	beq	cr1, L(equal)
	/* (rTMP - 1) & ~rTMP: all bits below the lowest set bit of rTMP.  */
	andc    rTMP2, rTMP2, rTMP
	/* Extend that mask upwards by one bit.  */
	rldimi	rTMP2, rTMP2, 1, 0
	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
	and	rWORD1, rWORD1, rTMP2
	cmpd	cr1, rWORD1, rWORD2
	beq	cr1, L(equal)
	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
	neg	rNEG, rBITDIF
	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs.  */
	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
	sld	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	/* Top bits differ: take the sign from rWORD2 instead of from the
	   subtraction.  */
	blt-	L(highbit)
	sradi	rRTN, rRTN, 63		/* must return an int.  */
	ori	rRTN, rRTN, 1		/* Result is now -1 or 1.  */
	blr
L(equal):
	li	rRTN, 0
	blr

/* The previous pair of doublewords differed.  rSTR1 has already been
   advanced by 8, so reload the s1 doubleword that belongs to rWORD2.  */
L(different):
	ld	rWORD1, -8(rSTR1)
	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
	neg	rNEG, rBITDIF
	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs.  */
	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
	sld	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1
	blr
/* Top bits of rWORD1 and rWORD2 differ: return -1 if the top bit of
   rWORD2 is set, 1 otherwise.  */
L(highbit):
	sradi	rRTN, rWORD2, 63
	ori	rRTN, rRTN, 1
	blr

#else
L(endstring):
	and	rTMP, r7F7F, rWORD1
	/* The doublewords are identical and contain the terminator.  */
	beq	cr1, L(equal)
	add	rTMP, rTMP, r7F7F
	xor.	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
	andc	rNEG, rNEG, rTMP
	blt-	L(highbit)
	/* Compare the bit position of the first difference with the bit
	   position of the end of the byte flagged in rNEG; a difference
	   located after that byte does not count.  */
	cntlzd	rBITDIF, rBITDIF
	cntlzd	rNEG, rNEG
	addi	rNEG, rNEG, 7
	cmpd	cr1, rNEG, rBITDIF
	sub	rRTN, rWORD1, rWORD2
	blt-	cr1, L(equal)
	sradi	rRTN, rRTN, 63		/* must return an int.  */
	ori	rRTN, rRTN, 1		/* Result is now -1 or 1.  */
	blr
L(equal):
	li	rRTN, 0
	blr

/* The previous pair of doublewords differed.  rSTR1 has already been
   advanced by 8, so reload the s1 doubleword that belongs to rWORD2.  */
L(different):
	ld	rWORD1, -8(rSTR1)
	xor.	rBITDIF, rWORD1, rWORD2
	sub	rRTN, rWORD1, rWORD2
	blt-	L(highbit)
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1
	blr
/* Top bits of rWORD1 and rWORD2 differ: return -1 if the top bit of
   rWORD2 is set, 1 otherwise.  */
L(highbit):
	sradi	rRTN, rWORD2, 63
	ori	rRTN, rRTN, 1
	blr
#endif

/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
	.align 4
/* Reached from the doubleword loop when CTR hits zero: finish the checks
   for the last whole doubleword, then fall into the byte loop for the
   residual bytes.  */
L(tail):
	and.	rTMP, rTMP, rNEG
	cmpd	cr1, rWORD1, rWORD2
	bne-	L(endstring)
	addi	rSTR1, rSTR1, 8	/* L(different) expects rSTR1 advanced.  */
	bne-	cr1, L(different)
	addi	rSTR2, rSTR2, 8
	cmpldi	cr1, rN, 0
L(unaligned):
	mtctr	rN	/* Power4 wants mtctr 1st in dispatch group */
	/* Unsigned compare against 0, so this is taken only when rN == 0.  */
	ble	cr1, L(ux)
L(uz):
	lbz	rWORD1, 0(rSTR1)
	lbz	rWORD2, 0(rSTR2)
	.align 4
/* Byte loop, unrolled four times, alternating between the register pairs
   rWORD1/rWORD2 and rWORD3/rWORD4.  Each step stops when the byte count in
   CTR is exhausted, when the s1 byte is zero, or when the bytes differ.  */
L(u1):
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu    rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	cmpdi	cr1, rWORD1, 0
	bdz	L(u4)
	cmpd	rWORD1, rWORD2
	beq-	cr1, L(u4)
	bne-	L(u4)
	lbzu	rWORD3, 1(rSTR1)
	lbzu	rWORD4, 1(rSTR2)
	cmpdi	cr1, rWORD3, 0
	bdz	L(u3)
	cmpd	rWORD3, rWORD4
	beq-    cr1, L(u3)
	bne-    L(u3)
	lbzu	rWORD1, 1(rSTR1)
	lbzu	rWORD2, 1(rSTR2)
	b       L(u1)

/* Return the difference of the last pair of bytes examined.  */
L(u3):  sub     rRTN, rWORD3, rWORD4
	blr
L(u4):	sub	rRTN, rWORD1, rWORD2
	blr
/* Nothing (left) to compare: the strings are equal.  */
L(ux):
	li	rRTN, 0
	blr
END (STRNCMP)
libc_hidden_builtin_def (strncmp)