/* Optimized strncmp implementation for POWER7/PowerPC64.
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Allow a wrapper file to build this code under another symbol name by
   defining STRNCMP before including it.  */
#ifndef STRNCMP
# define STRNCMP strncmp
#endif

/* See strlen.s for comments on how the end-of-string testing works.  */

/* int [r3] strncmp (const char *s1 [r3],
		     const char *s2 [r4],
		     size_t size [r5])

   Overview of the code below:
     - If either pointer is not 8-byte aligned, or size < 8, compare
       byte by byte (L(unaligned)).
     - Otherwise compare size / 8 doublewords, testing each s1
       doubleword for a NUL byte, then finish the remaining size % 8
       bytes in the byte loop.
   The value returned is 0 when the compared ranges are equal.  The
   doubleword paths return -1 or +1; the byte loop returns the
   difference of the two zero-extended bytes that ended the loop.  */

	.machine power7
/* ENTRY_TOCLESS and CALL_MCOUNT are macros from sysdep.h; their exact
   expansion (alignment, profiling hook) is not visible in this file.  */
ENTRY_TOCLESS (STRNCMP, 5)
	CALL_MCOUNT 3

/* Register roles.  Note the deliberate aliasing: rWORD3/rNEG are both
   r10 and rWORD4/rBITDIF are both r11.  rWORD3/rWORD4 are only used in
   the byte loop, rNEG/rBITDIF only in the doubleword code.  */
#define rTMP2	r0
#define rRTN	r3
#define rSTR1	r3	/* first string arg */
#define rSTR2	r4	/* second string arg */
#define rN	r5	/* max string length */
#define rWORD1	r6	/* current word in s1 */
#define rWORD2	r7	/* current word in s2 */
#define rWORD3	r10
#define rWORD4	r11
#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
#define rTMP	r12

	dcbt	0,rSTR1			/* Cache-touch hint for the start of s1.  */
	nop				/* Presumably scheduling padding.  */
	or	rTMP,rSTR2,rSTR1	/* Combine low address bits of both strings.  */
	lis	r7F7F,0x7f7f		/* r7F7F = 0x7f7f0000 (completed below).  */
	dcbt	0,rSTR2			/* Cache-touch hint for the start of s2.  */
	nop
	clrldi.	rTMP,rTMP,61		/* cr0: are the low 3 bits of (s1 | s2) zero?  */
	cmpldi	cr1,rN,0		/* cr1: size == 0?  Consumed at L(unaligned).  */
	lis	rFEFE,-0x101		/* Upper half of 0x...fefefeff (completed below).  */
	bne	L(unaligned)		/* A pointer is misaligned: byte loop.  */
/* We are doubleword aligned so set up for two loops.  First a doubleword
   loop, then fall into the byte loop if any residual.  */
	srdi.	rTMP,rN,3		/* rTMP = size / 8 doublewords; cr0 set on zero.  */
	clrldi	rN,rN,61		/* rN = size % 8 residual bytes.  */
	addi	rFEFE,rFEFE,-0x101	/* Low 32 bits are now 0xfefefeff.  */
	addi	r7F7F,r7F7F,0x7f7f	/* Low 32 bits are now 0x7f7f7f7f.  */
	cmpldi	cr1,rN,0		/* cr1: residual == 0?  Consumed at L(unaligned).  */
	beq	L(unaligned)		/* No full doubleword (size < 8): byte loop.  */

	mtctr	rTMP			/* CTR = number of doublewords to compare.  */
	ld	rWORD1,0(rSTR1)
	ld	rWORD2,0(rSTR2)
	sldi	rTMP,rFEFE,32
	insrdi	r7F7F,r7F7F,32,0	/* r7F7F = 0x7f7f7f7f7f7f7f7f.  */
	add	rFEFE,rFEFE,rTMP	/* rFEFE = 0xfefefefefefefeff.  */
	b	L(g1)

/* Doubleword loop.  On entry to L(g0), cr1 holds the comparison of the
   previous rWORD1/rWORD2 pair, which contained no NUL byte in rWORD1.  */
L(g0):
	ldu	rWORD1,8(rSTR1)		/* Next s1 doubleword, advancing rSTR1.  */
	bne	cr1,L(different)	/* Previous pair differed (rWORD1 is reloaded there).  */
	ldu	rWORD2,8(rSTR2)		/* Next s2 doubleword, advancing rSTR2.  */
L(g1):	add	rTMP,rFEFE,rWORD1	/* rTMP = rWORD1 - 0x0101010101010101.  */
	nor	rNEG,r7F7F,rWORD1	/* rNEG = ~(rWORD1 | 0x7f7f7f7f7f7f7f7f).  */
	bdz	L(tail)			/* --CTR == 0: this is the last doubleword.  */
	and.	rTMP,rTMP,rNEG		/* Nonzero iff rWORD1 contains a zero byte.  */
	cmpd	cr1,rWORD1,rWORD2
	beq	L(g0)			/* No zero byte found: keep looping.  */

/* OK.  We've hit the end of the string.  We need to be careful that
   we don't compare two strings as different because of gunk beyond
   the end of the strings...  */

#ifdef __LITTLE_ENDIAN__
/* Entered with rTMP = zero-byte indicator for rWORD1 (nonzero) and
   cr1 = comparison of rWORD1 with rWORD2.  */
L(endstring):
	addi    rTMP2, rTMP, -1
	beq	cr1, L(equal)		/* Whole doublewords are equal: return 0.  */
	andc    rTMP2, rTMP2, rTMP	/* Bits below the lowest set bit of rTMP.  */
	rldimi	rTMP2, rTMP2, 1, 0	/* Widen the mask upwards by one bit.  */
	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
	and	rWORD1, rWORD1, rTMP2
	cmpd	cr1, rWORD1, rWORD2
	beq	cr1, L(equal)		/* Equal once the gunk is masked: return 0.  */
	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
	addi	rNEG, rBITDIF, 1
	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
	andc	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2	/* cr0 lt iff the top bits differ.  */
	sub	rRTN, rWORD1, rWORD2
	blt	L(highbit)		/* Top bits differ: take the sign from rWORD2.  */
	sradi	rRTN, rRTN, 63		/* must return an int.  */
	ori	rRTN, rRTN, 1		/* Result is -1 or +1.  */
	blr
L(equal):
	li	rRTN, 0
	blr

/* The doublewords differ and no NUL byte preceded them.  rSTR1 has
   already been advanced by 8, so reload the s1 doubleword that was
   compared against rWORD2.  */
L(different):
	ld	rWORD1, -8(rSTR1)
	cmpb	rBITDIF, rWORD1, rWORD2	/* 0xff on equal bytes.  */
	addi	rNEG, rBITDIF, 1
	orc	rNEG, rNEG, rBITDIF	/* 0's below LS differing byte.  */
	sldi	rNEG, rNEG, 8		/* 1's above LS differing byte.  */
	andc	rWORD1, rWORD1, rNEG	/* mask off MS bytes.  */
	andc	rWORD2, rWORD2, rNEG
	xor.	rBITDIF, rWORD1, rWORD2	/* cr0 lt iff the top bits differ.  */
	sub	rRTN, rWORD1, rWORD2
	blt	L(highbit)		/* Top bits differ: take the sign from rWORD2.  */
	sradi	rRTN, rRTN, 63
	ori	rRTN, rRTN, 1		/* Result is -1 or +1.  */
	blr
/* The top bits of the two words differ.  Return -1 if the top bit of
   rWORD2 is set, otherwise +1.  */
L(highbit):
	sradi	rRTN, rWORD2, 63
	ori	rRTN, rRTN, 1
	blr

#else
/* Entered with rNEG = ~(rWORD1 | 0x7f7f7f7f7f7f7f7f) and
   cr1 = comparison of rWORD1 with rWORD2.  */
L(endstring):
	and	rTMP,r7F7F,rWORD1
	beq	cr1,L(equal)		/* Whole doublewords are equal: return 0.  */
	add	rTMP,rTMP,r7F7F
	xor.	rBITDIF,rWORD1,rWORD2	/* Differing bits; cr0 lt iff top bits differ.  */
	andc	rNEG,rNEG,rTMP		/* 0x80 in exactly the zero bytes of rWORD1.  */
	blt	L(highbit)
	cntlzd	rBITDIF,rBITDIF		/* Bit index of the first difference.  */
	cntlzd	rNEG,rNEG		/* Bit index of the first zero byte.  */
	addi	rNEG,rNEG,7		/* Index of the last bit of that byte.  */
	cmpd	cr1,rNEG,rBITDIF
	sub	rRTN,rWORD1,rWORD2
	blt	cr1,L(equal)		/* First difference lies beyond the NUL: equal.  */
	sradi	rRTN,rRTN,63		/* must return an int.  */
	ori	rRTN,rRTN,1		/* Result is -1 or +1.  */
	blr
L(equal):
	li	rRTN,0
	blr

/* The doublewords differ and no NUL byte preceded them.  rSTR1 has
   already been advanced by 8, so reload the s1 doubleword that was
   compared against rWORD2.  */
L(different):
	ld	rWORD1,-8(rSTR1)
	xor.	rBITDIF,rWORD1,rWORD2	/* cr0 lt iff the top bits differ.  */
	sub	rRTN,rWORD1,rWORD2
	blt	L(highbit)		/* Top bits differ: take the sign from rWORD2.  */
	sradi	rRTN,rRTN,63
	ori	rRTN,rRTN,1		/* Result is -1 or +1.  */
	blr
/* The top bits of the two words differ.  Return -1 if the top bit of
   rWORD2 is set, otherwise +1.  */
L(highbit):
	sradi	rRTN,rWORD2,63
	ori	rRTN,rRTN,1
	blr
#endif

/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
	.align 4
/* Last full doubleword (CTR reached zero in the loop above).  Finish its
   NUL/difference checks, then fall into the byte loop for the residual
   size % 8 bytes held in rN.  */
L(tail):
	and.	rTMP,rTMP,rNEG		/* Nonzero iff rWORD1 contains a zero byte.  */
	cmpd	cr1,rWORD1,rWORD2
	bne	L(endstring)
	addi	rSTR1,rSTR1,8		/* Step past this doubleword; L(different)
					   reloads it from -8(rSTR1).  */
	bne	cr1,L(different)
	addi	rSTR2,rSTR2,8
	cmpldi	cr1,rN,0		/* cr1: residual == 0?  */
/* Byte loop entry.  rN = number of bytes still to compare and cr1 holds
   the unsigned comparison of that count with 0.  */
L(unaligned):
	mtctr	rN			/* CTR = remaining byte count.  */
	ble	cr1,L(ux)		/* Nothing left to compare: return 0.  */
L(uz):
	lbz	rWORD1,0(rSTR1)
	lbz	rWORD2,0(rSTR2)
	.align 4
/* Unrolled four times, alternating between the rWORD1/rWORD2 and
   rWORD3/rWORD4 register pairs.  Each step leaves the loop when the
   count is used up (bdz), the s1 byte is NUL (beq cr1) or the bytes
   differ (bne).  */
L(u1):
	cmpdi	cr1,rWORD1,0
	bdz	L(u4)
	cmpd	rWORD1,rWORD2
	beq	cr1,L(u4)
	bne	L(u4)
	lbzu    rWORD3,1(rSTR1)
	lbzu    rWORD4,1(rSTR2)
	cmpdi	cr1,rWORD3,0
	bdz	L(u3)
	cmpd	rWORD3,rWORD4
	beq     cr1,L(u3)
	bne     L(u3)
	lbzu	rWORD1,1(rSTR1)
	lbzu	rWORD2,1(rSTR2)
	cmpdi	cr1,rWORD1,0
	bdz	L(u4)
	cmpd	rWORD1,rWORD2
	beq	cr1,L(u4)
	bne	L(u4)
	lbzu	rWORD3,1(rSTR1)
	lbzu	rWORD4,1(rSTR2)
	cmpdi	cr1,rWORD3,0
	bdz	L(u3)
	cmpd	rWORD3,rWORD4
	beq     cr1,L(u3)
	bne     L(u3)
	lbzu	rWORD1,1(rSTR1)
	lbzu	rWORD2,1(rSTR2)
	b       L(u1)

/* Byte loop exits: return the difference of the last byte pair loaded
   (lbz/lbzu zero-extend, so this is 0 when the bytes are equal).  */
L(u3):  sub     rRTN,rWORD3,rWORD4
	blr
L(u4):	sub	rRTN,rWORD1,rWORD2
	blr
/* Reached when the byte count is zero.  */
L(ux):
	li	rRTN,0
	blr
END (STRNCMP)
libc_hidden_builtin_def (strncmp)