/* Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Bytewise compare two null-terminated strings of length no longer than N.

   int strncmp (const char *s1, const char *s2, size_t n)

   Register contract as used below (Alpha integer calling convention):
     a0 = s1, a1 = s2, a2 = n; result returned in v0
     (0 if equal, 1 if s1 > s2, -1 if s1 < s2 — see $wordcmp/$done).

   Strategy: compare a word (8 bytes) at a time using cmpbge against
   zero to spot the terminating NUL, with separate fast paths for
   co-aligned and non-co-aligned inputs.  The unaligned path is
   structured so that no load ever touches a word beyond the one
   containing the last byte of either string (avoiding a possible
   fault on an unmapped following page).  */

#include <sysdep.h>

	.set noat
	.set noreorder

/* EV6 only predicts one branch per octaword.  We'll use these to push
   subsequent branches back to the next bundle.  This will generally add
   a fetch+decode cycle to older machines, so skip in that case.  */
#ifdef __alpha_fix__
# define ev6_unop	unop
#else
# define ev6_unop
#endif

	.text

ENTRY(strncmp)
#ifdef PROF
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.prologue 1
#else
	.prologue 0
#endif

	xor	a0, a1, t2	# are s1 and s2 co-aligned?
	beq	a2, $zerolength	# n == 0 -> return 0 immediately
	ldq_u	t0, 0(a0)	# load asap to give cache time to catch up
	ldq_u	t1, 0(a1)
	lda	t3, -1		# t3 = all-ones mask, used for msk* below
	and	t2, 7, t2	# low 3 bits nonzero iff alignments differ
	srl	t3, 1, t6	# t6 = LONG_MAX
	and	a0, 7, t4	# find s1 misalignment
	and	a1, 7, t5	# find s2 misalignment
	cmovlt	a2, t6, a2	# bound neg count to LONG_MAX
	addq	a1, a2, a3	# s2+count
	addq	a2, t4, a2	# bias count by s1 misalignment
	and	a2, 7, t10	# ofs of last byte in s1 last word
	srl	a2, 3, a2	# remaining full words in s1 count
	bne	t2, $unaligned

	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.  */
$aligned:
	mskqh	t3, a1, t8	# mask off leading garbage
	ornot	t1, t8, t1	# force pre-string bytes to 0xff in s2 word
	ornot	t0, t8, t0	# ... and likewise in the s1 word
	cmpbge	zero, t1, t7	# bits set iff null found
	beq	a2, $eoc	# check end of count
	bne	t7, $eos
	beq	t10, $ant_loop	# count is an exact multiple of 8 bytes

	/* Aligned compare main loop.
	   On entry to this basic block:
	   t0 == an s1 word.
	   t1 == an s2 word not containing a null.  */

	.align 4
$a_loop:
	xor	t0, t1, t2	# e0 :
	bne	t2, $wordcmp	# .. e1 (zdb)
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	subq	a2, 1, a2	# e0 :
	addq	a1, 8, a1	# .. e1 :
	addq	a0, 8, a0	# e0 :
	beq	a2, $eoc	# .. e1 :

	cmpbge	zero, t1, t7	# e0 :
	beq	t7, $a_loop	# .. e1 :

	br	$eos

	/* Alternate aligned compare loop, for when there's no trailing
	   bytes on the count.  We have to avoid reading too much data.
	   (Loads happen only after the count check, since the count ends
	   exactly on a word boundary.)  */
	.align 4
$ant_loop:
	xor	t0, t1, t2	# e0 :
	ev6_unop
	ev6_unop
	bne	t2, $wordcmp	# .. e1 (zdb)

	subq	a2, 1, a2	# e0 :
	beq	a2, $zerolength	# .. e1 : words exhausted, all equal -> 0
	ldq_u	t1, 8(a1)	# e0 :
	ldq_u	t0, 8(a0)	# .. e1 :

	addq	a1, 8, a1	# e0 :
	addq	a0, 8, a0	# .. e1 :
	cmpbge	zero, t1, t7	# e0 :
	beq	t7, $ant_loop	# .. e1 :

	br	$eos

	/* The two strings are not co-aligned.  Align s1 and cope.  */
	/* On entry to this basic block:
	   t0 == the first word of s1.
	   t1 == the first word of s2.
	   t3 == -1.
	   t4 == misalignment of s1.
	   t5 == misalignment of s2.
	   t10 == misalignment of s1 end.  */
	.align 4
$unaligned:
	/* If s1 misalignment is larger than s2 misalignment, we need
	   extra startup checks to avoid SEGV.  */
	subq	a1, t4, a1	# adjust s2 for s1 misalignment
	cmpult	t4, t5, t9	# t9 != 0 -> the adjusted s2 moved backward
	subq	a3, 1, a3	# last byte of s2
	bic	a1, 7, t8
	mskqh	t3, t5, t7	# mask garbage in s2
	subq	a3, t8, a3
	ornot	t1, t7, t7
	srl	a3, 3, a3	# remaining full words in s2 count
	beq	t9, $u_head

	/* Failing that, we need to look for both eos and eoc within the
	   first word of s2.  If we find either, we can continue by
	   pretending that the next word of s2 is all zeros.  */
	lda	t2, 0		# next = zero
	cmpeq	a3, 0, t8	# eoc in the first word of s2?
	cmpbge	zero, t7, t7	# eos in the first word of s2?
	or	t7, t8, t8
	bne	t8, $u_head_nl	# skip the 8(a1) load -- it could fault

	/* We know just enough now to be able to assemble the first
	   full word of s2.  We can still find a zero at the end of it.

	   On entry to this basic block:
	   t0 == first word of s1
	   t1 == first partial word of s2.
	   t3 == -1.
	   t10 == ofs of last byte in s1 last word.
	   t11 == ofs of last byte in s2 last word.
	   NOTE(review): t11 is never written anywhere in this file; this
	   invariant looks stale -- the s2 end state actually tracked is
	   a3 (remaining full s2 words).  TODO confirm against history.  */
$u_head:
	ldq_u	t2, 8(a1)	# load second partial s2 word
	subq	a3, 1, a3
$u_head_nl:
	extql	t1, a1, t1	# create first s2 word
	mskqh	t3, a0, t8
	extqh	t2, a1, t4
	ornot	t0, t8, t0	# kill s1 garbage
	or	t1, t4, t1	# s2 word now complete
	cmpbge	zero, t0, t7	# find eos in first s1 word
	ornot	t1, t8, t1	# kill s2 garbage
	beq	a2, $eoc
	subq	a2, 1, a2
	bne	t7, $eos
	mskql	t3, a1, t8	# mask out s2[1] bits we have seen
	xor	t0, t1, t4	# compare aligned words
	or	t2, t8, t8
	bne	t4, $wordcmp
	cmpbge	zero, t8, t7	# eos in high bits of s2[1]?
	cmpeq	a3, 0, t8	# eoc in s2[1]?
	or	t7, t8, t7
	bne	t7, $u_final

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned words from s2.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t2 == the unshifted low-bits from the next s2 word.
	   t10 == ofs of last byte in s1 last word.
	   t11 == ofs of last byte in s2 last word.
	   NOTE(review): as above, t11 is never set in this file; see the
	   $u_head comment.  */
	.align 4
$u_loop:
	extql	t2, a1, t3	# e0 :
	ldq_u	t2, 16(a1)	# .. e1 : load next s2 high bits
	ldq_u	t0, 8(a0)	# e0 : load next s1 word
	addq	a1, 8, a1	# .. e1 :

	addq	a0, 8, a0	# e0 :
	subq	a3, 1, a3	# .. e1 :
	extqh	t2, a1, t1	# e0 :
	cmpbge	zero, t0, t7	# .. e1 : eos in current s1 word

	or	t1, t3, t1	# e0 :
	beq	a2, $eoc	# .. e1 : eoc in current s1 word
	subq	a2, 1, a2	# e0 :
	cmpbge	zero, t2, t4	# .. e1 : eos in s2[1]

	xor	t0, t1, t3	# e0 : compare the words
	ev6_unop
	ev6_unop
	bne	t7, $eos	# .. e1 :

	cmpeq	a3, 0, t5	# e0 : eoc in s2[1]
	ev6_unop
	ev6_unop
	bne	t3, $wordcmp	# .. e1 :

	or	t4, t5, t4	# e0 : eos or eoc in s2[1].
	beq	t4, $u_loop	# .. e1 (zdb)

	/* We've found a zero in the low bits of the last s2 word.  Get
	   the next s1 word and align them.  */
	.align 3
$u_final:
	ldq_u	t0, 8(a0)
	extql	t2, a1, t1
	cmpbge	zero, t1, t7
	bne	a2, $eos	# count not exhausted; else fall into $eoc

	/* We've hit end of count.  Zero everything after the count
	   and compare what's left.  */
	.align 3
$eoc:
	mskql	t0, t10, t0	# zero s1 bytes at/after offset t10
	mskql	t1, t10, t1	# ... and the same bytes of s2
	cmpbge	zero, t1, t7	# falls through into $eos

	/* We've found a zero somewhere in a word we just read.
	   On entry to this basic block:
	   t0 == s1 word
	   t1 == s2 word
	   t7 == cmpbge mask containing the zero.  */
	.align 3
$eos:
	negq	t7, t6		# create bytemask of valid data
	and	t6, t7, t8	# t8 = isolated lowest set bit of t7
	subq	t8, 1, t6	# t6 = ones below the first-zero byte
	or	t6, t8, t7	# t7 selects bytes up to and incl. the NUL
	zapnot	t0, t7, t0	# kill the garbage
	zapnot	t1, t7, t1
	xor	t0, t1, v0	# ... and compare
	beq	v0, $done	# equal -> v0 already 0

	/* Here we have two differing co-aligned words in t0 & t1.
	   Bytewise compare them and return (t0 > t1 ? 1 : -1).  */
	.align 3
$wordcmp:
	cmpbge	t0, t1, t2	# comparison yields bit mask of ge
	cmpbge	t1, t0, t3
	xor	t2, t3, t0	# bits set iff t0/t1 bytes differ
	negq	t0, t1		# clear all but least bit
	and	t0, t1, t0	# t0 = first (lowest-addressed) differing byte
	lda	v0, -1		# assume s1 < s2
	and	t0, t2, t1	# was bit set in t0 > t1?
	cmovne	t1, 1, v0	# yes -> s1 > s2, return 1
$done:
	ret

	.align 3
$zerolength:
	clr	v0		# n == 0 (or equal within count): return 0
	ret

	END(strncmp)
libc_hidden_builtin_def (strncmp)