/* strcat (dest, src) -- Append SRC on the end of DEST.
   For SPARC v9.
   Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <asm/asi.h>
#ifndef XCC
#define XCC xcc
#define USE_BPR
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#endif

	/* Normally, this uses
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out if any byte in xword could be zero.  This is fast, but
	   also gives false alarm for any byte in range 0x81-0xff.  It does
	   not matter for correctness, as if this test tells us there could
	   be some zero byte, we check it byte by byte, but if bytes with
	   high bits set are common in the strings, then this will give poor
	   performance.  You can #define EIGHTBIT_NOT_RARE and the algorithm
	   will use one tick slower, but more precise test
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot assume from it which bytes are zero and which are not).
	   It is yet to be measured, what is the correct default for glibc
	   in these days for an average user.
	 */

	/* char *strcat (char *dest, const char *src)

	   In:    %o0 = dest, %o1 = src.
	   Out:   %o0 = the dest pointer as passed in (saved in %g6 on entry
		  and copied back in the delay slot of every retl).
	   Uses:  %o2-%o5, %g1-%g6 and the integer condition codes.
		  Leaf routine: returns with retl, no register window is
		  opened and the stack is not touched.

	   Constants built on entry:
	     %g1 = 0x0101010101010101
	     %g2 = 0x8080808080808080	(%g1 << 7)

	   Outline, by label:
	     32		walk dest a byte at a time until it is 8-byte aligned
			or its terminating NUL is reached.
	     48/49	scan dest eight bytes per iteration with the test
			described above.
	     50/51	a doubleword was flagged: test its bytes one by one,
			most significant (lowest address) first; if none is
			really zero, resume scanning.
	     52,23-29	rewind %o0 so that it points at dest's NUL.
	     12/13	copy single bytes until dest is 8-byte aligned.
	     1/2/3	src is 8-byte aligned as well: copy a doubleword per
			iteration.
	     14/15	src is not 8-byte aligned: read aligned doublewords
			and merge neighbours with shifts.
	     5-11,16-22	store the last, partial doubleword up to and
			including the NUL.

	   The instruction following a branch is its delay slot and is
	   written with one extra leading space.  Loads that may run ahead
	   of the data known to be valid use ASI_PNF (from <asm/asi.h>;
	   presumably the non-faulting primary ASI -- confirm there).
	   The trailing comments on each instruction are the original
	   scheduling annotations (unit and dispatch group).  */

	.text
	.align		32
ENTRY(strcat)
	sethi		%hi(0x01010101), %g1		/* IEU0 Group */
	ldub		[%o0], %o3			/* Load */
	or		%g1, %lo(0x01010101), %g1	/* IEU0 Group */
	mov		%o0, %g6			/* IEU1 */

	sllx		%g1, 32, %g2			/* IEU0 Group */
	andcc		%o0, 7, %g0			/* IEU1 */
	or		%g1, %g2, %g1			/* IEU0 Group */
	bne,pn		%icc, 32f			/* CTI */

	 sllx		%g1, 7, %g2			/* IEU0 Group */
	/* dest is 8-byte aligned.  An empty dest goes straight to the
	   copy at 30 with %o0 already pointing at its NUL.  */
	brz,pn		%o3, 30f			/* CTI+IEU1 */
	 ldx		[%o0], %o3			/* Load */
48:	add		%o0, 8, %o0			/* IEU0 Group */

	/* Scan loop.  On the fall-through exit %o0 is 16 bytes past the
	   doubleword that was flagged.  */
49:	sub		%o3, %g1, %o2			/* IEU0 Group */
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %o3, %g5			/* IEU0 Group */
	ldxa		[%o0] ASI_PNF, %o3		/* Load */
	andcc		%g5, %g2, %g0			/* IEU1 Group */
#else
	ldxa		[%o0] ASI_PNF, %o3		/* Load */
	andcc		%o2, %g2, %g0			/* IEU1 Group */
#endif
	be,pt		%xcc, 49b			/* CTI */

	 add		%o0, 8, %o0			/* IEU0 */
	/* %g3 = the flagged doubleword again (undo the subtraction).  */
	addcc		%o2, %g1, %g3			/* IEU1 Group */
	srlx		%o2, 32, %o2			/* IEU0 */
	/* If the upper four bytes raised no flag, skip to the lower four.  */
50:	andcc		%o2, %g2, %g0			/* IEU1 Group */

	be,pn		%xcc, 51f			/* CTI */
	 srlx		%g3, 56, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 29f			/* CTI */

	 srlx		%g3, 48, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 28f			/* CTI */
	 srlx		%g3, 40, %o2			/* IEU0 */

	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 27f			/* CTI */
	 srlx		%g3, 32, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 26f			/* CTI */
51:	 srlx		%g3, 24, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 25f			/* CTI */

	 srlx		%g3, 16, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 24f			/* CTI */
	 srlx		%g3, 8, %o2			/* IEU0 */

	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 23f			/* CTI */
	 sub		%o3, %g1, %o2			/* IEU0 */
	andcc		%g3, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 52f			/* CTI */
	/* No byte was zero after all: the test for the next doubleword
	   has already been started above, so either rejoin the scan loop
	   or examine that doubleword at 50.  */
	 ldxa		[%o0] ASI_PNF, %o3		/* Load */
	andcc		%o2, %g2, %g0			/* IEU1 Group */
	be,pt		%xcc, 49b			/* CTI */

	 add		%o0, 8, %o0			/* IEU0 */
	addcc		%o2, %g1, %g3			/* IEU1 Group */
	ba,pt		%xcc, 50b			/* CTI */
	 srlx		%o2, 32, %o2			/* IEU0 */

	/* Rewind %o0 to the NUL found in dest: -16 for the most
	   significant byte of the flagged doubleword down to -9 for the
	   least significant one.  Only the -16 case leaves dest 8-byte
	   aligned, so every other case starts with the byte copy at 12.  */
	.align		16
52:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -9, %o0			/* IEU0 */
23:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -10, %o0			/* IEU0 */

24:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -11, %o0			/* IEU0 */
25:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -12, %o0			/* IEU0 */

26:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -13, %o0			/* IEU0 */
27:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -14, %o0			/* IEU0 */

28:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -15, %o0			/* IEU0 */
29:	add		%o0, -16, %o0			/* IEU0 Group */
	/* dest is 8-byte aligned here; %g3 = src & 7 selects the loop.  */
30:	andcc		%o1, 7, %g3			/* IEU1 */

31:	bne,pn		%icc, 14f			/* CTI */
	 orcc		%g0, 64, %g4			/* IEU1 Group */
	/* Both pointers 8-byte aligned.  %g3 = doubleword being tested
	   and stored, %o3 = the next one, loaded ahead.  */
1:	ldx		[%o1], %o3			/* Load */
	add		%o1, 8, %o1			/* IEU1 */

2:	mov		%o3, %g3			/* IEU0 Group */
3:	sub		%o3, %g1, %o2			/* IEU1 */
	ldxa		[%o1] ASI_PNF, %o3		/* Load */
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %g3, %o2			/* IEU0 Group */
#endif
	add		%o0, 8, %o0			/* IEU0 Group */

	andcc		%o2, %g2, %g0			/* IEU1 */
	add		%o1, 8, %o1			/* IEU0 Group */
	/* Annulled branch: the store below is done only when the branch
	   is taken, i.e. when no byte of %g3 was flagged.  */
	be,a,pt		%xcc, 2b			/* CTI */
	 stx		%g3, [%o0 - 8]			/* Store */

	/* Flagged: look for the zero byte, most significant first.  The
	   delay slots prepare the next shifted value in %g4/%g5, which
	   the tail code at 5-11 relies on.  */
	srlx		%g3, 56, %g5			/* IEU0 Group */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 11f			/* CTI */
	 srlx		%g3, 48, %g4			/* IEU0 */

	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 10f			/* CTI */
	 srlx		%g3, 40, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 9f			/* CTI */
	 srlx		%g3, 32, %g4			/* IEU0 */
	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 8f			/* CTI */

	 srlx		%g3, 24, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 7f			/* CTI */
	 srlx		%g3, 16, %g4			/* IEU0 */

	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 6f			/* CTI */
	 srlx		%g3, 8, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 5f			/* CTI */
	 sub		%o3, %g1, %o2			/* IEU0 */
	/* The seven upper bytes are non-zero: store the whole doubleword.
	   If its last byte is non-zero too, it was a false alarm and the
	   loop resumes at 3 with the test of the next doubleword already
	   started in %o2; otherwise the NUL has just been stored.  */
	stx		%g3, [%o0 - 8]			/* Store Group */
	andcc		%g3, 0xff, %g0			/* IEU1 */

	bne,pt		%icc, 3b			/* CTI */
	 mov		%o3, %g3			/* IEU0 Group */
4:	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

	/* Tails of the aligned copy.  Entry N stores the leading bytes of
	   %g3 up to and including its zero byte, using the widest stores
	   that fit, then returns.  */
	.align		16
5:	stb		%g5, [%o0 - 2]			/* Store Group */
	srlx		%g3, 16, %g4			/* IEU0 */
6:	sth		%g4, [%o0 - 4]			/* Store Group */
	srlx		%g3, 32, %g4			/* IEU0 */

	stw		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */
7:	stb		%g5, [%o0 - 4]			/* Store Group */

	srlx		%g3, 32, %g4			/* IEU0 */
8:	stw		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

9:	stb		%g5, [%o0 - 6]			/* Store Group */
	srlx		%g3, 48, %g4			/* IEU0 */
10:	sth		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */

	 mov		%g6, %o0			/* IEU0 */
11:	stb		%g5, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

	/* dest not 8-byte aligned on entry: step through it a byte at a
	   time (%o3 = byte at %o0) until it is aligned (continue at 48)
	   or the NUL is found.  */
	.align		16
32:	andcc		%o0, 7, %g0			/* IEU1 Group */
	be,a,pn		%icc, 48b			/* CTI */
	 ldx		[%o0], %o3			/* Load */
	add		%o0, 1, %o0			/* IEU0 Group */

	brnz,a,pt	%o3, 32b			/* CTI+IEU1 */
	 lduba		[%o0] ASI_PNF, %o3		/* Load */
	/* NUL found: step back onto it.  If that address is 8-byte
	   aligned, join the doubleword copy at 31.  */
	add		%o0, -1, %o0			/* IEU0 Group */
	andcc		%o0, 7, %g0			/* IEU1 Group */

	be,a,pn		%icc, 31b			/* CTI */
	 andcc		%o1, 7, %g3			/* IEU1 Group */
	/* Byte copy from src until dest becomes 8-byte aligned or the
	   NUL has been copied (return through 4).  */
12:	ldub		[%o1], %o3			/* Load */
	stb		%o3, [%o0]			/* Store Group */

13:	add		%o0, 1, %o0			/* IEU0 */
	add		%o1, 1, %o1			/* IEU1 */
	andcc		%o3, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 4b			/* CTI */

	 lduba		[%o1] ASI_PNF, %o3		/* Load */
	andcc		%o0, 7, %g0			/* IEU1 Group */
	bne,a,pt	%icc, 13b			/* CTI */
	 stb		%o3, [%o0]			/* Store */

	/* dest is aligned now; choose the loop by src alignment.  */
	andcc		%o1, 7, %g3			/* IEU1 Group */
	be,a,pt		%icc, 1b			/* CTI */
	 ldx		[%o1], %o3			/* Load */
	orcc		%g0, 64, %g4			/* IEU1 Group */

	/* src is not 8-byte aligned: round %o1 down and build every
	   output doubleword from two neighbouring aligned input
	   doublewords.  */
14:	sllx		%g3, 3, %g5			/* IEU0 */
	sub		%o1, %g3, %o1			/* IEU0 Group */
	sub		%g4, %g5, %g4			/* IEU1 */
	/* %g1 = 0101010101010101
	   %g2 = 8080808080808080
	   %g3 = source alignment
	   %g5 = number of bits to shift left
	   %g4 = number of bits to shift right  */
	ldxa		[%o1] ASI_PNF, %o5		/* Load Group */

	addcc		%o1, 8, %o1			/* IEU1 */
15:	sllx		%o5, %g5, %o3			/* IEU0 Group */
	ldxa		[%o1] ASI_PNF, %o5		/* Load */
	srlx		%o5, %g4, %o4			/* IEU0 Group */

	add		%o0, 8, %o0			/* IEU1 */
	or		%o3, %o4, %o3			/* IEU0 Group */
	add		%o1, 8, %o1			/* IEU1 */
	sub		%o3, %g1, %o4			/* IEU0 Group */

#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU0 Group */
#endif
	andcc		%o4, %g2, %g0			/* IEU1 Group */
	/* Annulled branch: the store is done only when it is taken.  */
	be,a,pt		%xcc, 15b			/* CTI */
	 stx		%o3, [%o0 - 8]			/* Store */
	srlx		%o3, 56, %o4			/* IEU0 Group */

	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 22f			/* CTI */
	 srlx		%o3, 48, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 21f			/* CTI */
	 srlx		%o3, 40, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 20f			/* CTI */

	 srlx		%o3, 32, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 19f			/* CTI */
	 srlx		%o3, 24, %o4			/* IEU0 */

	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 18f			/* CTI */
	 srlx		%o3, 16, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 17f			/* CTI */
	 srlx		%o3, 8, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 16f			/* CTI */

	 andcc		%o3, 0xff, %g0			/* IEU1 Group */
	/* The store in this delay slot is not annulled: the doubleword is
	   written whether the loop continues (false alarm) or its last
	   byte is the NUL and we return.  */
	bne,pn		%icc, 15b			/* CTI */
	 stx		%o3, [%o0 - 8]			/* Store */
	retl						/* CTI+IEU1 Group */

	 mov		%g6, %o0			/* IEU0 */

	/* Tails of the shifted copy.  Each entry stores one byte of %o3
	   and falls through to the next; 19 finishes the upper four bytes
	   with a single word store, 22 stores the first byte.  */
	.align		16
16:	srlx		%o3, 8, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 2]			/* Store */
17:	srlx		%o3, 16, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 3]			/* Store */

18:	srlx		%o3, 24, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 4]			/* Store */
19:	srlx		%o3, 32, %o4			/* IEU0 Group */
	stw		%o4, [%o0 - 8]			/* Store */

	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */
	/* Padding; not reached.  */
	nop
	nop

20:	srlx		%o3, 40, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 6]			/* Store */
21:	srlx		%o3, 48, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 7]			/* Store */

22:	srlx		%o3, 56, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 8]			/* Store */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */
END(strcat)
libc_hidden_builtin_def (strcat)