/* Vector optimized 32/64 bit S/390 version of strstr.
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <ifunc-strstr.h>
#if HAVE_STRSTR_ARCH13
# include "sysdep.h"
# include "asm-syntax.h"
	.text

/* char *strstr (const char *haystack=r2, const char *needle=r3)
   Locate a substring.

   Returns in r2 the address of the first occurrence of needle in
   haystack, haystack itself if needle is empty, or 0 if there is no
   match.  Needles longer than 9 bytes are handed to STRSTR_Z13.

   Register usage:
   r1:  byte count set by lcbb; decremented to the highest byte index
	before it is used by vll.
   r2:  current position in haystack; return value.
   r3:  needle.
   r4:  length of needle (1..9 once the search loop is reached);
	reused for the index of the match at the end.
   r5:  min-skip-partial-match-index = 17 - needle_len.
   v16: current part of haystack.
   v17: index of zero in the partially loaded haystack (byte 7).
   v18: needle.
   v19: length of needle (byte 7).
   v20: index reported by vstrs (byte 7).
   v21: highest loaded byte index (byte 7).  */
ENTRY(STRSTR_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
	lcbb	%r1,0(%r3),6
	jo	.Lneedle_on_bb	/* Needle on block-boundary?  */
	/* NOTE(review): this is the only vl in this file that carries a
	   trailing operand (6) - confirm that it is intended.  */
	vl	%v18,0(%r3),6	/* Load needle.  */
	vfenezb	%v19,%v18,%v18	/* v19[7] contains the length of needle.  */
.Lneedle_loaded:
	vlgvb	%r4,%v19,7	/* Get index of zero or 16 if not found.  */
	lghi	%r5,17		/* See below: min-skip-partial-match-index.  */
	/* An empty needle matches at the start: r2 still holds haystack.  */
	cgibe	%r4,0,0(%r14)	/* Test if needle is zero and return.  */

	/* The vstrs instruction is able to handle needles up to a length of 16,
	   but then we may have to load the next part of haystack with a
	   small offset.  This will be slow - see examples:
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =  mmmmmmmmmmmmmma0
	   => needle_len=15; vstrs reports a partial match; haystack+=2
	   haystack =mmmmmmmmmmmmmmmm mmmmmmmmmmmmmmmmmm...mmmmmmmmmmmmmmmmmmma
	   needle   =        mmmmmmmma0000000
	   => needle_len=9; vstrs reports a partial match; haystack+=8  */
# if ! HAVE_STRSTR_Z13
#  error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
	/* Needles longer than 9 bytes are processed by the z13 variant;
	   r2 and r3 are still unmodified at this point.  */
	clgfi	%r4,9
	jgh	STRSTR_Z13

	/* In case of a partial match, the vstrs instruction returns the index
	   of the partial match in a vector-register.  Then we have to
	   reload the string at the "current-position plus this index" and run
	   vstrs again in order to determine if it was a full match or no match.
	   Transferring this index from vr to gr, compute the haystack-address
	   and loading with vl is quite slow as all instructions have data
	   dependencies.  Thus we assume, that a partial match is always at the
	   first possible index and just load the next part of haystack from
	   there instead of waiting until the correct index is computed:
	   min-skip-partial-match-index = (16 - n_len) + 1  */
	sgr	%r5,%r4		/* r5 = 17 - needle_len.  */

	/* Main loop, unrolled four times: the parts at offsets 0, 16, 32
	   and 48 are searched before r2 is advanced by 64.  */
.Lloop:
	lcbb	%r1,0(%r2),6
	jo	.Lloop_haystack_on_bb	/* Haystack on block-boundary?  */
	vl	%v16,0(%r2)	/* Load next part of haystack.  */
.Lloop_haystack_loaded:
	/* Vector string search with zero search (cc=0 => no match).  */
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc
	lcbb	%r1,16(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb16
	vl	%v16,16(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc16
	lcbb	%r1,32(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb32
	vl	%v16,32(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc32
	lcbb	%r1,48(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb48
	vl	%v16,48(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jne	.Lloop_vstrs_nonzero_cc48
	la	%r2,64(%r2)
	j	.Lloop

	/* The labels below fall through on purpose: r2 is advanced by 48, 32
	   or 16 so that it points to the part vstrs has just processed.  The
	   la instructions sit between vstrs and the branches which test its
	   condition code.  */
.Lloop_vstrs_nonzero_cc48:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc32:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc16:
	la	%r2,16(%r2)
.Lloop_vstrs_nonzero_cc:
	jh	.Lend_match_found /* cc == 2 (full match)  */
	jl	.Lend_no_match	/* cc == 1 (no match, end of string)  */
	/* cc == 3 (partial match) See above: min-skip-partial-match-index!  */
	lcbb	%r1,0(%r5,%r2),6
	la	%r2,0(%r5,%r2)	/* Placed between lcbb and the jo testing it.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	/* Partial-match loop, unrolled three times: as long as vstrs keeps
	   reporting cc == 3, advance by r5 and search again.  With cc == 0 the
	   main loop is resumed at the already advanced position.  */
.Lloop_vstrs_nonzero_cc_loop:
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop		/* cc == 0: back to the main loop.  */
	lcbb	%r1,0(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop		/* cc == 0: back to the main loop.  */
	lcbb	%r1,0(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	jh	.Lend_match_found
	jl	.Lend_no_match
	la	%r2,0(%r5,%r2)
	je	.Lloop		/* cc == 0: back to the main loop.  */
	lcbb	%r1,0(%r2),6	/* Next part of haystack.  */
	jo	.Lloop_haystack_on_bb
	vl	%v16,0(%r2)
	vstrs	%v20,%v16,%v18,%v19,0,2
	j	.Lloop_vstrs_nonzero_cc_loop

.Lend_no_match:
	lghi	%r2,0		/* Return 0: needle not found.  */
	br	%r14
.Lend_match_found:
	vlgvb	%r4,%v20,7	/* Index of the match reported by vstrs.  */
	la	%r2,0(%r4,%r2)	/* Return current position plus index.  */
	br	%r14

	/* The labels below fall through on purpose: r2 is advanced by 48, 32
	   or 16 so that it points to the part which is on the boundary.  */
.Lloop_haystack_on_bb48:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb32:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb16:
	la	%r2,16(%r2)
.Lloop_haystack_on_bb:
	/* Haystack located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v16,%r1,0(%r2)
	vlvgb	%v21,%r1,7
	vfenezb	%v17,%v16,%v16	/* Search zero in loaded haystack bytes.  */
	veclb	%v17,%v21	/* Zero index <= loaded byte index?  */
	jle	.Lloop_haystack_loaded	/* -> v16 contains full haystack.  */
	vl	%v16,0(%r2)	/* Load haystack beyond page boundary.  */
	j	.Lloop_haystack_loaded

.Lneedle_on_bb:
	/* Needle located on page-boundary.  */
	ahi	%r1,-1		/* vll needs highest index instead of count.  */
	vll	%v18,%r1,0(%r3)
	vlvgb	%v21,%r1,7
	vfenezb	%v19,%v18,%v18	/* Search zero in loaded needle bytes.  */
	veclb	%v19,%v21	/* Zero index <= max loaded byte index?  */
	jle	.Lneedle_loaded	/* -> v18 contains full needle.  */
	vl	%v18,0(%r3)	/* Load needle beyond page boundary.  */
	vfenezb	%v19,%v18,%v18	/* Redo the zero search on all 16 bytes.  */
	j	.Lneedle_loaded
END(STRSTR_ARCH13)

# if ! HAVE_STRSTR_IFUNC
strong_alias (STRSTR_ARCH13, strstr)
# endif

# if STRSTR_Z13_ONLY_USED_AS_FALLBACK && defined SHARED && IS_IN (libc)
strong_alias (STRSTR_ARCH13, __GI_strstr)
# endif
#endif