1 /* Copyright (C) 2010-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library. If not, see
16 <https://www.gnu.org/licenses/>. */
17
18 #include <string.h>
19
20 typedef unsigned long word;
21
22 static inline word
ldq_u(const void * s)23 ldq_u(const void *s)
24 {
25 return *(const word *)((word)s & -8);
26 }
27
28 #define unlikely(X) __builtin_expect ((X), 0)
29 #define prefetch(X) __builtin_prefetch ((void *)(X), 0)
30
31 #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X))
32 #define find(X, Y) cmpbeq0 ((X) ^ (Y))
33
34 /* Search no more than N bytes of S for C. */
35
36 void *
__memchr(const void * s,int xc,size_t n)37 __memchr (const void *s, int xc, size_t n)
38 {
39 const word *s_align;
40 word t, current, found, mask, offset;
41
42 if (unlikely (n == 0))
43 return 0;
44
45 current = ldq_u (s);
46
47 /* Replicate low byte of XC into all bytes of C. */
48 t = xc & 0xff; /* 0000000c */
49 t = (t << 8) | t; /* 000000cc */
50 t = (t << 16) | t; /* 0000cccc */
51 const word c = (t << 32) | t; /* cccccccc */
52
53 /* Align the source, and decrement the count by the number
54 of bytes searched in the first word. */
55 s_align = (const word *)((word)s & -8);
56 {
57 size_t inc = n + ((word)s & 7);
58 n = inc | -(inc < n);
59 }
60
61 /* Deal with misalignment in the first word for the comparison. */
62 mask = (1ul << ((word)s & 7)) - 1;
63
64 /* If the entire string fits within one word, we may need masking
65 at both the front and the back of the string. */
66 if (unlikely (n <= 8))
67 {
68 mask |= -1ul << n;
69 goto last_quad;
70 }
71
72 found = find (current, c) & ~mask;
73 if (unlikely (found))
74 goto found_it;
75
76 s_align++;
77 n -= 8;
78
79 /* If the block is sufficiently large, align to cacheline and prefetch. */
80 if (unlikely (n >= 256))
81 {
82 /* Prefetch 3 cache lines beyond the one we're working on. */
83 prefetch (s_align + 8);
84 prefetch (s_align + 16);
85 prefetch (s_align + 24);
86
87 while ((word)s_align & 63)
88 {
89 current = *s_align;
90 found = find (current, c);
91 if (found)
92 goto found_it;
93 s_align++;
94 n -= 8;
95 }
96
97 /* Within each cacheline, advance the load for the next word
98 before the test for the previous word is complete. This
99 allows us to hide the 3 cycle L1 cache load latency. We
100 only perform this advance load within a cacheline to prevent
101 reading across page boundary. */
102 #define CACHELINE_LOOP \
103 do { \
104 word i, next = s_align[0]; \
105 for (i = 0; i < 7; ++i) \
106 { \
107 current = next; \
108 next = s_align[1]; \
109 found = find (current, c); \
110 if (unlikely (found)) \
111 goto found_it; \
112 s_align++; \
113 } \
114 current = next; \
115 found = find (current, c); \
116 if (unlikely (found)) \
117 goto found_it; \
118 s_align++; \
119 n -= 64; \
120 } while (0)
121
122 /* While there's still lots more data to potentially be read,
123 continue issuing prefetches for the 4th cacheline out. */
124 while (n >= 256)
125 {
126 prefetch (s_align + 24);
127 CACHELINE_LOOP;
128 }
129
130 /* Up to 3 cache lines remaining. Continue issuing advanced
131 loads, but stop prefetching. */
132 while (n >= 64)
133 CACHELINE_LOOP;
134
135 /* We may have exhausted the buffer. */
136 if (n == 0)
137 return NULL;
138 }
139
140 /* Quadword aligned loop. */
141 current = *s_align;
142 while (n > 8)
143 {
144 found = find (current, c);
145 if (unlikely (found))
146 goto found_it;
147 current = *++s_align;
148 n -= 8;
149 }
150
151 /* The last word may need masking at the tail of the compare. */
152 mask = -1ul << n;
153 last_quad:
154 found = find (current, c) & ~mask;
155 if (found == 0)
156 return NULL;
157
158 found_it:
159 #ifdef __alpha_cix__
160 offset = __builtin_alpha_cttz (found);
161 #else
162 /* Extract LSB. */
163 found &= -found;
164
165 /* Binary search for the LSB. */
166 offset = (found & 0x0f ? 0 : 4);
167 offset += (found & 0x33 ? 0 : 2);
168 offset += (found & 0x55 ? 0 : 1);
169 #endif
170
171 return (void *)((word)s_align + offset);
172 }
173
174 #ifdef weak_alias
175 weak_alias (__memchr, memchr)
176 #endif
177 libc_hidden_builtin_def (memchr)
178