1 /* Copyright (C) 2010-2022 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library.  If not, see
16    <https://www.gnu.org/licenses/>.  */
17 
18 #include <string.h>
19 
20 typedef unsigned long word;
21 
22 static inline word
ldq_u(const void * s)23 ldq_u(const void *s)
24 {
25   return *(const word *)((word)s & -8);
26 }
27 
28 #define unlikely(X)	__builtin_expect ((X), 0)
29 #define prefetch(X)	__builtin_prefetch ((void *)(X), 0)
30 
31 #define cmpbeq0(X)	__builtin_alpha_cmpbge(0, (X))
32 #define find(X, Y)	cmpbeq0 ((X) ^ (Y))
33 
34 /* Search no more than N bytes of S for C.  */
35 
36 void *
__memchr(const void * s,int xc,size_t n)37 __memchr (const void *s, int xc, size_t n)
38 {
39   const word *s_align;
40   word t, current, found, mask, offset;
41 
42   if (unlikely (n == 0))
43     return 0;
44 
45   current = ldq_u (s);
46 
47   /* Replicate low byte of XC into all bytes of C.  */
48   t = xc & 0xff;			/* 0000000c */
49   t = (t << 8) | t;			/* 000000cc */
50   t = (t << 16) | t;			/* 0000cccc */
51   const word c = (t << 32) | t;		/* cccccccc */
52 
53   /* Align the source, and decrement the count by the number
54      of bytes searched in the first word.  */
55   s_align = (const word *)((word)s & -8);
56   {
57     size_t inc = n + ((word)s & 7);
58     n = inc | -(inc < n);
59   }
60 
61   /* Deal with misalignment in the first word for the comparison.  */
62   mask = (1ul << ((word)s & 7)) - 1;
63 
64   /* If the entire string fits within one word, we may need masking
65      at both the front and the back of the string.  */
66   if (unlikely (n <= 8))
67     {
68       mask |= -1ul << n;
69       goto last_quad;
70     }
71 
72   found = find (current, c) & ~mask;
73   if (unlikely (found))
74     goto found_it;
75 
76   s_align++;
77   n -= 8;
78 
79   /* If the block is sufficiently large, align to cacheline and prefetch.  */
80   if (unlikely (n >= 256))
81     {
82       /* Prefetch 3 cache lines beyond the one we're working on.  */
83       prefetch (s_align + 8);
84       prefetch (s_align + 16);
85       prefetch (s_align + 24);
86 
87       while ((word)s_align & 63)
88 	{
89 	  current = *s_align;
90 	  found = find (current, c);
91 	  if (found)
92 	    goto found_it;
93 	  s_align++;
94 	  n -= 8;
95 	}
96 
97 	/* Within each cacheline, advance the load for the next word
98 	   before the test for the previous word is complete.  This
99 	   allows us to hide the 3 cycle L1 cache load latency.  We
100 	   only perform this advance load within a cacheline to prevent
101 	   reading across page boundary.  */
102 #define CACHELINE_LOOP				\
103 	do {					\
104 	  word i, next = s_align[0];		\
105 	  for (i = 0; i < 7; ++i)		\
106 	    {					\
107 	      current = next;			\
108 	      next = s_align[1];		\
109 	      found = find (current, c);	\
110 	      if (unlikely (found))		\
111 		goto found_it;			\
112 	      s_align++;			\
113 	    }					\
114 	  current = next;			\
115 	  found = find (current, c);		\
116 	  if (unlikely (found))			\
117 	    goto found_it;			\
118 	  s_align++;				\
119 	  n -= 64;				\
120 	} while (0)
121 
122       /* While there's still lots more data to potentially be read,
123 	 continue issuing prefetches for the 4th cacheline out.  */
124       while (n >= 256)
125 	{
126 	  prefetch (s_align + 24);
127 	  CACHELINE_LOOP;
128 	}
129 
130       /* Up to 3 cache lines remaining.  Continue issuing advanced
131 	 loads, but stop prefetching.  */
132       while (n >= 64)
133 	CACHELINE_LOOP;
134 
135       /* We may have exhausted the buffer.  */
136       if (n == 0)
137 	return NULL;
138     }
139 
140   /* Quadword aligned loop.  */
141   current = *s_align;
142   while (n > 8)
143     {
144       found = find (current, c);
145       if (unlikely (found))
146 	goto found_it;
147       current = *++s_align;
148       n -= 8;
149     }
150 
151   /* The last word may need masking at the tail of the compare.  */
152   mask = -1ul << n;
153  last_quad:
154   found = find (current, c) & ~mask;
155   if (found == 0)
156     return NULL;
157 
158  found_it:
159 #ifdef __alpha_cix__
160   offset = __builtin_alpha_cttz (found);
161 #else
162   /* Extract LSB.  */
163   found &= -found;
164 
165   /* Binary search for the LSB.  */
166   offset  = (found & 0x0f ? 0 : 4);
167   offset += (found & 0x33 ? 0 : 2);
168   offset += (found & 0x55 ? 0 : 1);
169 #endif
170 
171   return (void *)((word)s_align + offset);
172 }
173 
174 #ifdef weak_alias
175 weak_alias (__memchr, memchr)
176 #endif
177 libc_hidden_builtin_def (memchr)
178