1 /* SPDX-License-Identifier: LicenseRef-lookup3-public-domain */
2 /* Slightly modified by Lennart Poettering, to avoid name clashes, and
3  * unexport a few functions. */
4 
5 #include "lookup3.h"
6 
7 /*
8 -------------------------------------------------------------------------------
9 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
10 
11 These are functions for producing 32-bit hashes for hash table lookup.
12 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
13 are externally useful functions.  Routines to test the hash are included
14 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
15 the public domain.  It has no warranty.
16 
17 You probably want to use hashlittle().  hashlittle() and hashbig()
18 hash byte arrays.  hashlittle() is faster than hashbig() on
19 little-endian machines.  Intel and AMD are little-endian machines.
20 On second thought, you probably want hashlittle2(), which is identical to
21 hashlittle() except it returns two 32-bit hashes for the price of one.
22 You could implement hashbig2() if you wanted but I haven't bothered here.
23 
24 If you want to find a hash of, say, exactly 7 integers, do
25   a = i1;  b = i2;  c = i3;
26   mix(a,b,c);
27   a += i4; b += i5; c += i6;
28   mix(a,b,c);
29   a += i7;
30   final(a,b,c);
31 then use c as the hash value.  If you have a variable length array of
32 4-byte integers to hash, use hashword().  If you have a byte array (like
33 a character string), use hashlittle().  If you have several byte arrays, or
34 a mix of things, see the comments above hashlittle().
35 
36 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
37 then mix those integers.  This is fast (you can do a lot more thorough
38 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
39 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
40 -------------------------------------------------------------------------------
41 */
42 /* #define SELF_TEST 1 */
43 
44 #include <stdint.h>     /* defines uint32_t etc */
45 #include <stdio.h>      /* defines printf for tests */
46 #include <sys/param.h>  /* attempt to define endianness */
47 #include <time.h>       /* defines time_t for timings in the test */
48 #ifdef linux
49 # include <endian.h>    /* attempt to define endianness */
50 #endif
51 
52 #if __GNUC__ >= 7
53 _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
54 #endif
55 
56 /*
57  * My best guess at if you are big-endian or little-endian.  This may
58  * need adjustment.
59  */
60 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
61      __BYTE_ORDER == __LITTLE_ENDIAN) || \
62     (defined(i386) || defined(__i386__) || defined(__i486__) || \
63      defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
64 # define HASH_LITTLE_ENDIAN 1
65 # define HASH_BIG_ENDIAN 0
66 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
67        __BYTE_ORDER == __BIG_ENDIAN) || \
68       (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
69 # define HASH_LITTLE_ENDIAN 0
70 # define HASH_BIG_ENDIAN 1
71 #else
72 # define HASH_LITTLE_ENDIAN 0
73 # define HASH_BIG_ENDIAN 0
74 #endif
75 
76 #define hashsize(n) ((uint32_t)1<<(n))
77 #define hashmask(n) (hashsize(n)-1)
78 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
79 
80 /*
81 -------------------------------------------------------------------------------
82 mix -- mix 3 32-bit values reversibly.
83 
84 This is reversible, so any information in (a,b,c) before mix() is
85 still in (a,b,c) after mix().
86 
87 If four pairs of (a,b,c) inputs are run through mix(), or through
88 mix() in reverse, there are at least 32 bits of the output that
89 are sometimes the same for one pair and different for another pair.
90 This was tested for:
91 * pairs that differed by one bit, by two bits, in any combination
92   of top bits of (a,b,c), or in any combination of bottom bits of
93   (a,b,c).
94 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
95   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
96   is commonly produced by subtraction) look like a single 1-bit
97   difference.
98 * the base values were pseudorandom, all zero but one bit set, or
99   all zero plus a counter that starts at zero.
100 
101 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
102 satisfy this are
103     4  6  8 16 19  4
104     9 15  3 18 27 15
105    14  9  3  7 17  3
106 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
107 for "differ" defined as + with a one-bit base and a two-bit delta.  I
108 used http://burtleburtle.net/bob/hash/avalanche.html to choose
109 the operations, constants, and arrangements of the variables.
110 
111 This does not achieve avalanche.  There are input bits of (a,b,c)
112 that fail to affect some output bits of (a,b,c), especially of a.  The
113 most thoroughly mixed value is c, but it doesn't really even achieve
114 avalanche in c.
115 
116 This allows some parallelism.  Read-after-writes are good at doubling
117 the number of bits affected, so the goal of mixing pulls in the opposite
118 direction as the goal of parallelism.  I did what I could.  Rotates
119 seem to cost as much as shifts on every machine I could lay my hands
120 on, and rotates are much kinder to the top and bottom bits, so I used
121 rotates.
122 -------------------------------------------------------------------------------
123 */
124 #define mix(a,b,c) \
125 { \
126   a -= c;  a ^= rot(c, 4);  c += b; \
127   b -= a;  b ^= rot(a, 6);  a += c; \
128   c -= b;  c ^= rot(b, 8);  b += a; \
129   a -= c;  a ^= rot(c,16);  c += b; \
130   b -= a;  b ^= rot(a,19);  a += c; \
131   c -= b;  c ^= rot(b, 4);  b += a; \
132 }
133 
134 /*
135 -------------------------------------------------------------------------------
136 final -- final mixing of 3 32-bit values (a,b,c) into c
137 
138 Pairs of (a,b,c) values differing in only a few bits will usually
139 produce values of c that look totally different.  This was tested for
140 * pairs that differed by one bit, by two bits, in any combination
141   of top bits of (a,b,c), or in any combination of bottom bits of
142   (a,b,c).
143 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
144   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
145   is commonly produced by subtraction) look like a single 1-bit
146   difference.
147 * the base values were pseudorandom, all zero but one bit set, or
148   all zero plus a counter that starts at zero.
149 
150 These constants passed:
151  14 11 25 16 4 14 24
152  12 14 25 16 4 14 24
153 and these came close:
154   4  8 15 26 3 22 24
155  10  8 15 26 3 22 24
156  11  8 15 26 3 22 24
157 -------------------------------------------------------------------------------
158 */
159 #define final(a,b,c) \
160 { \
161   c ^= b; c -= rot(b,14); \
162   a ^= c; a -= rot(c,11); \
163   b ^= a; b -= rot(a,25); \
164   c ^= b; c -= rot(b,16); \
165   a ^= c; a -= rot(c,4);  \
166   b ^= a; b -= rot(a,14); \
167   c ^= b; c -= rot(b,24); \
168 }
169 
170 /*
171 --------------------------------------------------------------------
172  This works on all machines.  To be useful, it requires
173  -- that the key be an array of uint32_t's, and
174  -- that the length be the number of uint32_t's in the key
175 
176  The function hashword() is identical to hashlittle() on little-endian
177  machines, and identical to hashbig() on big-endian machines,
178  except that the length has to be measured in uint32_ts rather than in
179  bytes.  hashlittle() is more complicated than hashword() only because
180  hashlittle() has to dance around fitting the key bytes into registers.
181 --------------------------------------------------------------------
182 */
jenkins_hashword(const uint32_t * k,size_t length,uint32_t initval)183 uint32_t jenkins_hashword(
184 const uint32_t *k,                   /* the key, an array of uint32_t values */
185 size_t          length,               /* the length of the key, in uint32_ts */
186 uint32_t        initval)         /* the previous hash, or an arbitrary value */
187 {
188   uint32_t a,b,c;
189 
190   /* Set up the internal state */
191   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
192 
193   /*------------------------------------------------- handle most of the key */
194   while (length > 3)
195   {
196     a += k[0];
197     b += k[1];
198     c += k[2];
199     mix(a,b,c);
200     length -= 3;
201     k += 3;
202   }
203 
204   /*------------------------------------------- handle the last 3 uint32_t's */
205   switch(length)                     /* all the case statements fall through */
206   {
207   case 3 : c+=k[2];
208   case 2 : b+=k[1];
209   case 1 : a+=k[0];
210     final(a,b,c);
211   case 0:     /* case 0: nothing left to add */
212     break;
213   }
214   /*------------------------------------------------------ report the result */
215   return c;
216 }
217 
218 /*
219 --------------------------------------------------------------------
220 hashword2() -- same as hashword(), but take two seeds and return two
221 32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
222 both be initialized with seeds.  If you pass in (*pb)==0, the output
223 (*pc) will be the same as the return value from hashword().
224 --------------------------------------------------------------------
225 */
jenkins_hashword2(const uint32_t * k,size_t length,uint32_t * pc,uint32_t * pb)226 void jenkins_hashword2 (
227 const uint32_t *k,                   /* the key, an array of uint32_t values */
228 size_t          length,               /* the length of the key, in uint32_ts */
229 uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
230 uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
231 {
232   uint32_t a,b,c;
233 
234   /* Set up the internal state */
235   a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
236   c += *pb;
237 
238   /*------------------------------------------------- handle most of the key */
239   while (length > 3)
240   {
241     a += k[0];
242     b += k[1];
243     c += k[2];
244     mix(a,b,c);
245     length -= 3;
246     k += 3;
247   }
248 
249   /*------------------------------------------- handle the last 3 uint32_t's */
250   switch(length)                     /* all the case statements fall through */
251   {
252   case 3 : c+=k[2];
253   case 2 : b+=k[1];
254   case 1 : a+=k[0];
255     final(a,b,c);
256   case 0:     /* case 0: nothing left to add */
257     break;
258   }
259   /*------------------------------------------------------ report the result */
260   *pc=c; *pb=b;
261 }
262 
263 /*
264 -------------------------------------------------------------------------------
265 hashlittle() -- hash a variable-length key into a 32-bit value
266   k       : the key (the unaligned variable-length array of bytes)
267   length  : the length of the key, counting by bytes
268   initval : can be any 4-byte value
269 Returns a 32-bit value.  Every bit of the key affects every bit of
270 the return value.  Two keys differing by one or two bits will have
271 totally different hash values.
272 
273 The best hash table sizes are powers of 2.  There is no need to do
274 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
275 use a bitmask.  For example, if you need only 10 bits, do
276   h = (h & hashmask(10));
277 In which case, the hash table should have hashsize(10) elements.
278 
279 If you are hashing n strings (uint8_t **)k, do it like this:
280   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
281 
282 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
283 code any way you wish, private, educational, or commercial.  It's free.
284 
285 Use for hash table lookup, or anything where one collision in 2^^32 is
286 acceptable.  Do NOT use for cryptographic purposes.
287 -------------------------------------------------------------------------------
288 */
289 
jenkins_hashlittle(const void * key,size_t length,uint32_t initval)290 uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
291 {
292   uint32_t a,b,c;                                          /* internal state */
293   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
294 
295   /* Set up the internal state */
296   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
297 
298   u.ptr = key;
299   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
300     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
301 
302     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
303     while (length > 12)
304     {
305       a += k[0];
306       b += k[1];
307       c += k[2];
308       mix(a,b,c);
309       length -= 12;
310       k += 3;
311     }
312 
313     /*----------------------------- handle the last (probably partial) block */
314     /*
315      * "k[2]&0xffffff" actually reads beyond the end of the string, but
316      * then masks off the part it's not allowed to read.  Because the
317      * string is aligned, the masked-off tail is in the same word as the
318      * rest of the string.  Every machine with memory protection I've seen
319      * does it on word boundaries, so is OK with this.  But valgrind will
320      * still catch it and complain.  The masking trick does make the hash
321      * noticeably faster for short strings (like English words).
322      */
323 #if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
324 
325     switch(length)
326     {
327     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
328     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
329     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
330     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
331     case 8 : b+=k[1]; a+=k[0]; break;
332     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
333     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
334     case 5 : b+=k[1]&0xff; a+=k[0]; break;
335     case 4 : a+=k[0]; break;
336     case 3 : a+=k[0]&0xffffff; break;
337     case 2 : a+=k[0]&0xffff; break;
338     case 1 : a+=k[0]&0xff; break;
339     case 0 : return c;              /* zero length strings require no mixing */
340     }
341 
342 #else /* make valgrind happy */
343     {
344       const uint8_t *k8 = (const uint8_t *) k;
345 
346       switch(length)
347       {
348       case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
349       case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
350       case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
351       case 9 : c+=k8[8];                   /* fall through */
352       case 8 : b+=k[1]; a+=k[0]; break;
353       case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
354       case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
355       case 5 : b+=k8[4];                   /* fall through */
356       case 4 : a+=k[0]; break;
357       case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
358       case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
359       case 1 : a+=k8[0]; break;
360       case 0 : return c;
361       }
362     }
363 
364 #endif /* !valgrind */
365 
366   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
367     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
368     const uint8_t  *k8;
369 
370     /*--------------- all but last block: aligned reads and different mixing */
371     while (length > 12)
372     {
373       a += k[0] + (((uint32_t)k[1])<<16);
374       b += k[2] + (((uint32_t)k[3])<<16);
375       c += k[4] + (((uint32_t)k[5])<<16);
376       mix(a,b,c);
377       length -= 12;
378       k += 6;
379     }
380 
381     /*----------------------------- handle the last (probably partial) block */
382     k8 = (const uint8_t *)k;
383     switch(length)
384     {
385     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
386              b+=k[2]+(((uint32_t)k[3])<<16);
387              a+=k[0]+(((uint32_t)k[1])<<16);
388              break;
389     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
390     case 10: c+=k[4];
391              b+=k[2]+(((uint32_t)k[3])<<16);
392              a+=k[0]+(((uint32_t)k[1])<<16);
393              break;
394     case 9 : c+=k8[8];                      /* fall through */
395     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
396              a+=k[0]+(((uint32_t)k[1])<<16);
397              break;
398     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
399     case 6 : b+=k[2];
400              a+=k[0]+(((uint32_t)k[1])<<16);
401              break;
402     case 5 : b+=k8[4];                      /* fall through */
403     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
404              break;
405     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
406     case 2 : a+=k[0];
407              break;
408     case 1 : a+=k8[0];
409              break;
410     case 0 : return c;                     /* zero length requires no mixing */
411     }
412 
413   } else {                        /* need to read the key one byte at a time */
414     const uint8_t *k = (const uint8_t *)key;
415 
416     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
417     while (length > 12)
418     {
419       a += k[0];
420       a += ((uint32_t)k[1])<<8;
421       a += ((uint32_t)k[2])<<16;
422       a += ((uint32_t)k[3])<<24;
423       b += k[4];
424       b += ((uint32_t)k[5])<<8;
425       b += ((uint32_t)k[6])<<16;
426       b += ((uint32_t)k[7])<<24;
427       c += k[8];
428       c += ((uint32_t)k[9])<<8;
429       c += ((uint32_t)k[10])<<16;
430       c += ((uint32_t)k[11])<<24;
431       mix(a,b,c);
432       length -= 12;
433       k += 12;
434     }
435 
436     /*-------------------------------- last block: affect all 32 bits of (c) */
437     switch(length)                   /* all the case statements fall through */
438     {
439     case 12: c+=((uint32_t)k[11])<<24;
440     case 11: c+=((uint32_t)k[10])<<16;
441     case 10: c+=((uint32_t)k[9])<<8;
442     case 9 : c+=k[8];
443     case 8 : b+=((uint32_t)k[7])<<24;
444     case 7 : b+=((uint32_t)k[6])<<16;
445     case 6 : b+=((uint32_t)k[5])<<8;
446     case 5 : b+=k[4];
447     case 4 : a+=((uint32_t)k[3])<<24;
448     case 3 : a+=((uint32_t)k[2])<<16;
449     case 2 : a+=((uint32_t)k[1])<<8;
450     case 1 : a+=k[0];
451              break;
452     case 0 : return c;
453     }
454   }
455 
456   final(a,b,c);
457   return c;
458 }
459 
460 /*
461  * hashlittle2: return 2 32-bit hash values
462  *
463  * This is identical to hashlittle(), except it returns two 32-bit hash
464  * values instead of just one.  This is good enough for hash table
465  * lookup with 2^^64 buckets, or if you want a second hash if you're not
466  * happy with the first, or if you want a probably-unique 64-bit ID for
467  * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
468  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
469  */
jenkins_hashlittle2(const void * key,size_t length,uint32_t * pc,uint32_t * pb)470 void jenkins_hashlittle2(
471   const void *key,       /* the key to hash */
472   size_t      length,    /* length of the key */
473   uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
474   uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
475 {
476   uint32_t a,b,c;                                          /* internal state */
477   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
478 
479   /* Set up the internal state */
480   a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
481   c += *pb;
482 
483   u.ptr = key;
484   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
485     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
486 
487     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
488     while (length > 12)
489     {
490       a += k[0];
491       b += k[1];
492       c += k[2];
493       mix(a,b,c);
494       length -= 12;
495       k += 3;
496     }
497 
498     /*----------------------------- handle the last (probably partial) block */
499     /*
500      * "k[2]&0xffffff" actually reads beyond the end of the string, but
501      * then masks off the part it's not allowed to read.  Because the
502      * string is aligned, the masked-off tail is in the same word as the
503      * rest of the string.  Every machine with memory protection I've seen
504      * does it on word boundaries, so is OK with this.  But valgrind will
505      * still catch it and complain.  The masking trick does make the hash
506      * noticeably faster for short strings (like English words).
507      */
508 #if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
509 
510     switch(length)
511     {
512     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
513     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
514     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
515     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
516     case 8 : b+=k[1]; a+=k[0]; break;
517     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
518     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
519     case 5 : b+=k[1]&0xff; a+=k[0]; break;
520     case 4 : a+=k[0]; break;
521     case 3 : a+=k[0]&0xffffff; break;
522     case 2 : a+=k[0]&0xffff; break;
523     case 1 : a+=k[0]&0xff; break;
524     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
525     }
526 
527 #else /* make valgrind happy */
528 
529     {
530       const uint8_t *k8 = (const uint8_t *)k;
531       switch(length)
532       {
533       case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
534       case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
535       case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
536       case 9 : c+=k8[8];                   /* fall through */
537       case 8 : b+=k[1]; a+=k[0]; break;
538       case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
539       case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
540       case 5 : b+=k8[4];                   /* fall through */
541       case 4 : a+=k[0]; break;
542       case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
543       case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
544       case 1 : a+=k8[0]; break;
545       case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
546       }
547     }
548 
549 #endif /* !valgrind */
550 
551   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
552     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
553     const uint8_t  *k8;
554 
555     /*--------------- all but last block: aligned reads and different mixing */
556     while (length > 12)
557     {
558       a += k[0] + (((uint32_t)k[1])<<16);
559       b += k[2] + (((uint32_t)k[3])<<16);
560       c += k[4] + (((uint32_t)k[5])<<16);
561       mix(a,b,c);
562       length -= 12;
563       k += 6;
564     }
565 
566     /*----------------------------- handle the last (probably partial) block */
567     k8 = (const uint8_t *)k;
568     switch(length)
569     {
570     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
571              b+=k[2]+(((uint32_t)k[3])<<16);
572              a+=k[0]+(((uint32_t)k[1])<<16);
573              break;
574     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
575     case 10: c+=k[4];
576              b+=k[2]+(((uint32_t)k[3])<<16);
577              a+=k[0]+(((uint32_t)k[1])<<16);
578              break;
579     case 9 : c+=k8[8];                      /* fall through */
580     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
581              a+=k[0]+(((uint32_t)k[1])<<16);
582              break;
583     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
584     case 6 : b+=k[2];
585              a+=k[0]+(((uint32_t)k[1])<<16);
586              break;
587     case 5 : b+=k8[4];                      /* fall through */
588     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
589              break;
590     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
591     case 2 : a+=k[0];
592              break;
593     case 1 : a+=k8[0];
594              break;
595     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
596     }
597 
598   } else {                        /* need to read the key one byte at a time */
599     const uint8_t *k = (const uint8_t *)key;
600 
601     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
602     while (length > 12)
603     {
604       a += k[0];
605       a += ((uint32_t)k[1])<<8;
606       a += ((uint32_t)k[2])<<16;
607       a += ((uint32_t)k[3])<<24;
608       b += k[4];
609       b += ((uint32_t)k[5])<<8;
610       b += ((uint32_t)k[6])<<16;
611       b += ((uint32_t)k[7])<<24;
612       c += k[8];
613       c += ((uint32_t)k[9])<<8;
614       c += ((uint32_t)k[10])<<16;
615       c += ((uint32_t)k[11])<<24;
616       mix(a,b,c);
617       length -= 12;
618       k += 12;
619     }
620 
621     /*-------------------------------- last block: affect all 32 bits of (c) */
622     switch(length)                   /* all the case statements fall through */
623     {
624     case 12: c+=((uint32_t)k[11])<<24;
625     case 11: c+=((uint32_t)k[10])<<16;
626     case 10: c+=((uint32_t)k[9])<<8;
627     case 9 : c+=k[8];
628     case 8 : b+=((uint32_t)k[7])<<24;
629     case 7 : b+=((uint32_t)k[6])<<16;
630     case 6 : b+=((uint32_t)k[5])<<8;
631     case 5 : b+=k[4];
632     case 4 : a+=((uint32_t)k[3])<<24;
633     case 3 : a+=((uint32_t)k[2])<<16;
634     case 2 : a+=((uint32_t)k[1])<<8;
635     case 1 : a+=k[0];
636              break;
637     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
638     }
639   }
640 
641   final(a,b,c);
642   *pc=c; *pb=b;
643 }
644 
645 /*
646  * hashbig():
647  * This is the same as hashword() on big-endian machines.  It is different
648  * from hashlittle() on all machines.  hashbig() takes advantage of
649  * big-endian byte ordering.
650  */
jenkins_hashbig(const void * key,size_t length,uint32_t initval)651 uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
652 {
653   uint32_t a,b,c;
654   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
655 
656   /* Set up the internal state */
657   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
658 
659   u.ptr = key;
660   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
661     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
662 
663     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
664     while (length > 12)
665     {
666       a += k[0];
667       b += k[1];
668       c += k[2];
669       mix(a,b,c);
670       length -= 12;
671       k += 3;
672     }
673 
674     /*----------------------------- handle the last (probably partial) block */
675     /*
676      * "k[2]<<8" actually reads beyond the end of the string, but
677      * then shifts out the part it's not allowed to read.  Because the
678      * string is aligned, the illegal read is in the same word as the
679      * rest of the string.  Every machine with memory protection I've seen
680      * does it on word boundaries, so is OK with this.  But valgrind will
681      * still catch it and complain.  The masking trick does make the hash
682      * noticeably faster for short strings (like English words).
683      */
684 #if !VALGRIND && !HAS_FEATURE_ADDRESS_SANITIZER && !HAS_FEATURE_MEMORY_SANITIZER
685 
686     switch(length)
687     {
688     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
689     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
690     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
691     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
692     case 8 : b+=k[1]; a+=k[0]; break;
693     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
694     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
695     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
696     case 4 : a+=k[0]; break;
697     case 3 : a+=k[0]&0xffffff00; break;
698     case 2 : a+=k[0]&0xffff0000; break;
699     case 1 : a+=k[0]&0xff000000; break;
700     case 0 : return c;              /* zero length strings require no mixing */
701     }
702 
703 #else  /* make valgrind happy */
704 
705     {
706       const uint8_t *k8 = (const uint8_t *)k;
707       switch(length)                   /* all the case statements fall through */
708       {
709       case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
710       case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
711       case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
712       case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
713       case 8 : b+=k[1]; a+=k[0]; break;
714       case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
715       case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
716       case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
717       case 4 : a+=k[0]; break;
718       case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
719       case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
720       case 1 : a+=((uint32_t)k8[0])<<24; break;
721       case 0 : return c;
722       }
723     }
724 
725 #endif /* !VALGRIND */
726 
727   } else {                        /* need to read the key one byte at a time */
728     const uint8_t *k = (const uint8_t *)key;
729 
730     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
731     while (length > 12)
732     {
733       a += ((uint32_t)k[0])<<24;
734       a += ((uint32_t)k[1])<<16;
735       a += ((uint32_t)k[2])<<8;
736       a += ((uint32_t)k[3]);
737       b += ((uint32_t)k[4])<<24;
738       b += ((uint32_t)k[5])<<16;
739       b += ((uint32_t)k[6])<<8;
740       b += ((uint32_t)k[7]);
741       c += ((uint32_t)k[8])<<24;
742       c += ((uint32_t)k[9])<<16;
743       c += ((uint32_t)k[10])<<8;
744       c += ((uint32_t)k[11]);
745       mix(a,b,c);
746       length -= 12;
747       k += 12;
748     }
749 
750     /*-------------------------------- last block: affect all 32 bits of (c) */
751     switch(length)                   /* all the case statements fall through */
752     {
753     case 12: c+=k[11];
754     case 11: c+=((uint32_t)k[10])<<8;
755     case 10: c+=((uint32_t)k[9])<<16;
756     case 9 : c+=((uint32_t)k[8])<<24;
757     case 8 : b+=k[7];
758     case 7 : b+=((uint32_t)k[6])<<8;
759     case 6 : b+=((uint32_t)k[5])<<16;
760     case 5 : b+=((uint32_t)k[4])<<24;
761     case 4 : a+=k[3];
762     case 3 : a+=((uint32_t)k[2])<<8;
763     case 2 : a+=((uint32_t)k[1])<<16;
764     case 1 : a+=((uint32_t)k[0])<<24;
765              break;
766     case 0 : return c;
767     }
768   }
769 
770   final(a,b,c);
771   return c;
772 }
773 
774 #ifdef SELF_TEST
775 
776 /* used for timings */
driver1()777 void driver1()
778 {
779   uint8_t buf[256];
780   uint32_t i;
781   uint32_t h=0;
782   time_t a,z;
783 
784   time(&a);
785   for (i=0; i<256; ++i) buf[i] = 'x';
786   for (i=0; i<1; ++i)
787   {
788     h = hashlittle(&buf[0],1,h);
789   }
790   time(&z);
791   if (z-a > 0) printf("time %d %.8x\n", z-a, h);
792 }
793 
794 /* check that every input bit changes every output bit half the time */
795 #define HASHSTATE 1
796 #define HASHLEN   1
797 #define MAXPAIR 60
798 #define MAXLEN  70
driver2()799 void driver2()
800 {
801   uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
802   uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
803   uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
804   uint32_t x[HASHSTATE],y[HASHSTATE];
805   uint32_t hlen;
806 
807   printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
808   for (hlen=0; hlen < MAXLEN; ++hlen)
809   {
810     z=0;
811     for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
812     {
813       for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
814       {
815         for (m=1; m<8; ++m) /*------------- for several possible initvals, */
816         {
817           for (l=0; l<HASHSTATE; ++l)
818             e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
819 
820           /*---- check that every output bit is affected by that input bit */
821           for (k=0; k<MAXPAIR; k+=2)
822           {
823             uint32_t finished=1;
824             /* keys have one bit different */
825             for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
826             /* have a and b be two keys differing in only one bit */
827             a[i] ^= (k<<j);
828             a[i] ^= (k>>(8-j));
829              c[0] = hashlittle(a, hlen, m);
830             b[i] ^= ((k+1)<<j);
831             b[i] ^= ((k+1)>>(8-j));
832              d[0] = hashlittle(b, hlen, m);
833             /* check every bit is 1, 0, set, and not set at least once */
834             for (l=0; l<HASHSTATE; ++l)
835             {
836               e[l] &= (c[l]^d[l]);
837               f[l] &= ~(c[l]^d[l]);
838               g[l] &= c[l];
839               h[l] &= ~c[l];
840               x[l] &= d[l];
841               y[l] &= ~d[l];
842               if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
843             }
844             if (finished) break;
845           }
846           if (k>z) z=k;
847           if (k==MAXPAIR)
848           {
849              printf("Some bit didn't change: ");
850              printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
851                     e[0],f[0],g[0],h[0],x[0],y[0]);
852              printf("i %d j %d m %d len %d\n", i, j, m, hlen);
853           }
854           if (z==MAXPAIR) goto done;
855         }
856       }
857     }
858    done:
859     if (z < MAXPAIR)
860     {
861       printf("Mix success  %2d bytes  %2d initvals  ",i,m);
862       printf("required  %d  trials\n", z/2);
863     }
864   }
865   printf("\n");
866 }
867 
868 /* Check for reading beyond the end of the buffer and alignment problems */
driver3()869 void driver3()
870 {
871   uint8_t buf[MAXLEN+20], *b;
872   uint32_t len;
873   uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
874   uint32_t h;
875   uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
876   uint32_t i;
877   uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
878   uint32_t j;
879   uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
880   uint32_t ref,x,y;
881   uint8_t *p;
882 
883   printf("Endianness.  These lines should all be the same (for values filled in):\n");
884   printf("%.8x                            %.8x                            %.8x\n",
885          hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
886          hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
887          hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
888   p = q;
889   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
890          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
891          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
892          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
893          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
894          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
895          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
896   p = &qq[1];
897   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
898          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
899          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
900          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
901          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
902          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
903          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
904   p = &qqq[2];
905   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
906          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
907          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
908          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
909          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
910          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
911          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
912   p = &qqqq[3];
913   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
914          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
915          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
916          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
917          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
918          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
919          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
920   printf("\n");
921 
922   /* check that hashlittle2 and hashlittle produce the same results */
923   i=47; j=0;
924   hashlittle2(q, sizeof(q), &i, &j);
925   if (hashlittle(q, sizeof(q), 47) != i)
926     printf("hashlittle2 and hashlittle mismatch\n");
927 
928   /* check that hashword2 and hashword produce the same results */
929   len = 0xdeadbeef;
930   i=47, j=0;
931   hashword2(&len, 1, &i, &j);
932   if (hashword(&len, 1, 47) != i)
933     printf("hashword2 and hashword mismatch %x %x\n",
934            i, hashword(&len, 1, 47));
935 
936   /* check hashlittle doesn't read before or after the ends of the string */
937   for (h=0, b=buf+1; h<8; ++h, ++b)
938   {
939     for (i=0; i<MAXLEN; ++i)
940     {
941       len = i;
942       for (j=0; j<i; ++j) *(b+j)=0;
943 
944       /* these should all be equal */
945       ref = hashlittle(b, len, (uint32_t)1);
946       *(b+i)=(uint8_t)~0;
947       *(b-1)=(uint8_t)~0;
948       x = hashlittle(b, len, (uint32_t)1);
949       y = hashlittle(b, len, (uint32_t)1);
950       if ((ref != x) || (ref != y))
951       {
952         printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
953                h, i);
954       }
955     }
956   }
957 }
958 
959 /* check for problems with nulls */
driver4()960  void driver4()
961 {
962   uint8_t buf[1];
963   uint32_t h,i,state[HASHSTATE];
964 
965   buf[0] = ~0;
966   for (i=0; i<HASHSTATE; ++i) state[i] = 1;
967   printf("These should all be different\n");
968   for (i=0, h=0; i<8; ++i)
969   {
970     h = hashlittle(buf, 0, h);
971     printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
972   }
973 }
974 
driver5()975 void driver5()
976 {
977   uint32_t b,c;
978   b=0, c=0, hashlittle2("", 0, &c, &b);
979   printf("hash is %.8lx %.8lx\n", c, b);   /* deadbeef deadbeef */
980   b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
981   printf("hash is %.8lx %.8lx\n", c, b);   /* bd5b7dde deadbeef */
982   b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
983   printf("hash is %.8lx %.8lx\n", c, b);   /* 9c093ccd bd5b7dde */
984   b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
985   printf("hash is %.8lx %.8lx\n", c, b);   /* 17770551 ce7226e6 */
986   b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
987   printf("hash is %.8lx %.8lx\n", c, b);   /* e3607cae bd371de4 */
988   b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
989   printf("hash is %.8lx %.8lx\n", c, b);   /* cd628161 6cbea4b3 */
990   c = hashlittle("Four score and seven years ago", 30, 0);
991   printf("hash is %.8lx\n", c);   /* 17770551 */
992   c = hashlittle("Four score and seven years ago", 30, 1);
993   printf("hash is %.8lx\n", c);   /* cd628161 */
994 }
995 
main()996 int main()
997 {
998   driver1();   /* test that the key is hashed: used for timings */
999   driver2();   /* test that whole key is hashed thoroughly */
1000   driver3();   /* test that nothing but the key is hashed */
1001   driver4();   /* test hashing multiple buffers (all buffers are null) */
1002   driver5();   /* test the hash against known vectors */
1003   return 1;
1004 }
1005 
1006 #endif  /* SELF_TEST */
1007