// datasketches-cpp
// MurmurHash3.h
1 // Minimally modified from Austin Applebee's code:
2 // * Removed MurmurHash3_x86_32 and MurmurHash3_x86_128
3 // * Changed input seed in MurmurHash3_x64_128 to uint64_t
4 // * Define and use HashState reference to return result
5 // * Made entire hash function defined inline
6 // * Added compute_seed_hash
7 //-----------------------------------------------------------------------------
8 // MurmurHash3 was written by Austin Appleby, and is placed in the public
9 // domain. The author hereby disclaims copyright to this source code.
10 
11 // Note - The x86 and x64 versions do _not_ produce the same results, as the
12 // algorithms are optimized for their respective platforms. You can still
13 // compile and run any of them on any platform, but your performance with the
14 // non-native version will be less than optimal.
15 
16 #ifndef _MURMURHASH3_H_
17 #define _MURMURHASH3_H_
18 
19 #include <cstring>
20 
21 //-----------------------------------------------------------------------------
22 // Platform-specific functions and macros
23 
24 // Microsoft Visual Studio
25 
#if defined(_MSC_VER)

// NOTE(review): these typedefs date from before MSVC shipped <stdint.h>
// (VS2010); on a modern MSVC they may collide with the standard fixed-width
// types if <stdint.h> is pulled in elsewhere — verify against supported
// toolchains.
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

#define MURMUR3_FORCE_INLINE __forceinline

#include <stdlib.h>  // provides the _rotl64 intrinsic

#define MURMUR3_ROTL64(x,y) _rotl64(x,y)

// MSVC accepts plain 64-bit literals without a suffix here.
#define MURMUR3_BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

#include <stdint.h>

#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))

// Portable 64-bit rotate-left. This file only rotates by 27, 31 and 33,
// so both shift amounts (r and 64 - r) stay in [1,63] and are well defined.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

#define MURMUR3_ROTL64(x,y) rotl64(x,y)

#define MURMUR3_BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)
58 
59 //-----------------------------------------------------------------------------
60 
61 //-----------------------------------------------------------------------------
62 // Return type - Using C++ reference for return type which should allow better
63 // compiler optimization than a void* pointer
// 128-bit hash result, returned through a reference parameter as two
// 64-bit halves (h1 holds the low half of the canonical 128-bit output).
typedef struct {
  uint64_t h1;
  uint64_t h2;
} HashState;
68 
69 
70 //-----------------------------------------------------------------------------
71 // Block read - if your platform needs to do endian-swapping or can only
72 // handle aligned reads, do the conversion here
73 
74 MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, size_t i )
75 {
76  uint64_t res;
77  memcpy(&res, p + i * sizeof(uint64_t), sizeof(res));
78  return res;
79 }
80 
81 //-----------------------------------------------------------------------------
82 // Finalization mix - force all bits of a hash block to avalanche
83 
84 MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
85 {
86  k ^= k >> 33;
87  k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
88  k ^= k >> 33;
89  k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
90  k ^= k >> 33;
91 
92  return k;
93 }
94 
// MurmurHash3 x64 128-bit variant: hashes lenBytes bytes of key with the
// given 64-bit seed and writes the 128-bit result into out (h1, h2).
// NOTE(review): 8-byte blocks are read in native byte order via getblock64,
// so results differ between little- and big-endian hosts — confirm callers
// only compare hashes produced on the same endianness.
MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
    uint64_t seed, HashState& out) {
  // Mixing constants from the reference implementation.
  static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
  static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);

  const uint8_t* data = (const uint8_t*)key;

  // Both halves of the state start from the same seed.
  out.h1 = seed;
  out.h2 = seed;

  // Number of full 128-bit blocks of 16 bytes.
  // Possible exclusion of a remainder of up to 15 bytes.
  const size_t nblocks = lenBytes >> 4; // bytes / 16

  // Process the 128-bit blocks (the body) into the hash
  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
    uint64_t k1 = getblock64(data, i * 2 + 0);
    uint64_t k2 = getblock64(data, i * 2 + 1);

    // Each half is multiplied, rotated, multiplied again, then folded into
    // its state word; the state words are cross-added so the halves interact.
    k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
    out.h1 = MURMUR3_ROTL64(out.h1,27);
    out.h1 += out.h2;
    out.h1 = out.h1*5+0x52dce729;

    k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
    out.h2 = MURMUR3_ROTL64(out.h2,31);
    out.h2 += out.h1;
    out.h2 = out.h2*5+0x38495ab5;
  }

  // tail: gather the remaining 0-15 bytes. Every case intentionally falls
  // through to the next, accumulating bytes from highest offset down.
  const uint8_t * tail = (const uint8_t*)(data + (nblocks << 4));

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(lenBytes & 15)
  {
    case 15: k2 ^= ((uint64_t)tail[14]) << 48; // falls through
    case 14: k2 ^= ((uint64_t)tail[13]) << 40; // falls through
    case 13: k2 ^= ((uint64_t)tail[12]) << 32; // falls through
    case 12: k2 ^= ((uint64_t)tail[11]) << 24; // falls through
    case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
    case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // falls through
    case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
             k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
             // falls through
    case  8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
    case  7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
    case  6: k1 ^= ((uint64_t)tail[ 5]) << 40; // falls through
    case  5: k1 ^= ((uint64_t)tail[ 4]) << 32; // falls through
    case  4: k1 ^= ((uint64_t)tail[ 3]) << 24; // falls through
    case  3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
    case  2: k1 ^= ((uint64_t)tail[ 1]) << 8; // falls through
    case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
             k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
  };

  //----------
  // finalization: fold in the length, cross-add, avalanche, cross-add again.

  out.h1 ^= lenBytes;
  out.h2 ^= lenBytes;

  out.h1 += out.h2;
  out.h2 += out.h1;

  out.h1 = fmix64(out.h1);
  out.h2 = fmix64(out.h2);

  out.h1 += out.h2;
  out.h2 += out.h1;
}
168 
169 //-----------------------------------------------------------------------------
170 
171 MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
172  HashState hashes;
173  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
174  return static_cast<uint16_t>(hashes.h1 & 0xffff);
175 }
176 
177 #undef MURMUR3_FORCE_INLINE
178 #undef MURMUR3_ROTL64
179 #undef MURMUR3_BIG_CONSTANT
180 
181 #endif // _MURMURHASH3_H_