datasketches-cpp — MurmurHash3.h
// Minimally modified from Austin Appleby's code:
//  * Removed MurmurHash3_x86_32 and MurmurHash3_x86_128
//  * Changed input seed in MurmurHash3_x64_128 to uint64_t
//  * Define and use HashState reference to return result
//  * Made entire hash function defined inline
//  * Added compute_seed_hash
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
15
16#ifndef _MURMURHASH3_H_
17#define _MURMURHASH3_H_
18
19#include <cstring>
20
21//-----------------------------------------------------------------------------
22// Platform-specific functions and macros
23
24// Microsoft Visual Studio
25
26#if defined(_MSC_VER)
27
28typedef unsigned char uint8_t;
29typedef unsigned int uint32_t;
30typedef unsigned __int64 uint64_t;
31
32#define MURMUR3_FORCE_INLINE __forceinline
33
34#include <stdlib.h>
35
36#define MURMUR3_ROTL64(x,y) _rotl64(x,y)
37
38#define MURMUR3_BIG_CONSTANT(x) (x)
39
40// Other compilers
41
42#else // defined(_MSC_VER)
43
44#include <stdint.h>
45
46#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))
47
// Rotate x left by r bits.
// Both shift counts are masked with & 63 so the expression stays
// well-defined for r == 0 (the unmasked form would shift by 64, which is
// undefined behavior in C/C++). For the rotation amounts actually used by
// this file (27, 31, 33) the result is identical to the unmasked form, and
// compilers recognize this idiom and emit a single rotate instruction.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << (r & 63)) | (x >> ((64 - r) & 63));
}
52
53#define MURMUR3_ROTL64(x,y) rotl64(x,y)
54
55#define MURMUR3_BIG_CONSTANT(x) (x##LLU)
56
57#endif // !defined(_MSC_VER)
58
59//-----------------------------------------------------------------------------
60
//-----------------------------------------------------------------------------
// Return type - Using C++ reference for return type which should allow better
// compiler optimization than a void* pointer.
// Holds the two 64-bit halves (h1, h2) of the 128-bit MurmurHash3 result;
// both halves are seeded and updated by MurmurHash3_x64_128 below.
typedef struct {
  uint64_t h1;
  uint64_t h2;
} HashState;
68
69
70//-----------------------------------------------------------------------------
71// Block read - if your platform needs to do endian-swapping or can only
72// handle aligned reads, do the conversion here
73
74MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, size_t i )
75{
76 uint64_t res;
77 memcpy(&res, p + i * sizeof(uint64_t), sizeof(res));
78 return res;
79}
80
81//-----------------------------------------------------------------------------
82// Finalization mix - force all bits of a hash block to avalanche
83
84MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
85{
86 k ^= k >> 33;
87 k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
88 k ^= k >> 33;
89 k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
90 k ^= k >> 33;
91
92 return k;
93}
94
// MurmurHash3, x64 128-bit variant. Mixes the input 16 bytes at a time into
// a 128-bit state (out.h1, out.h2), folds in any 1-15 byte remainder, then
// finalizes. Deterministic for a given (key, lenBytes, seed) on a given
// platform; NOT portable across endianness (see the note at the top of this
// file).
//
//   key      - start of the bytes to hash; any alignment (reads go through
//              getblock64, which uses memcpy)
//   lenBytes - number of bytes to hash
//   seed     - 64-bit seed; initializes both halves of the state
//   out      - receives the 128-bit result
MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
    uint64_t seed, HashState& out) {
  // Mixing multipliers from the reference implementation; do not change.
  static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
  static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);

  const uint8_t* data = (const uint8_t*)key;

  out.h1 = seed;
  out.h2 = seed;

  // Number of full 128-bit blocks of 16 bytes.
  // Possible exclusion of a remainder of up to 15 bytes.
  const size_t nblocks = lenBytes >> 4; // bytes / 16

  // Process the 128-bit blocks (the body) into the hash
  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
    uint64_t k1 = getblock64(data, i * 2 + 0);
    uint64_t k2 = getblock64(data, i * 2 + 1);

    k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
    out.h1 = MURMUR3_ROTL64(out.h1,27);
    out.h1 += out.h2;
    out.h1 = out.h1*5+0x52dce729;

    k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
    out.h2 = MURMUR3_ROTL64(out.h2,31);
    out.h2 += out.h1;
    out.h2 = out.h2*5+0x38495ab5;
  }

  // tail: accumulate the final 0-15 bytes into k1/k2. Each case deliberately
  // falls through so every remaining byte is gathered before the single mix
  // step; bytes 8-14 go into k2, bytes 0-7 into k1.
  const uint8_t * tail = (const uint8_t*)(data + (nblocks << 4));

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  switch(lenBytes & 15)
  {
    case 15: k2 ^= ((uint64_t)tail[14]) << 48; // falls through
    case 14: k2 ^= ((uint64_t)tail[13]) << 40; // falls through
    case 13: k2 ^= ((uint64_t)tail[12]) << 32; // falls through
    case 12: k2 ^= ((uint64_t)tail[11]) << 24; // falls through
    case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
    case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // falls through
    case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
             k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
             // falls through
    case  8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
    case  7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
    case  6: k1 ^= ((uint64_t)tail[ 5]) << 40; // falls through
    case  5: k1 ^= ((uint64_t)tail[ 4]) << 32; // falls through
    case  4: k1 ^= ((uint64_t)tail[ 3]) << 24; // falls through
    case  3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
    case  2: k1 ^= ((uint64_t)tail[ 1]) << 8; // falls through
    case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
             k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
  };

  //----------
  // finalization: fold in the length, cross-mix the two halves, avalanche
  // each with fmix64, then cross-mix again.

  out.h1 ^= lenBytes;
  out.h2 ^= lenBytes;

  out.h1 += out.h2;
  out.h2 += out.h1;

  out.h1 = fmix64(out.h1);
  out.h2 = fmix64(out.h2);

  out.h1 += out.h2;
  out.h2 += out.h1;
}
168
169//-----------------------------------------------------------------------------
170
171MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
172 HashState hashes;
173 MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
174 return static_cast<uint16_t>(hashes.h1 & 0xffff);
175}
176
177#undef MURMUR3_FORCE_INLINE
178#undef MURMUR3_ROTL64
179#undef MURMUR3_BIG_CONSTANT
180
181#endif // _MURMURHASH3_H_