datasketches-cpp
Loading...
Searching...
No Matches
xxhash64.h
1// //////////////////////////////////////////////////////////
2// xxhash64.h
3// Copyright (c) 2016 Stephan Brumme. All rights reserved.
4// see http://create.stephan-brumme.com/disclaimer.html
5//
6
7#pragma once
8#include <stdint.h> // for uint32_t and uint64_t
9
11
25{
26public:
28
29 explicit XXHash64(uint64_t seed)
30 {
31 state[0] = seed + Prime1 + Prime2;
32 state[1] = seed + Prime2;
33 state[2] = seed;
34 state[3] = seed - Prime1;
35 bufferSize = 0;
36 totalLength = 0;
37 }
38
40
43 bool add(const void* input, uint64_t length)
44 {
45 // no data ?
46 if (!input || length == 0)
47 return false;
48
49 totalLength += length;
50 // byte-wise access
51 const unsigned char* data = (const unsigned char*)input;
52
53 // unprocessed old data plus new data still fit in temporary buffer ?
54 if (bufferSize + length < MaxBufferSize)
55 {
56 // just add new data
57 while (length-- > 0)
58 buffer[bufferSize++] = *data++;
59 return true;
60 }
61
62 // point beyond last byte
63 const unsigned char* stop = data + length;
64 const unsigned char* stopBlock = stop - MaxBufferSize;
65
66 // some data left from previous update ?
67 if (bufferSize > 0)
68 {
69 // make sure temporary buffer is full (16 bytes)
70 while (bufferSize < MaxBufferSize)
71 buffer[bufferSize++] = *data++;
72
73 // process these 32 bytes (4x8)
74 process(buffer, state[0], state[1], state[2], state[3]);
75 }
76
77 // copying state to local variables helps optimizer A LOT
78 uint64_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
79 // 32 bytes at once
80 while (data <= stopBlock)
81 {
82 // local variables s0..s3 instead of state[0]..state[3] are much faster
83 process(data, s0, s1, s2, s3);
84 data += 32;
85 }
86 // copy back
87 state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;
88
89 // copy remainder to temporary buffer
90 bufferSize = stop - data;
91 for (uint64_t i = 0; i < bufferSize; i++)
92 buffer[i] = data[i];
93
94 // done
95 return true;
96 }
97
99
100 uint64_t hash() const
101 {
102 // fold 256 bit state into one single 64 bit value
103 uint64_t result;
104 if (totalLength >= MaxBufferSize)
105 {
106 result = rotateLeft(state[0], 1) +
107 rotateLeft(state[1], 7) +
108 rotateLeft(state[2], 12) +
109 rotateLeft(state[3], 18);
110 result = (result ^ processSingle(0, state[0])) * Prime1 + Prime4;
111 result = (result ^ processSingle(0, state[1])) * Prime1 + Prime4;
112 result = (result ^ processSingle(0, state[2])) * Prime1 + Prime4;
113 result = (result ^ processSingle(0, state[3])) * Prime1 + Prime4;
114 }
115 else
116 {
117 // internal state wasn't set in add(), therefore original seed is still stored in state2
118 result = state[2] + Prime5;
119 }
120
121 result += totalLength;
122
123 // process remaining bytes in temporary buffer
124 const unsigned char* data = buffer;
125 // point beyond last byte
126 const unsigned char* stop = data + bufferSize;
127
128 // at least 8 bytes left ? => eat 8 bytes per step
129 for (; data + 8 <= stop; data += 8)
130 result = rotateLeft(result ^ processSingle(0, *(uint64_t*)data), 27) * Prime1 + Prime4;
131
132 // 4 bytes left ? => eat those
133 if (data + 4 <= stop)
134 {
135 result = rotateLeft(result ^ (*(uint32_t*)data) * Prime1, 23) * Prime2 + Prime3;
136 data += 4;
137 }
138
139 // take care of remaining 0..3 bytes, eat 1 byte per step
140 while (data != stop)
141 result = rotateLeft(result ^ (*data++) * Prime5, 11) * Prime1;
142
143 // mix bits
144 result ^= result >> 33;
145 result *= Prime2;
146 result ^= result >> 29;
147 result *= Prime3;
148 result ^= result >> 32;
149 return result;
150 }
151
152
154
158 static uint64_t hash(const void* input, uint64_t length, uint64_t seed)
159 {
160 XXHash64 hasher(seed);
161 hasher.add(input, length);
162 return hasher.hash();
163 }
164
165private:
167 static const uint64_t Prime1 = 11400714785074694791ULL;
168 static const uint64_t Prime2 = 14029467366897019727ULL;
169 static const uint64_t Prime3 = 1609587929392839161ULL;
170 static const uint64_t Prime4 = 9650029242287828579ULL;
171 static const uint64_t Prime5 = 2870177450012600261ULL;
172
174 static const uint64_t MaxBufferSize = 31+1;
175
176 uint64_t state[4];
177 unsigned char buffer[MaxBufferSize];
178 uint64_t bufferSize;
179 uint64_t totalLength;
180
182 static inline uint64_t rotateLeft(uint64_t x, unsigned char bits)
183 {
184 return (x << bits) | (x >> (64 - bits));
185 }
186
188 static inline uint64_t processSingle(uint64_t previous, uint64_t input)
189 {
190 return rotateLeft(previous + input * Prime2, 31) * Prime1;
191 }
192
194 static inline void process(const void* data, uint64_t& state0, uint64_t& state1, uint64_t& state2, uint64_t& state3)
195 {
196 const uint64_t* block = (const uint64_t*) data;
197 state0 = processSingle(state0, block[0]);
198 state1 = processSingle(state1, block[1]);
199 state2 = processSingle(state2, block[2]);
200 state3 = processSingle(state3, block[3]);
201 }
202};
XXHash (64 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/.
Definition xxhash64.h:25
uint64_t hash() const
get current hash
Definition xxhash64.h:100
static uint64_t hash(const void *input, uint64_t length, uint64_t seed)
combine constructor, add() and hash() in one static function (C style)
Definition xxhash64.h:158
XXHash64(uint64_t seed)
create new XXHash (64 bit)
Definition xxhash64.h:29
bool add(const void *input, uint64_t length)
add a chunk of bytes
Definition xxhash64.h:43