datasketches-cpp
xxhash64.h
1 // //////////////////////////////////////////////////////////
2 // xxhash64.h
3 // Copyright (c) 2016 Stephan Brumme. All rights reserved.
4 // see http://create.stephan-brumme.com/disclaimer.html
5 //
6 
7 #pragma once
8 #include <stdint.h> // for uint32_t and uint64_t
9 
11 
24 class XXHash64
25 {
26 public:
28 
29  explicit XXHash64(uint64_t seed)
30  {
31  state[0] = seed + Prime1 + Prime2;
32  state[1] = seed + Prime2;
33  state[2] = seed;
34  state[3] = seed - Prime1;
35  bufferSize = 0;
36  totalLength = 0;
37  }
38 
40 
43  bool add(const void* input, uint64_t length)
44  {
45  // no data ?
46  if (!input || length == 0)
47  return false;
48 
49  totalLength += length;
50  // byte-wise access
51  const unsigned char* data = (const unsigned char*)input;
52 
53  // unprocessed old data plus new data still fit in temporary buffer ?
54  if (bufferSize + length < MaxBufferSize)
55  {
56  // just add new data
57  while (length-- > 0)
58  buffer[bufferSize++] = *data++;
59  return true;
60  }
61 
62  // point beyond last byte
63  const unsigned char* stop = data + length;
64  const unsigned char* stopBlock = stop - MaxBufferSize;
65 
66  // some data left from previous update ?
67  if (bufferSize > 0)
68  {
69  // make sure temporary buffer is full (16 bytes)
70  while (bufferSize < MaxBufferSize)
71  buffer[bufferSize++] = *data++;
72 
73  // process these 32 bytes (4x8)
74  process(buffer, state[0], state[1], state[2], state[3]);
75  }
76 
77  // copying state to local variables helps optimizer A LOT
78  uint64_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
79  // 32 bytes at once
80  while (data <= stopBlock)
81  {
82  // local variables s0..s3 instead of state[0]..state[3] are much faster
83  process(data, s0, s1, s2, s3);
84  data += 32;
85  }
86  // copy back
87  state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;
88 
89  // copy remainder to temporary buffer
90  bufferSize = stop - data;
91  for (uint64_t i = 0; i < bufferSize; i++)
92  buffer[i] = data[i];
93 
94  // done
95  return true;
96  }
97 
99 
100  uint64_t hash() const
101  {
102  // fold 256 bit state into one single 64 bit value
103  uint64_t result;
104  if (totalLength >= MaxBufferSize)
105  {
106  result = rotateLeft(state[0], 1) +
107  rotateLeft(state[1], 7) +
108  rotateLeft(state[2], 12) +
109  rotateLeft(state[3], 18);
110  result = (result ^ processSingle(0, state[0])) * Prime1 + Prime4;
111  result = (result ^ processSingle(0, state[1])) * Prime1 + Prime4;
112  result = (result ^ processSingle(0, state[2])) * Prime1 + Prime4;
113  result = (result ^ processSingle(0, state[3])) * Prime1 + Prime4;
114  }
115  else
116  {
117  // internal state wasn't set in add(), therefore original seed is still stored in state2
118  result = state[2] + Prime5;
119  }
120 
121  result += totalLength;
122 
123  // process remaining bytes in temporary buffer
124  const unsigned char* data = buffer;
125  // point beyond last byte
126  const unsigned char* stop = data + bufferSize;
127 
128  // at least 8 bytes left ? => eat 8 bytes per step
129  for (; data + 8 <= stop; data += 8)
130  result = rotateLeft(result ^ processSingle(0, *(uint64_t*)data), 27) * Prime1 + Prime4;
131 
132  // 4 bytes left ? => eat those
133  if (data + 4 <= stop)
134  {
135  result = rotateLeft(result ^ (*(uint32_t*)data) * Prime1, 23) * Prime2 + Prime3;
136  data += 4;
137  }
138 
139  // take care of remaining 0..3 bytes, eat 1 byte per step
140  while (data != stop)
141  result = rotateLeft(result ^ (*data++) * Prime5, 11) * Prime1;
142 
143  // mix bits
144  result ^= result >> 33;
145  result *= Prime2;
146  result ^= result >> 29;
147  result *= Prime3;
148  result ^= result >> 32;
149  return result;
150  }
151 
152 
154 
158  static uint64_t hash(const void* input, uint64_t length, uint64_t seed)
159  {
160  XXHash64 hasher(seed);
161  hasher.add(input, length);
162  return hasher.hash();
163  }
164 
165 private:
167  static const uint64_t Prime1 = 11400714785074694791ULL;
168  static const uint64_t Prime2 = 14029467366897019727ULL;
169  static const uint64_t Prime3 = 1609587929392839161ULL;
170  static const uint64_t Prime4 = 9650029242287828579ULL;
171  static const uint64_t Prime5 = 2870177450012600261ULL;
172 
174  static const uint64_t MaxBufferSize = 31+1;
175 
176  uint64_t state[4];
177  unsigned char buffer[MaxBufferSize];
178  uint64_t bufferSize;
179  uint64_t totalLength;
180 
182  static inline uint64_t rotateLeft(uint64_t x, unsigned char bits)
183  {
184  return (x << bits) | (x >> (64 - bits));
185  }
186 
188  static inline uint64_t processSingle(uint64_t previous, uint64_t input)
189  {
190  return rotateLeft(previous + input * Prime2, 31) * Prime1;
191  }
192 
194  static inline void process(const void* data, uint64_t& state0, uint64_t& state1, uint64_t& state2, uint64_t& state3)
195  {
196  const uint64_t* block = (const uint64_t*) data;
197  state0 = processSingle(state0, block[0]);
198  state1 = processSingle(state1, block[1]);
199  state2 = processSingle(state2, block[2]);
200  state3 = processSingle(state3, block[3]);
201  }
202 };
XXHash (64 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/.
Definition: xxhash64.h:25
uint64_t hash() const
get current hash
Definition: xxhash64.h:100
static uint64_t hash(const void *input, uint64_t length, uint64_t seed)
combine constructor, add() and hash() in one static function (C style)
Definition: xxhash64.h:158
XXHash64(uint64_t seed)
create new XXHash (64 bit)
Definition: xxhash64.h:29
bool add(const void *input, uint64_t length)
add a chunk of bytes
Definition: xxhash64.h:43