20 #ifndef REVERSE_PURGE_HASH_MAP_IMPL_HPP_
21 #define REVERSE_PURGE_HASH_MAP_IMPL_HPP_
28 #include "MurmurHash3.h"
33 template<
typename K,
typename V,
typename H,
typename E,
typename A>
34 constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE;
36 template<
typename K,
typename V,
typename H,
typename E,
typename A>
37 reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size,
38 const E& equal,
const A& allocator):
40 allocator_(allocator),
41 lg_cur_size_(lg_cur_size),
42 lg_max_size_(lg_max_size),
44 keys_(allocator_.allocate(1ULL << lg_cur_size)),
48 AllocV av(allocator_);
49 values_ = av.allocate(1ULL << lg_cur_size);
50 AllocU16 au16(allocator_);
51 states_ = au16.allocate(1ULL << lg_cur_size);
52 std::fill(states_, states_ + (1ULL << lg_cur_size),
static_cast<uint16_t
>(0));
55 template<
typename K,
typename V,
typename H,
typename E,
typename A>
56 reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(
const reverse_purge_hash_map<K, V, H, E, A>& other):
58 allocator_(other.allocator_),
59 lg_cur_size_(other.lg_cur_size_),
60 lg_max_size_(other.lg_max_size_),
61 num_active_(other.num_active_),
62 keys_(allocator_.allocate(1ULL << lg_cur_size_)),
66 AllocV av(allocator_);
67 values_ = av.allocate(1ULL << lg_cur_size_);
68 AllocU16 au16(allocator_);
69 states_ = au16.allocate(1ULL << lg_cur_size_);
70 const uint32_t size = 1 << lg_cur_size_;
71 if (num_active_ > 0) {
72 auto num = num_active_;
73 for (uint32_t i = 0; i < size; i++) {
74 if (other.states_[i] > 0) {
75 new (&keys_[i]) K(other.keys_[i]);
76 values_[i] = other.values_[i];
77 if (--num == 0)
break;
81 std::copy(other.states_, other.states_ + size, states_);
84 template<
typename K,
typename V,
typename H,
typename E,
typename A>
85 reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept:
86 equal_(std::move(other.equal_)),
87 allocator_(std::move(other.allocator_)),
88 lg_cur_size_(other.lg_cur_size_),
89 lg_max_size_(other.lg_max_size_),
90 num_active_(other.num_active_),
95 std::swap(keys_, other.keys_);
96 std::swap(values_, other.values_);
97 std::swap(states_, other.states_);
98 other.num_active_ = 0;
101 template<
typename K,
typename V,
typename H,
typename E,
typename A>
102 reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
103 const uint32_t size = 1 << lg_cur_size_;
104 if (num_active_ > 0) {
105 for (uint32_t i = 0; i < size; i++) {
108 if (--num_active_ == 0)
break;
112 if (keys_ !=
nullptr) {
113 allocator_.deallocate(keys_, size);
115 if (values_ !=
nullptr) {
116 AllocV av(allocator_);
117 av.deallocate(values_, size);
119 if (states_ !=
nullptr) {
120 AllocU16 au16(allocator_);
121 au16.deallocate(states_, size);
125 template<
typename K,
typename V,
typename H,
typename E,
typename A>
126 reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(
const reverse_purge_hash_map<K, V, H, E, A>& other) {
127 reverse_purge_hash_map copy(other);
128 std::swap(equal_, copy.equal_);
129 std::swap(allocator_, copy.allocator_);
130 std::swap(lg_cur_size_, copy.lg_cur_size_);
131 std::swap(lg_max_size_, copy.lg_max_size_);
132 std::swap(num_active_, copy.num_active_);
133 std::swap(keys_, copy.keys_);
134 std::swap(values_, copy.values_);
135 std::swap(states_, copy.states_);
139 template<
typename K,
typename V,
typename H,
typename E,
typename A>
140 reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) {
141 std::swap(equal_, other.equal_);
142 std::swap(allocator_, other.allocator_);
143 std::swap(lg_cur_size_, other.lg_cur_size_);
144 std::swap(lg_max_size_, other.lg_max_size_);
145 std::swap(num_active_, other.num_active_);
146 std::swap(keys_, other.keys_);
147 std::swap(values_, other.values_);
148 std::swap(states_, other.states_);
152 template<
typename K,
typename V,
typename H,
typename E,
typename A>
153 template<
typename FwdK>
154 V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) {
155 const uint32_t num_active_before = num_active_;
156 const uint32_t index = internal_adjust_or_insert(key, value);
157 if (num_active_ > num_active_before) {
158 new (&keys_[index]) K(std::forward<FwdK>(key));
159 return resize_or_purge_if_needed();
164 template<
typename K,
typename V,
typename H,
typename E,
typename A>
165 V reverse_purge_hash_map<K, V, H, E, A>::get(
const K& key)
const {
166 const uint32_t mask = (1 << lg_cur_size_) - 1;
167 uint32_t probe = fmix64(H()(key)) & mask;
168 while (is_active(probe)) {
169 if (E()(keys_[probe], key))
return values_[probe];
170 probe = (probe + 1) & mask;
175 template<
typename K,
typename V,
typename H,
typename E,
typename A>
176 uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size()
const {
180 template<
typename K,
typename V,
typename H,
typename E,
typename A>
181 uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size()
const {
185 template<
typename K,
typename V,
typename H,
typename E,
typename A>
186 uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity()
const {
187 return static_cast<uint32_t
>((1 << lg_cur_size_) * LOAD_FACTOR);
190 template<
typename K,
typename V,
typename H,
typename E,
typename A>
191 uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active()
const {
195 template<
typename K,
typename V,
typename H,
typename E,
typename A>
196 const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator()
const {
200 template<
typename K,
typename V,
typename H,
typename E,
typename A>
201 typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin()
const {
202 const uint32_t size = 1 << lg_cur_size_;
204 while (i < size && !is_active(i)) i++;
205 return reverse_purge_hash_map<K, V, H, E, A>::iterator(
this, i, 0);
208 template<
typename K,
typename V,
typename H,
typename E,
typename A>
209 typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end()
const {
210 return reverse_purge_hash_map<K, V, H, E, A>::iterator(
this, 1 << lg_cur_size_, num_active_);
213 template<
typename K,
typename V,
typename H,
typename E,
typename A>
214 bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index)
const {
215 return states_[index] > 0;
218 template<
typename K,
typename V,
typename H,
typename E,
typename A>
219 void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) {
222 uint32_t first_probe = (1 << lg_cur_size_) - 1;
223 while (is_active(first_probe)) first_probe--;
226 for (uint32_t probe = first_probe; probe-- > 0;) {
227 if (is_active(probe)) {
228 if (values_[probe] <= amount) {
232 values_[probe] -= amount;
237 for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) {
238 if (is_active(probe)) {
239 if (values_[probe] <= amount) {
243 values_[probe] -= amount;
249 template<
typename K,
typename V,
typename H,
typename E,
typename A>
250 void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
254 states_[delete_index] = 0;
255 keys_[delete_index].~K();
257 const uint32_t mask = (1 << lg_cur_size_) - 1;
258 uint32_t probe = (delete_index + drift) & mask;
260 while (is_active(probe)) {
261 if (states_[probe] > drift) {
263 new (&keys_[delete_index]) K(std::move(keys_[probe]));
264 values_[delete_index] = values_[probe];
265 states_[delete_index] = states_[probe] - drift;
269 delete_index = probe;
271 probe = (probe + 1) & mask;
274 if (drift >= DRIFT_LIMIT)
throw std::logic_error(
"drift: " + std::to_string(drift) +
" >= DRIFT_LIMIT");
278 template<
typename K,
typename V,
typename H,
typename E,
typename A>
279 uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(
const K& key, V value) {
280 const uint32_t mask = (1 << lg_cur_size_) - 1;
281 uint32_t index = fmix64(H()(key)) & mask;
283 while (is_active(index)) {
284 if (E()(keys_[index], key)) {
286 values_[index] += value;
289 index = (index + 1) & mask;
292 if (drift >= DRIFT_LIMIT)
throw std::logic_error(
"drift limit reached");
295 if (num_active_ > get_capacity()) {
296 throw std::logic_error(
"num_active " + std::to_string(num_active_) +
" > capacity " + std::to_string(get_capacity()));
298 values_[index] = value;
299 states_[index] = drift;
304 template<
typename K,
typename V,
typename H,
typename E,
typename A>
305 V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() {
306 if (num_active_ > get_capacity()) {
307 if (lg_cur_size_ < lg_max_size_) {
308 resize(lg_cur_size_ + 1);
310 const V offset = purge();
311 if (num_active_ > get_capacity()) {
312 throw std::logic_error(
"purge did not reduce number of active items");
320 template<
typename K,
typename V,
typename H,
typename E,
typename A>
321 void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
322 const uint32_t old_size = 1 << lg_cur_size_;
324 V* old_values = values_;
325 uint16_t* old_states = states_;
326 const uint32_t new_size = 1 << lg_new_size;
327 keys_ = allocator_.allocate(new_size);
328 AllocV av(allocator_);
329 values_ = av.allocate(new_size);
330 AllocU16 au16(allocator_);
331 states_ = au16.allocate(new_size);
332 std::fill(states_, states_ + new_size,
static_cast<uint16_t
>(0));
334 lg_cur_size_ = lg_new_size;
335 for (uint32_t i = 0; i < old_size; i++) {
336 if (old_states[i] > 0) {
337 adjust_or_insert(std::move(old_keys[i]), old_values[i]);
341 allocator_.deallocate(old_keys, old_size);
342 av.deallocate(old_values, old_size);
343 au16.deallocate(old_states, old_size);
346 template<
typename K,
typename V,
typename H,
typename E,
typename A>
347 V reverse_purge_hash_map<K, V, H, E, A>::purge() {
348 const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_);
349 uint32_t num_samples = 0;
351 AllocV av(allocator_);
352 V* samples = av.allocate(limit);
353 while (num_samples < limit) {
355 samples[num_samples++] = values_[i];
359 std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples);
360 const V median = samples[num_samples / 2];
361 av.deallocate(samples, limit);
362 subtract_and_keep_positive_only(median);
DataSketches namespace.
Definition: binomial_bounds.hpp:38