datasketches-cpp
Loading...
Searching...
No Matches
reverse_purge_hash_map.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef REVERSE_PURGE_HASH_MAP_HPP_
21#define REVERSE_PURGE_HASH_MAP_HPP_
22
23#include <cstdint>
24#include <iterator>
25#include <memory>
26
27namespace datasketches {
28
29/*
30 * This is a specialized linear-probing hash map with a reverse purge operation
31 * that removes all entries in the map with values that are less than zero.
32 * Based on Java implementation here:
33 * https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
34 * author Alexander Saydakov
35 */
36
37template<
38 typename K,
39 typename V = uint64_t,
40 typename H = std::hash<K>,
41 typename E = std::equal_to<K>,
42 typename A = std::allocator<K>
43>
44class reverse_purge_hash_map {
45public:
46 using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
47 using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
48
49 reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const E& equal, const A& allocator);
50 reverse_purge_hash_map(const reverse_purge_hash_map& other);
51 reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
52 ~reverse_purge_hash_map();
53 reverse_purge_hash_map& operator=(const reverse_purge_hash_map& other);
54 reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
55
56 template<typename FwdK>
57 V adjust_or_insert(FwdK&& key, V value);
58
59 V get(const K& key) const;
60 uint8_t get_lg_cur_size() const;
61 uint8_t get_lg_max_size() const;
62 uint32_t get_capacity() const;
63 uint32_t get_num_active() const;
64 const A& get_allocator() const;
65
66 class iterator;
67 iterator begin() const;
68 iterator end() const;
69
70private:
71 static constexpr double LOAD_FACTOR = 0.75;
72 static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
73 static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
74
75 E equal_;
76 A allocator_;
77 uint8_t lg_cur_size_;
78 uint8_t lg_max_size_;
79 uint32_t num_active_;
80 K* keys_;
81 V* values_;
82 uint16_t* states_;
83
84 inline bool is_active(uint32_t probe) const;
85 void subtract_and_keep_positive_only(V amount);
86 void hash_delete(uint32_t probe);
87 uint32_t internal_adjust_or_insert(const K& key, V value);
88 V resize_or_purge_if_needed();
89 void resize(uint8_t lg_new_size);
90 V purge();
91};
92
93// This iterator uses strides based on golden ratio to avoid clustering during merge
94template<typename K, typename V, typename H, typename E, typename A>
95class reverse_purge_hash_map<K, V, H, E, A>::iterator {
96public:
97 using iterator_category = std::input_iterator_tag;
98 using value_type = std::pair<K&, V>;
99 using difference_type = void;
100 using pointer = void;
101 using reference = const value_type;
102
103 friend class reverse_purge_hash_map<K, V, H, E, A>;
104 iterator& operator++() {
105 ++count;
106 if (count < map->num_active_) {
107 const uint32_t mask = (1 << map->lg_cur_size_) - 1;
108 do {
109 index = (index + stride) & mask;
110 } while (!map->is_active(index));
111 }
112 return *this;
113 }
114 iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
115 bool operator==(const iterator& rhs) const { return count == rhs.count; }
116 bool operator!=(const iterator& rhs) const { return count != rhs.count; }
117 reference operator*() const {
118 return value_type(map->keys_[index], map->values_[index]);
119 }
120private:
121 static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
122 const reverse_purge_hash_map<K, V, H, E, A>* map;
123 uint32_t index;
124 uint32_t count;
125 uint32_t stride;
126 iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
127 map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
128};
129
130} /* namespace datasketches */
131
132#include "reverse_purge_hash_map_impl.hpp"
133
134#endif
DataSketches namespace.
Definition binomial_bounds.hpp:38