datasketches-cpp
Loading...
Searching...
No Matches
reverse_purge_hash_map.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef REVERSE_PURGE_HASH_MAP_HPP_
21#define REVERSE_PURGE_HASH_MAP_HPP_
22
23#include <memory>
24#include <iterator>
25
26namespace datasketches {
27
28/*
29 * This is a specialized linear-probing hash map with a reverse purge operation
30 * that removes all entries in the map with values that are less than zero.
31 * Based on Java implementation here:
32 * https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
33 * author Alexander Saydakov
34 */
35
36template<
37 typename K,
38 typename V = uint64_t,
39 typename H = std::hash<K>,
40 typename E = std::equal_to<K>,
41 typename A = std::allocator<K>
42>
43class reverse_purge_hash_map {
44public:
45 using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
46 using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
47
48 reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const E& equal, const A& allocator);
49 reverse_purge_hash_map(const reverse_purge_hash_map& other);
50 reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
51 ~reverse_purge_hash_map();
52 reverse_purge_hash_map& operator=(const reverse_purge_hash_map& other);
53 reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
54
55 template<typename FwdK>
56 V adjust_or_insert(FwdK&& key, V value);
57
58 V get(const K& key) const;
59 uint8_t get_lg_cur_size() const;
60 uint8_t get_lg_max_size() const;
61 uint32_t get_capacity() const;
62 uint32_t get_num_active() const;
63 const A& get_allocator() const;
64
65 class iterator;
66 iterator begin() const;
67 iterator end() const;
68
69private:
70 static constexpr double LOAD_FACTOR = 0.75;
71 static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
72 static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
73
74 E equal_;
75 A allocator_;
76 uint8_t lg_cur_size_;
77 uint8_t lg_max_size_;
78 uint32_t num_active_;
79 K* keys_;
80 V* values_;
81 uint16_t* states_;
82
83 inline bool is_active(uint32_t probe) const;
84 void subtract_and_keep_positive_only(V amount);
85 void hash_delete(uint32_t probe);
86 uint32_t internal_adjust_or_insert(const K& key, V value);
87 V resize_or_purge_if_needed();
88 void resize(uint8_t lg_new_size);
89 V purge();
90};
91
92// This iterator uses strides based on golden ratio to avoid clustering during merge
93template<typename K, typename V, typename H, typename E, typename A>
94class reverse_purge_hash_map<K, V, H, E, A>::iterator {
95public:
96 using iterator_category = std::input_iterator_tag;
97 using value_type = std::pair<K&, V>;
98 using difference_type = void;
99 using pointer = void;
100 using reference = const value_type;
101
102 friend class reverse_purge_hash_map<K, V, H, E, A>;
103 iterator& operator++() {
104 ++count;
105 if (count < map->num_active_) {
106 const uint32_t mask = (1 << map->lg_cur_size_) - 1;
107 do {
108 index = (index + stride) & mask;
109 } while (!map->is_active(index));
110 }
111 return *this;
112 }
113 iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
114 bool operator==(const iterator& rhs) const { return count == rhs.count; }
115 bool operator!=(const iterator& rhs) const { return count != rhs.count; }
116 reference operator*() const {
117 return value_type(map->keys_[index], map->values_[index]);
118 }
119private:
120 static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
121 const reverse_purge_hash_map<K, V, H, E, A>* map;
122 uint32_t index;
123 uint32_t count;
124 uint32_t stride;
125 iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
126 map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
127};
128
129} /* namespace datasketches */
130
131#include "reverse_purge_hash_map_impl.hpp"
132
133#endif
DataSketches namespace.
Definition binomial_bounds.hpp:38