datasketches-cpp
reverse_purge_hash_map.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef REVERSE_PURGE_HASH_MAP_HPP_
21 #define REVERSE_PURGE_HASH_MAP_HPP_
22 
23 #include <memory>
24 #include <iterator>
25 
26 namespace datasketches {
27 
28 /*
29  * This is a specialized linear-probing hash map with a reverse purge operation
30  * that removes all entries in the map with values that are less than zero.
31  * Based on Java implementation here:
32  * https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
33  * author Alexander Saydakov
34  */
35 
36 template<
37  typename K,
38  typename V = uint64_t,
39  typename H = std::hash<K>,
40  typename E = std::equal_to<K>,
41  typename A = std::allocator<K>
42 >
43 class reverse_purge_hash_map {
44 public:
45  using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>;
46  using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
47 
48  reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const E& equal, const A& allocator);
49  reverse_purge_hash_map(const reverse_purge_hash_map& other);
50  reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept;
51  ~reverse_purge_hash_map();
52  reverse_purge_hash_map& operator=(const reverse_purge_hash_map& other);
53  reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other);
54 
55  template<typename FwdK>
56  V adjust_or_insert(FwdK&& key, V value);
57 
58  V get(const K& key) const;
59  uint8_t get_lg_cur_size() const;
60  uint8_t get_lg_max_size() const;
61  uint32_t get_capacity() const;
62  uint32_t get_num_active() const;
63  const A& get_allocator() const;
64 
65  class iterator;
66  iterator begin() const;
67  iterator end() const;
68 
69 private:
70  static constexpr double LOAD_FACTOR = 0.75;
71  static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing
72  static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge
73 
74  E equal_;
75  A allocator_;
76  uint8_t lg_cur_size_;
77  uint8_t lg_max_size_;
78  uint32_t num_active_;
79  K* keys_;
80  V* values_;
81  uint16_t* states_;
82 
83  inline bool is_active(uint32_t probe) const;
84  void subtract_and_keep_positive_only(V amount);
85  void hash_delete(uint32_t probe);
86  uint32_t internal_adjust_or_insert(const K& key, V value);
87  V resize_or_purge_if_needed();
88  void resize(uint8_t lg_new_size);
89  V purge();
90 };
91 
92 // This iterator uses strides based on golden ratio to avoid clustering during merge
93 template<typename K, typename V, typename H, typename E, typename A>
94 class reverse_purge_hash_map<K, V, H, E, A>::iterator {
95 public:
96  using iterator_category = std::input_iterator_tag;
97  using value_type = std::pair<K&, V>;
98  using difference_type = void;
99  using pointer = void;
100  using reference = const value_type;
101 
102  friend class reverse_purge_hash_map<K, V, H, E, A>;
103  iterator& operator++() {
104  ++count;
105  if (count < map->num_active_) {
106  const uint32_t mask = (1 << map->lg_cur_size_) - 1;
107  do {
108  index = (index + stride) & mask;
109  } while (!map->is_active(index));
110  }
111  return *this;
112  }
113  iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
114  bool operator==(const iterator& rhs) const { return count == rhs.count; }
115  bool operator!=(const iterator& rhs) const { return count != rhs.count; }
116  reference operator*() const {
117  return value_type(map->keys_[index], map->values_[index]);
118  }
119 private:
120  static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
121  const reverse_purge_hash_map<K, V, H, E, A>* map;
122  uint32_t index;
123  uint32_t count;
124  uint32_t stride;
125  iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count):
126  map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {}
127 };
128 
129 } /* namespace datasketches */
130 
131 #include "reverse_purge_hash_map_impl.hpp"
132 
133 #endif
DataSketches namespace.
Definition: binomial_bounds.hpp:38