20 #ifndef THETA_UPDATE_SKETCH_BASE_HPP_
21 #define THETA_UPDATE_SKETCH_BASE_HPP_
28 #include "MurmurHash3.h"
29 #include "theta_comparators.hpp"
30 #include "theta_constants.hpp"
39 struct theta_update_sketch_base {
41 using comparator = compare_by_key<ExtractKey>;
43 theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
float p,
44 uint64_t theta, uint64_t seed,
const Allocator& allocator,
bool is_empty =
true);
45 theta_update_sketch_base(
const theta_update_sketch_base& other);
46 theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
47 ~theta_update_sketch_base();
48 theta_update_sketch_base& operator=(
const theta_update_sketch_base& other);
49 theta_update_sketch_base& operator=(theta_update_sketch_base&& other);
51 using iterator = Entry*;
53 inline uint64_t hash_and_screen(
const void* data,
size_t length);
55 inline std::pair<iterator, bool> find(uint64_t key)
const;
56 static inline std::pair<iterator, bool> find(Entry* entries, uint8_t lg_size, uint64_t key);
59 template<
typename FwdEntry>
60 inline void insert(iterator it, FwdEntry&& entry);
62 iterator begin()
const;
66 static constexpr
double RESIZE_THRESHOLD = 0.5;
68 static constexpr
double REBUILD_THRESHOLD = 15.0 / 16.0;
70 static constexpr uint8_t STRIDE_HASH_BITS = 7;
71 static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
79 uint32_t num_entries_;
89 static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
90 static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
91 static void consolidate_non_empty(Entry* entries,
size_t size,
size_t num);
96 template<
typename Derived,
typename Allocator>
126 Derived&
set_p(
float p);
138 Allocator allocator_;
144 uint64_t starting_theta()
const;
145 uint8_t starting_lg_size()
const;
150 struct trivial_extract_key {
152 auto operator()(T&& entry)
const -> decltype(std::forward<T>(entry)) {
153 return std::forward<T>(entry);
159 template<
typename Entry,
typename ExtractKey>
162 bool operator()(
const Entry& entry)
const {
163 return ExtractKey()(entry) != 0;
167 template<
typename Key,
typename Entry,
typename ExtractKey>
168 class key_not_zero_less_than {
170 explicit key_not_zero_less_than(
const Key& key): key(key) {}
171 bool operator()(
const Entry& entry)
const {
172 return ExtractKey()(entry) != 0 && ExtractKey()(entry) < this->key;
180 static inline uint64_t compute_hash(
const void* data,
size_t length, uint64_t seed) {
182 MurmurHash3_x64_128(data, length, seed, hashes);
183 return (hashes.h1 >> 1);
188 template<
typename Entry,
typename ExtractKey>
189 class theta_iterator {
191 using iterator_category = std::input_iterator_tag;
192 using value_type = Entry;
193 using difference_type = std::ptrdiff_t;
194 using pointer = Entry*;
195 using reference = Entry&;
197 theta_iterator(Entry* entries, uint32_t size, uint32_t index);
198 theta_iterator& operator++();
199 theta_iterator operator++(
int);
200 bool operator==(
const theta_iterator& other)
const;
201 bool operator!=(
const theta_iterator& other)
const;
202 reference operator*()
const;
203 pointer operator->()
const;
211 template<
typename Entry,
typename ExtractKey>
212 class theta_const_iterator {
214 using iterator_category = std::input_iterator_tag;
215 using value_type =
const Entry;
216 using difference_type = std::ptrdiff_t;
217 using pointer =
const Entry*;
218 using reference =
const Entry&;
220 theta_const_iterator(
const Entry* entries, uint32_t size, uint32_t index);
221 theta_const_iterator& operator++();
222 theta_const_iterator operator++(
int);
223 bool operator==(
const theta_const_iterator& other)
const;
224 bool operator!=(
const theta_const_iterator& other)
const;
225 reference operator*()
const;
226 pointer operator->()
const;
229 const Entry* entries_;
235 static inline int64_t canonical_double(
double value) {
242 long_double_union.double_value = 0.0;
243 }
else if (std::isnan(value)) {
244 long_double_union.long_value = 0x7ff8000000000000L;
246 long_double_union.double_value = value;
248 return long_double_union.long_value;
253 #include "theta_update_sketch_base_impl.hpp"
Theta base builder.
Definition: theta_update_sketch_base.hpp:97
Derived & set_lg_k(uint8_t lg_k)
Set log2(k), where k is a nominal number of entries in the sketch.
Definition: theta_update_sketch_base_impl.hpp:312
theta_base_builder(const Allocator &allocator)
Creates and instance of the builder with default parameters.
Definition: theta_update_sketch_base_impl.hpp:304
Derived & set_resize_factor(resize_factor rf)
Set resize factor for the internal hash table (defaults to 8)
Definition: theta_update_sketch_base_impl.hpp:324
Derived & set_p(float p)
Set sampling probability (initial theta).
Definition: theta_update_sketch_base_impl.hpp:330
Derived & set_seed(uint64_t seed)
Set the seed for the hash function.
Definition: theta_update_sketch_base_impl.hpp:337
datasketches::resize_factor resize_factor
hash table resize factor
Definition: theta_constants.hpp:31
DataSketches namespace.
Definition: binomial_bounds.hpp:38