137template<
typename EN,
typename EK,
typename A>
138uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(
const void* data,
size_t length) {
140 const uint64_t hash = compute_hash(data, length, seed_);
141 if (hash >= theta_)
return 0;
145template<
typename EN,
typename EK,
typename A>
146auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key)
const -> std::pair<iterator, bool> {
147 return find(entries_, lg_cur_size_, key);
150template<
typename EN,
typename EK,
typename A>
151auto theta_update_sketch_base<EN, EK, A>::find(EN* entries, uint8_t lg_size, uint64_t key) -> std::pair<iterator, bool> {
152 const uint32_t size = 1 << lg_size;
153 const uint32_t mask = size - 1;
154 const uint32_t stride = get_stride(key, lg_size);
155 uint32_t index =
static_cast<uint32_t
>(key) & mask;
157 const uint32_t loop_index = index;
159 const uint64_t probe = EK()(entries[index]);
161 return std::pair<iterator, bool>(&entries[index],
false);
162 }
else if (probe == key) {
163 return std::pair<iterator, bool>(&entries[index],
true);
165 index = (index + stride) & mask;
166 }
while (index != loop_index);
167 throw std::logic_error(
"key not found and no empty slots!");
170template<
typename EN,
typename EK,
typename A>
171template<
typename Fwd>
172void theta_update_sketch_base<EN, EK, A>::insert(iterator it, Fwd&& entry) {
173 new (it) EN(std::forward<Fwd>(entry));
175 if (num_entries_ > get_capacity(lg_cur_size_, lg_nom_size_)) {
176 if (lg_cur_size_ <= lg_nom_size_) {
184template<
typename EN,
typename EK,
typename A>
185auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
189template<
typename EN,
typename EK,
typename A>
190auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
191 return entries_ + (1ULL << lg_cur_size_);
194template<
typename EN,
typename EK,
typename A>
195uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
196 const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
197 return static_cast<uint32_t
>(std::floor(fraction * (1 << lg_cur_size)));
200template<
typename EN,
typename EK,
typename A>
201uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t lg_size) {
203 return (2 *
static_cast<uint32_t
>((key >> lg_size) & STRIDE_MASK)) + 1;
206template<
typename EN,
typename EK,
typename A>
207void theta_update_sketch_base<EN, EK, A>::resize() {
208 const size_t old_size = 1ULL << lg_cur_size_;
209 const uint8_t lg_new_size = std::min<uint8_t>(lg_cur_size_ +
static_cast<uint8_t
>(rf_), lg_nom_size_ + 1);
210 const size_t new_size = 1ULL << lg_new_size;
211 EN* new_entries = allocator_.allocate(new_size);
212 for (
size_t i = 0; i < new_size; ++i) EK()(new_entries[i]) = 0;
213 for (
size_t i = 0; i < old_size; ++i) {
214 const uint64_t key = EK()(entries_[i]);
217 new (find(new_entries, lg_new_size, key).first) EN(std::move(entries_[i]));
219 EK()(entries_[i]) = 0;
222 std::swap(entries_, new_entries);
223 lg_cur_size_ = lg_new_size;
224 allocator_.deallocate(new_entries, old_size);
228template<
typename EN,
typename EK,
typename A>
229void theta_update_sketch_base<EN, EK, A>::rebuild() {
230 const size_t size = 1ULL << lg_cur_size_;
231 const uint32_t nominal_size = 1 << lg_nom_size_;
235 consolidate_non_empty(entries_, size, num_entries_);
237 std::nth_element(entries_, entries_ + nominal_size, entries_ + num_entries_, comparator());
238 this->theta_ = EK()(entries_[nominal_size]);
239 EN* old_entries = entries_;
240 const size_t num_old_entries = num_entries_;
241 entries_ = allocator_.allocate(size);
242 for (
size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
243 num_entries_ = nominal_size;
245 for (
size_t i = 0; i < nominal_size; ++i) {
246 new (find(EK()(old_entries[i])).first) EN(std::move(old_entries[i]));
247 old_entries[i].~EN();
249 for (
size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
250 allocator_.deallocate(old_entries, size);
253template<
typename EN,
typename EK,
typename A>
254void theta_update_sketch_base<EN, EK, A>::trim() {
255 if (num_entries_ >
static_cast<uint32_t
>(1 << lg_nom_size_)) rebuild();
258template<
typename EN,
typename EK,
typename A>
259void theta_update_sketch_base<EN, EK, A>::reset() {
260 const size_t cur_size = 1ULL << lg_cur_size_;
261 for (
size_t i = 0; i < cur_size; ++i) {
262 if (EK()(entries_[i]) != 0) {
264 EK()(entries_[i]) = 0;
267 const uint8_t starting_lg_size = theta_build_helper<true>::starting_sub_multiple(
268 lg_nom_size_ + 1, theta_constants::MIN_LG_K,
static_cast<uint8_t
>(rf_));
269 if (starting_lg_size != lg_cur_size_) {
270 allocator_.deallocate(entries_, cur_size);
271 lg_cur_size_ = starting_lg_size;
272 const size_t new_size = 1ULL << starting_lg_size;
273 entries_ = allocator_.allocate(new_size);
274 for (
size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
277 theta_ = theta_build_helper<true>::starting_theta_from_p(p_);
281template<
typename EN,
typename EK,
typename A>
282void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries,
size_t size,
size_t num) {
286 if (EK()(entries[i]) == 0)
break;
290 for (
size_t j = i + 1; j < size; ++j) {
291 if (EK()(entries[j]) != 0) {
292 new (&entries[i]) EN(std::move(entries[j]));
294 EK()(entries[j]) = 0;
303template<
typename Derived,
typename Allocator>
305allocator_(allocator),
306lg_k_(theta_constants::DEFAULT_LG_K),
307rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
309seed_(DEFAULT_SEED) {}
311template<
typename Derived,
typename Allocator>
314 throw std::invalid_argument(
"lg_k must not be less than " + std::to_string(
theta_constants::MIN_LG_K) +
": " + std::to_string(lg_k));
317 throw std::invalid_argument(
"lg_k must not be greater than " + std::to_string(
theta_constants::MAX_LG_K) +
": " + std::to_string(lg_k));
320 return static_cast<Derived&
>(*this);
323template<
typename Derived,
typename Allocator>
326 return static_cast<Derived&
>(*this);
329template<
typename Derived,
typename Allocator>
331 if (p <= 0 || p > 1)
throw std::invalid_argument(
"sampling probability must be between 0 and 1");
333 return static_cast<Derived&
>(*this);
336template<
typename Derived,
typename Allocator>
339 return static_cast<Derived&
>(*this);