20 #ifndef _VAR_OPT_SKETCH_IMPL_HPP_
21 #define _VAR_OPT_SKETCH_IMPL_HPP_
30 #include "var_opt_sketch.hpp"
32 #include "bounds_binomial_proportions.hpp"
33 #include "count_zeros.hpp"
34 #include "memory_operations.hpp"
35 #include "ceiling_power_of_2.hpp"
45 template<
typename T,
typename A>
49 template<
typename T,
typename A>
56 total_wt_r_(other.total_wt_r_),
58 curr_items_alloc_(other.curr_items_alloc_),
59 filled_data_(other.filled_data_),
60 allocator_(other.allocator_),
63 num_marks_in_h_(other.num_marks_in_h_),
66 data_ = allocator_.allocate(curr_items_alloc_);
68 for (
size_t i = 0; i < h_; ++i)
69 new (&data_[i]) T(other.data_[i]);
70 for (
size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
71 new (&data_[i]) T(other.data_[i]);
76 weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
78 std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
80 if (other.marks_ !=
nullptr) {
81 marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
82 std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
86 template<
typename T,
typename A>
93 total_wt_r_(other.total_wt_r_),
95 curr_items_alloc_(other.curr_items_alloc_),
96 filled_data_(other.filled_data_),
97 allocator_(other.allocator_),
100 num_marks_in_h_(other.num_marks_in_h_),
103 data_ = allocator_.allocate(curr_items_alloc_);
105 for (
size_t i = 0; i < h_; ++i)
106 new (&data_[i]) T(other.data_[i]);
107 for (
size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
108 new (&data_[i]) T(other.data_[i]);
111 filled_data_ =
false;
113 weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
115 std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
117 if (!as_sketch && other.marks_ !=
nullptr) {
118 marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
119 std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
123 template<
typename T,
typename A>
130 total_wt_r_(other.total_wt_r_),
132 curr_items_alloc_(other.curr_items_alloc_),
133 filled_data_(other.filled_data_),
134 allocator_(other.allocator_),
136 weights_(other.weights_),
137 num_marks_in_h_(other.num_marks_in_h_),
140 other.data_ =
nullptr;
141 other.weights_ =
nullptr;
142 other.marks_ =
nullptr;
145 template<
typename T,
typename A>
147 k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
148 if (k == 0 || k_ > MAX_K) {
149 throw std::invalid_argument(
"k must be at least 1 and less than 2^31 - 1");
152 uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
153 uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
154 curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
155 if (curr_items_alloc_ == k_) {
159 allocate_data_arrays(curr_items_alloc_, is_gadget);
163 template<
typename T,
typename A>
165 uint32_t curr_items_alloc,
bool filled_data, std::unique_ptr<T, items_deleter> items,
166 std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
167 std::unique_ptr<bool, marks_deleter> marks,
const A& allocator) :
173 total_wt_r_(total_wt_r),
175 curr_items_alloc_(curr_items_alloc),
176 filled_data_(filled_data),
177 allocator_(allocator),
178 data_(items.release()),
179 weights_(weights.release()),
180 num_marks_in_h_(num_marks_in_h),
181 marks_(marks.release())
185 template<
typename T,
typename A>
187 if (data_ !=
nullptr) {
190 const size_t num_to_destroy = std::min(k_ + 1, curr_items_alloc_);
191 for (
size_t i = 0; i < num_to_destroy; ++i) {
196 for (
size_t i = 0; i < h_; ++i) {
200 for (
size_t i = h_ + 1; i < h_ + r_ + 1; ++i) {
204 allocator_.deallocate(data_, curr_items_alloc_);
207 if (weights_ !=
nullptr) {
208 AllocDouble(allocator_).deallocate(weights_, curr_items_alloc_);
211 if (marks_ !=
nullptr) {
212 AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
216 template<
typename T,
typename A>
219 std::swap(k_, sk_copy.k_);
220 std::swap(h_, sk_copy.h_);
221 std::swap(m_, sk_copy.m_);
222 std::swap(r_, sk_copy.r_);
223 std::swap(n_, sk_copy.n_);
224 std::swap(total_wt_r_, sk_copy.total_wt_r_);
225 std::swap(rf_, sk_copy.rf_);
226 std::swap(curr_items_alloc_, sk_copy.curr_items_alloc_);
227 std::swap(filled_data_, sk_copy.filled_data_);
228 std::swap(allocator_, sk_copy.allocator_);
229 std::swap(data_, sk_copy.data_);
230 std::swap(weights_, sk_copy.weights_);
231 std::swap(num_marks_in_h_, sk_copy.num_marks_in_h_);
232 std::swap(marks_, sk_copy.marks_);
236 template<
typename T,
typename A>
238 std::swap(k_, other.k_);
239 std::swap(h_, other.h_);
240 std::swap(m_, other.m_);
241 std::swap(r_, other.r_);
242 std::swap(n_, other.n_);
243 std::swap(total_wt_r_, other.total_wt_r_);
244 std::swap(rf_, other.rf_);
245 std::swap(curr_items_alloc_, other.curr_items_alloc_);
246 std::swap(filled_data_, other.filled_data_);
247 std::swap(allocator_, other.allocator_);
248 std::swap(data_, other.data_);
249 std::swap(weights_, other.weights_);
250 std::swap(num_marks_in_h_, other.num_marks_in_h_);
251 std::swap(marks_, other.marks_);
295 template<
typename T,
typename A>
296 template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type>
298 if (is_empty()) {
return PREAMBLE_LONGS_EMPTY << 3; }
299 size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
300 num_bytes += h_ *
sizeof(double);
301 if (marks_ !=
nullptr) {
302 num_bytes += (h_ / 8) + (h_ % 8 > 0);
304 num_bytes += (h_ + r_) *
sizeof(TT);
309 template<
typename T,
typename A>
310 template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type>
312 if (is_empty()) {
return PREAMBLE_LONGS_EMPTY << 3; }
313 size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
314 num_bytes += h_ *
sizeof(double);
315 if (marks_ !=
nullptr) {
316 num_bytes += (h_ / 8) + (h_ % 8 > 0);
320 num_bytes += sd.size_of_item(it.first);
324 template<
typename T,
typename A>
325 template<
typename SerDe>
327 const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
328 std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
329 uint8_t* ptr = bytes.data() + header_size_bytes;
330 uint8_t* end_ptr = ptr + size;
332 bool empty = is_empty();
333 uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
334 : (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL));
335 uint8_t first_byte = (preLongs & 0x3F) | ((
static_cast<uint8_t
>(rf_)) << 6);
336 uint8_t flags = (marks_ !=
nullptr ? GADGET_FLAG_MASK : 0);
339 flags |= EMPTY_FLAG_MASK;
343 uint8_t ser_ver(SER_VER);
344 uint8_t family(FAMILY_ID);
345 ptr += copy_to_mem(first_byte, ptr);
346 ptr += copy_to_mem(ser_ver, ptr);
347 ptr += copy_to_mem(family, ptr);
348 ptr += copy_to_mem(flags, ptr);
349 ptr += copy_to_mem(k_, ptr);
353 ptr += copy_to_mem(n_, ptr);
354 ptr += copy_to_mem(h_, ptr);
355 ptr += copy_to_mem(r_, ptr);
359 ptr += copy_to_mem(total_wt_r_, ptr);
363 ptr += copy_to_mem(weights_, ptr, h_ *
sizeof(
double));
366 if (marks_ !=
nullptr) {
368 for (uint32_t i = 0; i < h_; ++i) {
370 val |= 0x1 << (i & 0x7);
373 if ((i & 0x7) == 0x7) {
374 ptr += copy_to_mem(val, ptr);
380 if ((h_ & 0x7) > 0) {
381 ptr += copy_to_mem(val, ptr);
386 ptr += sd.serialize(ptr, end_ptr - ptr, data_, h_);
387 ptr += sd.serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
390 size_t bytes_written = ptr - bytes.data();
391 if (bytes_written != size) {
392 throw std::logic_error(
"serialized size mismatch: " + std::to_string(bytes_written) +
" != " + std::to_string(size));
398 template<
typename T,
typename A>
399 template<
typename SerDe>
401 const bool empty = (h_ == 0) && (r_ == 0);
403 const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
404 : (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL));
405 const uint8_t first_byte = (preLongs & 0x3F) | ((
static_cast<uint8_t
>(rf_)) << 6);
406 uint8_t flags = (marks_ !=
nullptr ? GADGET_FLAG_MASK : 0);
409 flags |= EMPTY_FLAG_MASK;
413 const uint8_t ser_ver(SER_VER);
414 const uint8_t family(FAMILY_ID);
415 write(os, first_byte);
429 write(os, total_wt_r_);
433 write(os, weights_, h_ *
sizeof(
double));
436 if (marks_ !=
nullptr) {
438 for (uint32_t i = 0; i < h_; ++i) {
440 val |= 0x1 << (i & 0x7);
443 if ((i & 0x7) == 0x7) {
450 if ((h_ & 0x7) > 0) {
456 sd.serialize(os, data_, h_);
457 sd.serialize(os, &data_[h_ + 1], r_);
461 template<
typename T,
typename A>
462 template<
typename SerDe>
464 ensure_minimum_memory(size, 8);
465 const char* ptr =
static_cast<const char*
>(bytes);
466 const char* base = ptr;
467 const char* end_ptr = ptr + size;
469 ptr += copy_from_mem(ptr, first_byte);
470 uint8_t preamble_longs = first_byte & 0x3f;
471 resize_factor rf =
static_cast<resize_factor
>((first_byte >> 6) & 0x03);
472 uint8_t serial_version;
473 ptr += copy_from_mem(ptr, serial_version);
475 ptr += copy_from_mem(ptr, family_id);
477 ptr += copy_from_mem(ptr, flags);
479 ptr += copy_from_mem(ptr, k);
481 check_preamble_longs(preamble_longs, flags);
482 check_family_and_serialization_version(family_id, serial_version);
483 ensure_minimum_memory(size, preamble_longs << 3);
485 const bool is_empty = flags & EMPTY_FLAG_MASK;
486 const bool is_gadget = flags & GADGET_FLAG_MASK;
495 ptr += copy_from_mem(ptr, n);
496 ptr += copy_from_mem(ptr, h);
497 ptr += copy_from_mem(ptr, r);
499 const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
502 double total_wt_r = 0.0;
503 if (preamble_longs == PREAMBLE_LONGS_FULL) {
504 ptr += copy_from_mem(ptr, total_wt_r);
505 if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
506 throw std::invalid_argument(
"Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
507 "Found r = " + std::to_string(r) +
", R region weight = " + std::to_string(total_wt_r));
514 check_memory_size(ptr - base + (h *
sizeof(
double)), size);
515 std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
516 weights_deleter(array_size, allocator));
517 double* wts = weights.get();
518 ptr += copy_from_mem(ptr, wts, h *
sizeof(
double));
519 for (
size_t i = 0; i < h; ++i) {
520 if (!(wts[i] > 0.0)) {
521 throw std::invalid_argument(
"Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
524 std::fill(wts + h, wts + array_size, -1.0);
527 uint32_t num_marks_in_h = 0;
528 std::unique_ptr<bool, marks_deleter> marks(
nullptr, marks_deleter(array_size, allocator));
531 marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
532 const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
533 check_memory_size(ptr - base + size_marks, size);
534 for (uint32_t i = 0; i < h; ++i) {
535 if ((i & 0x7) == 0x0) {
536 ptr += copy_from_mem(ptr, val);
538 marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
539 num_marks_in_h += (marks.get()[i] ? 1 : 0);
544 items_deleter deleter(array_size, allocator);
545 std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
547 ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), h);
548 items.get_deleter().set_h(h);
550 ptr += sd.deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
551 items.get_deleter().set_r(r);
553 return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size,
false,
554 std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
557 template<
typename T,
typename A>
558 template<
typename SerDe>
560 const auto first_byte = read<uint8_t>(is);
561 uint8_t preamble_longs = first_byte & 0x3f;
562 const resize_factor rf =
static_cast<resize_factor
>((first_byte >> 6) & 0x03);
563 const auto serial_version = read<uint8_t>(is);
564 const auto family_id = read<uint8_t>(is);
565 const auto flags = read<uint8_t>(is);
566 const auto k = read<uint32_t>(is);
568 check_preamble_longs(preamble_longs, flags);
569 check_family_and_serialization_version(family_id, serial_version);
571 const bool is_empty = flags & EMPTY_FLAG_MASK;
572 const bool is_gadget = flags & GADGET_FLAG_MASK;
576 throw std::runtime_error(
"error reading from std::istream");
578 return var_opt_sketch(k, rf, is_gadget, allocator);
582 const auto n = read<uint64_t>(is);
583 const auto h = read<uint32_t>(is);
584 const auto r = read<uint32_t>(is);
586 const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
589 double total_wt_r = 0.0;
590 if (preamble_longs == PREAMBLE_LONGS_FULL) {
591 total_wt_r = read<double>(is);
592 if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
593 throw std::invalid_argument(
"Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
594 "Found r = " + std::to_string(r) +
", R region weight = " + std::to_string(total_wt_r));
599 std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
600 weights_deleter(array_size, allocator));
601 double* wts = weights.get();
602 read(is, wts, h *
sizeof(
double));
603 for (
size_t i = 0; i < h; ++i) {
604 if (!(wts[i] > 0.0)) {
605 throw std::invalid_argument(
"Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
608 std::fill(wts + h, wts + array_size, -1.0);
611 uint32_t num_marks_in_h = 0;
612 std::unique_ptr<bool, marks_deleter> marks(
nullptr, marks_deleter(array_size, allocator));
614 marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
616 for (uint32_t i = 0; i < h; ++i) {
617 if ((i & 0x7) == 0x0) {
618 val = read<uint8_t>(is);
620 marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
621 num_marks_in_h += (marks.get()[i] ? 1 : 0);
626 items_deleter deleter(array_size, allocator);
627 std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
629 sd.deserialize(is, items.get(), h);
630 items.get_deleter().set_h(h);
632 sd.deserialize(is, &(items.get()[h + 1]), r);
633 items.get_deleter().set_r(r);
636 throw std::runtime_error(
"error reading from std::istream");
638 return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size,
false,
639 std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
642 template<
typename T,
typename A>
644 return (h_ == 0 && r_ == 0);
647 template<
typename T,
typename A>
649 const uint32_t prev_alloc = curr_items_alloc_;
650 const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
651 const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
652 curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
653 if (curr_items_alloc_ == k_) {
659 const size_t num_to_destroy = std::min(k_ + 1, prev_alloc);
660 for (
size_t i = 0; i < num_to_destroy; ++i)
664 for (
size_t i = 0; i < h_; ++i)
667 for (
size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
671 if (curr_items_alloc_ < prev_alloc) {
672 const bool is_gadget = (marks_ !=
nullptr);
674 allocator_.deallocate(data_, prev_alloc);
675 AllocDouble(allocator_).deallocate(weights_, prev_alloc);
677 if (marks_ !=
nullptr)
678 AllocBool(allocator_).deallocate(marks_, prev_alloc);
680 allocate_data_arrays(curr_items_alloc_, is_gadget);
689 filled_data_ =
false;
692 template<
typename T,
typename A>
697 template<
typename T,
typename A>
702 template<
typename T,
typename A>
704 const uint32_t num_in_sketch = h_ + r_;
705 return (num_in_sketch < k_ ? num_in_sketch : k_);
708 template<
typename T,
typename A>
710 update(item, weight,
false);
713 template<
typename T,
typename A>
715 update(std::move(item), weight,
false);
718 template<
typename T,
typename A>
722 std::ostringstream os;
723 os <<
"### VarOpt SUMMARY:" << std::endl;
724 os <<
" k : " << k_ << std::endl;
725 os <<
" h : " << h_ << std::endl;
726 os <<
" r : " << r_ << std::endl;
727 os <<
" weight_r : " << total_wt_r_ << std::endl;
728 os <<
" Current size : " << curr_items_alloc_ << std::endl;
729 os <<
" Resize factor: " << (1 << rf_) << std::endl;
730 os <<
"### END SKETCH SUMMARY" << std::endl;
731 return string<A>(os.str().c_str(), allocator_);
734 template<
typename T,
typename A>
738 std::ostringstream os;
739 os <<
"### Sketch Items" << std::endl;
741 for (
auto record : *
this) {
742 os << idx <<
": " << record.first <<
"\twt = " << record.second << std::endl;
745 return string<A>(os.str().c_str(), allocator_);
748 template<
typename T,
typename A>
752 std::ostringstream os;
753 os <<
"### Sketch Items" << std::endl;
754 const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
755 for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
756 if (i == h_ && print_gap) {
757 os << display_idx <<
": GAP" << std::endl;
760 os << display_idx <<
": " << data_[i] <<
"\twt = ";
761 if (weights_[i] == -1.0) {
762 os << get_tau() <<
"\t(-1.0)" << std::endl;
764 os << weights_[i] << std::endl;
769 return string<A>(os.str().c_str(), allocator_);
772 template<
typename T,
typename A>
775 if (weight < 0.0 || std::isnan(weight) || std::isinf(weight)) {
776 throw std::invalid_argument(
"Item weights must be nonnegative and finite. Found: "
777 + std::to_string(weight));
778 }
else if (weight == 0.0) {
785 update_warmup_phase(std::forward<O>(item), weight, mark);
789 if ((h_ != 0) && (peek_min() < get_tau()))
790 throw std::logic_error(
"sketch not in valid estimation mode");
794 const double hypothetical_tau = (weight + total_wt_r_) / ((r_ + 1) - 1);
797 const double condition1 = (h_ == 0) || (weight <= peek_min());
800 const double condition2 = weight < hypothetical_tau;
802 if (condition1 && condition2) {
803 update_light(std::forward<O>(item), weight, mark);
804 }
else if (r_ == 1) {
805 update_heavy_r_eq1(std::forward<O>(item), weight, mark);
807 update_heavy_general(std::forward<O>(item), weight, mark);
812 template<
typename T,
typename A>
814 void var_opt_sketch<T, A>::update_warmup_phase(O&& item,
double weight,
bool mark) {
816 if (r_ > 0 || m_ != 0 || h_ > k_)
throw std::logic_error(
"invalid sketch state during warmup");
818 if (h_ >= curr_items_alloc_) {
823 new (&data_[h_]) T(std::forward<O>(item));
824 weights_[h_] = weight;
825 if (marks_ !=
nullptr) {
829 num_marks_in_h_ += mark ? 1 : 0;
834 transition_from_warmup();
842 template<
typename T,
typename A>
844 void var_opt_sketch<T, A>::update_light(O&& item,
double weight,
bool mark) {
845 if (r_ == 0 || (r_ + h_) != k_)
throw std::logic_error(
"invalid sketch state during light warmup");
847 const uint32_t m_slot = h_;
849 if (&data_[m_slot] != &item)
850 data_[m_slot] = std::forward<O>(item);
852 new (&data_[m_slot]) T(std::forward<O>(item));
855 weights_[m_slot] = weight;
856 if (marks_ !=
nullptr) { marks_[m_slot] = mark; }
859 grow_candidate_set(total_wt_r_ + weight, r_ + 1);
870 template<
typename T,
typename A>
872 void var_opt_sketch<T, A>::update_heavy_general(O&& item,
double weight,
bool mark) {
873 if (r_ < 2 || m_ != 0 || (r_ + h_) != k_)
throw std::logic_error(
"invalid sketch state during heavy general update");
876 push(std::forward<O>(item), weight, mark);
878 grow_candidate_set(total_wt_r_, r_);
884 template<
typename T,
typename A>
886 void var_opt_sketch<T, A>::update_heavy_r_eq1(O&& item,
double weight,
bool mark) {
887 if (r_ != 1 || m_ != 0 || (r_ + h_) != k_)
throw std::logic_error(
"invalid sketch state during heavy r=1 update");
889 push(std::forward<O>(item), weight, mark);
890 pop_min_to_m_region();
894 const uint32_t m_slot = k_ - 1;
895 grow_candidate_set(weights_[m_slot] + total_wt_r_, 2);
904 template<
typename T,
typename A>
905 void var_opt_sketch<T, A>::decrease_k_by_1() {
907 throw std::logic_error(
"Cannot decrease k below 1 in union");
910 if ((h_ == 0) && (r_ == 0)) {
913 }
else if ((h_ > 0) && (r_ == 0)) {
917 transition_from_warmup();
919 }
else if ((h_ > 0) && (r_ > 0)) {
925 const uint32_t old_gap_idx = h_;
926 const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
927 if (old_final_r_idx != k_)
throw std::logic_error(
"gadget in invalid state");
929 swap_values(old_final_r_idx, old_gap_idx);
936 const uint32_t pulled_idx = h_ - 1;
937 double pulled_weight = weights_[pulled_idx];
938 bool pulled_mark = marks_[pulled_idx];
941 if (pulled_mark) { --num_marks_in_h_; }
942 weights_[pulled_idx] = -1.0;
948 update(std::move(data_[pulled_idx]), pulled_weight, pulled_mark);
949 }
else if ((h_ == 0) && (r_ > 0)) {
951 if (r_ < 2)
throw std::logic_error(
"r_ too small for pure reservoir mode");
953 const uint32_t r_idx_to_delete = 1 + next_int(r_);
954 const uint32_t rightmost_r_idx = (1 + r_) - 1;
955 swap_values(r_idx_to_delete, rightmost_r_idx);
956 weights_[rightmost_r_idx] = -1.0;
963 template<
typename T,
typename A>
964 void var_opt_sketch<T, A>::allocate_data_arrays(uint32_t tgt_size,
bool use_marks) {
965 filled_data_ =
false;
967 data_ = allocator_.allocate(tgt_size);
968 weights_ = AllocDouble(allocator_).allocate(tgt_size);
971 marks_ = AllocBool(allocator_).allocate(tgt_size);
977 template<
typename T,
typename A>
978 void var_opt_sketch<T, A>::grow_data_arrays() {
979 const uint32_t prev_size = curr_items_alloc_;
980 curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
981 if (curr_items_alloc_ == k_) {
985 if (prev_size < curr_items_alloc_) {
986 filled_data_ =
false;
988 T* tmp_data = allocator_.allocate(curr_items_alloc_);
989 double* tmp_weights = AllocDouble(allocator_).allocate(curr_items_alloc_);
991 for (uint32_t i = 0; i < prev_size; ++i) {
992 new (&tmp_data[i]) T(std::move(data_[i]));
994 tmp_weights[i] = weights_[i];
997 allocator_.deallocate(data_, prev_size);
998 AllocDouble(allocator_).deallocate(weights_, prev_size);
1001 weights_ = tmp_weights;
1003 if (marks_ !=
nullptr) {
1004 bool* tmp_marks = AllocBool(allocator_).allocate(curr_items_alloc_);
1005 for (uint32_t i = 0; i < prev_size; ++i) {
1006 tmp_marks[i] = marks_[i];
1008 AllocBool(allocator_).deallocate(marks_, prev_size);
1014 template<
typename T,
typename A>
1015 void var_opt_sketch<T, A>::transition_from_warmup() {
1019 pop_min_to_m_region();
1020 pop_min_to_m_region();
1024 if (h_ != (k_ -1) || m_ != 1 || r_ != 1)
1025 throw std::logic_error(
"invalid state for transitioning from warmup");
1029 total_wt_r_ = weights_[k_];
1030 weights_[k_] = -1.0;
1034 grow_candidate_set(weights_[k_ - 1] + total_wt_r_, 2);
1037 template<
typename T,
typename A>
1038 void var_opt_sketch<T, A>::convert_to_heap() {
1043 const uint32_t last_slot = h_ - 1;
1044 const int last_non_leaf = ((last_slot + 1) / 2) - 1;
1046 for (
int j = last_non_leaf; j >= 0; --j) {
1047 restore_towards_leaves(j);
1057 template<
typename T,
typename A>
1058 void var_opt_sketch<T, A>::restore_towards_leaves(uint32_t slot_in) {
1059 const uint32_t last_slot = h_ - 1;
1060 if (h_ == 0 || slot_in > last_slot)
throw std::logic_error(
"invalid heap state");
1062 uint32_t slot = slot_in;
1063 uint32_t child = (2 * slot_in) + 1;
1065 while (child <= last_slot) {
1066 uint32_t child2 = child + 1;
1067 if ((child2 <= last_slot) && (weights_[child2] < weights_[child])) {
1072 if (weights_[slot] <= weights_[child]) {
1078 swap_values(slot, child);
1081 child = (2 * slot) + 1;
1085 template<
typename T,
typename A>
1086 void var_opt_sketch<T, A>::restore_towards_root(uint32_t slot_in) {
1087 uint32_t slot = slot_in;
1088 uint32_t p = (((slot + 1) / 2) - 1);
1089 while ((slot > 0) && (weights_[slot] < weights_[p])) {
1090 swap_values(slot, p);
1092 p = (((slot + 1) / 2) - 1);
1096 template<
typename T,
typename A>
1097 template<
typename O>
1098 void var_opt_sketch<T, A>::push(O&& item,
double wt,
bool mark) {
1100 if (&data_[h_] != &item)
1101 data_[h_] = std::forward<O>(item);
1103 new (&data_[h_]) T(std::forward<O>(item));
1104 filled_data_ =
true;
1107 if (marks_ !=
nullptr) {
1109 num_marks_in_h_ += (mark ? 1 : 0);
1113 restore_towards_root(h_ - 1);
1116 template<
typename T,
typename A>
1117 void var_opt_sketch<T, A>::pop_min_to_m_region() {
1118 if (h_ == 0 || (h_ + m_ + r_ != k_ + 1))
1119 throw std::logic_error(
"invalid heap state popping min to M region");
1127 uint32_t tgt = h_ - 1;
1128 swap_values(0, tgt);
1132 restore_towards_leaves(0);
1135 if (is_marked(h_)) {
1141 template<
typename T,
typename A>
1142 void var_opt_sketch<T, A>::swap_values(uint32_t src, uint32_t dst) {
1143 std::swap(data_[src], data_[dst]);
1144 std::swap(weights_[src], weights_[dst]);
1146 if (marks_ !=
nullptr) {
1147 std::swap(marks_[src], marks_[dst]);
1159 template<
typename T,
typename A>
1160 void var_opt_sketch<T, A>::grow_candidate_set(
double wt_cands, uint32_t num_cands) {
1161 if ((h_ + m_ + r_ != k_ + 1) || (num_cands < 1) || (num_cands != m_ + r_) || (m_ >= 2))
1162 throw std::logic_error(
"invariant violated when growing candidate set");
1165 const double next_wt = peek_min();
1166 const double next_tot_wt = wt_cands + next_wt;
1171 if ((next_wt * num_cands) < next_tot_wt) {
1172 wt_cands = next_tot_wt;
1174 pop_min_to_m_region();
1180 downsample_candidate_set(wt_cands, num_cands);
1183 template<
typename T,
typename A>
1184 void var_opt_sketch<T, A>::downsample_candidate_set(
double wt_cands, uint32_t num_cands) {
1185 if (num_cands < 2 || h_ + num_cands != k_ + 1)
1186 throw std::logic_error(
"invalid num_cands when downsampling");
1189 const uint32_t delete_slot = choose_delete_slot(wt_cands, num_cands);
1190 const uint32_t leftmost_cand_slot = h_;
1191 if (delete_slot < leftmost_cand_slot || delete_slot > k_)
1192 throw std::logic_error(
"invalid delete slot index when downsampling");
1197 const uint32_t stop_idx = leftmost_cand_slot + m_;
1198 for (uint32_t j = leftmost_cand_slot; j < stop_idx; ++j) {
1203 data_[delete_slot] = std::move(data_[leftmost_cand_slot]);
1207 total_wt_r_ = wt_cands;
1210 template<
typename T,
typename A>
1211 uint32_t var_opt_sketch<T, A>::choose_delete_slot(
double wt_cands, uint32_t num_cands)
const {
1212 if (r_ == 0)
throw std::logic_error(
"choosing delete slot while in exact mode");
1216 return pick_random_slot_in_r();
1217 }
else if (m_ == 1) {
1220 double wt_m_cand = weights_[h_];
1221 if ((wt_cands * next_double_exclude_zero()) < ((num_cands - 1) * wt_m_cand)) {
1222 return pick_random_slot_in_r();
1228 const uint32_t delete_slot = choose_weighted_delete_slot(wt_cands, num_cands);
1229 const uint32_t first_r_slot = h_ + m_;
1230 if (delete_slot == first_r_slot) {
1231 return pick_random_slot_in_r();
1238 template<
typename T,
typename A>
1239 uint32_t var_opt_sketch<T, A>::choose_weighted_delete_slot(
double wt_cands, uint32_t num_cands)
const {
1240 if (m_ < 1)
throw std::logic_error(
"must have weighted delete slot");
1242 const uint32_t offset = h_;
1243 const uint32_t final_m = (offset + m_) - 1;
1244 const uint32_t num_to_keep = num_cands - 1;
1246 double left_subtotal = 0.0;
1247 double right_subtotal = -1.0 * wt_cands * next_double_exclude_zero();
1249 for (uint32_t i = offset; i <= final_m; ++i) {
1250 left_subtotal += num_to_keep * weights_[i];
1251 right_subtotal += wt_cands;
1253 if (left_subtotal < right_subtotal) {
1262 template<
typename T,
typename A>
1263 uint32_t var_opt_sketch<T, A>::pick_random_slot_in_r()
const {
1264 if (r_ == 0)
throw std::logic_error(
"r_ = 0 when picking slot in R region");
1265 const uint32_t offset = h_ + m_;
1269 return offset + next_int(r_);
1273 template<
typename T,
typename A>
1274 double var_opt_sketch<T, A>::peek_min()
const {
1275 if (h_ == 0)
throw std::logic_error(
"h_ = 0 when checking min in H region");
1279 template<
typename T,
typename A>
1280 inline bool var_opt_sketch<T, A>::is_marked(uint32_t idx)
const {
1281 return marks_ ==
nullptr ? false : marks_[idx];
1284 template<
typename T,
typename A>
1285 double var_opt_sketch<T, A>::get_tau()
const {
1286 return r_ == 0 ? std::nan(
"1") : (total_wt_r_ / r_);
1289 template<
typename T,
typename A>
1290 void var_opt_sketch<T, A>::strip_marks() {
1291 if (marks_ ==
nullptr)
throw std::logic_error(
"request to strip marks from non-gadget");
1292 num_marks_in_h_ = 0;
1293 AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1297 template<
typename T,
typename A>
1298 void var_opt_sketch<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
1299 const bool is_empty(flags & EMPTY_FLAG_MASK);
1302 if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
1303 throw std::invalid_argument(
"Possible corruption: Preamble longs must be "
1304 + std::to_string(PREAMBLE_LONGS_EMPTY) +
" for an empty sketch. Found: "
1305 + std::to_string(preamble_longs));
1308 if (preamble_longs != PREAMBLE_LONGS_WARMUP
1309 && preamble_longs != PREAMBLE_LONGS_FULL) {
1310 throw std::invalid_argument(
"Possible corruption: Preamble longs must be "
1311 + std::to_string(PREAMBLE_LONGS_WARMUP) +
" or "
1312 + std::to_string(PREAMBLE_LONGS_FULL)
1313 +
" for a non-empty sketch. Found: " + std::to_string(preamble_longs));
1318 template<
typename T,
typename A>
1319 void var_opt_sketch<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
1320 if (family_id == FAMILY_ID) {
1321 if (ser_ver != SER_VER) {
1322 throw std::invalid_argument(
"Possible corruption: VarOpt serialization version must be "
1323 + std::to_string(SER_VER) +
". Found: " + std::to_string(ser_ver));
1329 throw std::invalid_argument(
"Possible corruption: VarOpt family id must be "
1330 + std::to_string(FAMILY_ID) +
". Found: " + std::to_string(family_id));
1333 template<
typename T,
typename A>
1334 uint32_t var_opt_sketch<T, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
1335 uint32_t h, uint32_t r, resize_factor rf) {
1336 if (k == 0 || k > MAX_K) {
1337 throw std::invalid_argument(
"k must be at least 1 and less than 2^31 - 1");
1340 uint32_t array_size;
1343 if (preamble_longs != PREAMBLE_LONGS_WARMUP) {
1344 throw std::invalid_argument(
"Possible corruption: deserializing with n <= k but not in warmup mode. "
1345 "Found n = " + std::to_string(n) +
", k = " + std::to_string(k));
1348 throw std::invalid_argument(
"Possible corruption: deserializing in warmup mode but n != h. "
1349 "Found n = " + std::to_string(n) +
", h = " + std::to_string(h));
1352 throw std::invalid_argument(
"Possible corruption: deserializing in warmup mode but r > 0. "
1353 "Found r = " + std::to_string(r));
1356 const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k));
1357 const uint32_t min_lg_size = to_log_2(ceiling_power_of_2(h));
1358 const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf, min_lg_size);
1359 array_size = get_adjusted_size(k, 1 << initial_lg_size);
1360 if (array_size == k) {
1364 if (preamble_longs != PREAMBLE_LONGS_FULL) {
1365 throw std::invalid_argument(
"Possible corruption: deserializing with n > k but not in full mode. "
1366 "Found n = " + std::to_string(n) +
", k = " + std::to_string(k));
1369 throw std::invalid_argument(
"Possible corruption: deserializing in full mode but h + r != n. "
1370 "Found h = " + std::to_string(h) +
", r = " + std::to_string(r) +
", n = " + std::to_string(n));
1379 template<
typename T,
typename A>
1380 template<
typename P>
1383 return {0.0, 0.0, 0.0, 0.0};
1386 double total_wt_h = 0.0;
1387 double h_true_wt = 0.0;
1389 for (; idx < h_; ++idx) {
1390 double wt = weights_[idx];
1392 if (predicate(data_[idx])) {
1399 return {h_true_wt, h_true_wt, h_true_wt, h_true_wt};
1403 const uint64_t num_samples = n_ - h_;
1404 double effective_sampling_rate = r_ /
static_cast<double>(num_samples);
1405 if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
1406 throw std::logic_error(
"invalid sampling rate outside [0.0, 1.0]");
1408 uint32_t r_true_count = 0;
1410 for (; idx < (k_ + 1); ++idx) {
1411 if (predicate(data_[idx])) {
1416 double lb_true_fraction = pseudo_hypergeometric_lb_on_p(r_, r_true_count, effective_sampling_rate);
1417 double estimated_true_fraction = (1.0 * r_true_count) / r_;
1418 double ub_true_fraction = pseudo_hypergeometric_ub_on_p(r_, r_true_count, effective_sampling_rate);
1420 return { h_true_wt + (total_wt_r_ * lb_true_fraction),
1421 h_true_wt + (total_wt_r_ * estimated_true_fraction),
1422 h_true_wt + (total_wt_r_ * ub_true_fraction),
1423 total_wt_h + total_wt_r_
1427 template<
typename T,
typename A>
1430 items_deleter(uint32_t num,
const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1431 void set_h(uint32_t h) { h_count = h; }
1432 void set_r(uint32_t r) { r_count = r; }
1433 void operator() (T* ptr) {
1435 for (
size_t i = 0; i < h_count; ++i) {
1440 uint32_t end = h_count + r_count + 1;
1441 for (
size_t i = h_count + 1; i < end; ++i) {
1445 if (ptr !=
nullptr) {
1446 allocator.deallocate(ptr, num);
1456 template<
typename T,
typename A>
1457 class var_opt_sketch<T, A>::weights_deleter {
1459 weights_deleter(uint32_t num,
const A& allocator) : num(num), allocator(allocator) {}
1460 void operator() (
double* ptr) {
1461 if (ptr !=
nullptr) {
1462 allocator.deallocate(ptr, num);
1467 AllocDouble allocator;
1470 template<
typename T,
typename A>
1471 class var_opt_sketch<T, A>::marks_deleter {
1473 marks_deleter(uint32_t num,
const A& allocator) : num(num), allocator(allocator) {}
1474 void operator() (
bool* ptr) {
1475 if (ptr !=
nullptr) {
1476 allocator.deallocate(ptr, 1);
1481 AllocBool allocator;
1485 template<
typename T,
typename A>
1487 return const_iterator(*
this,
false);
1490 template<
typename T,
typename A>
1492 return const_iterator(*
this,
true);
1497 template<
typename T,
typename A>
1501 r_item_wt_(sk.get_tau()),
1502 final_idx_(sk.r_ > 0 ? sk.h_ + sk.r_ + 1 : sk.h_)
1509 idx_ = (sk.h_ == 0 && sk.r_ > 0 ? 1 : 0);
1513 if (idx_ == final_idx_) { sk_ =
nullptr; }
1516 template<
typename T,
typename A>
1517 var_opt_sketch<T, A>::const_iterator::const_iterator(
const var_opt_sketch& sk,
bool is_end,
bool use_r_region) :
1520 r_item_wt_(sk.get_tau()),
1521 final_idx_(sk.h_ + (use_r_region ? 1 + sk.r_ : 0))
1524 idx_ = sk.h_ + 1 + (is_end ? sk.r_ : 0);
1527 idx_ = (is_end ? sk.h_ : 0);
1531 if (idx_ == final_idx_) { sk_ =
nullptr; }
1534 template<
typename T,
typename A>
1535 var_opt_sketch<T, A>::const_iterator::const_iterator(
const const_iterator& other) :
1537 cum_r_weight_(other.cum_r_weight_),
1538 r_item_wt_(other.r_item_wt_),
1540 final_idx_(other.final_idx_)
1543 template<
typename T,
typename A>
1544 typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++() {
1546 if (idx_ > sk_->h_) { cum_r_weight_ += r_item_wt_; }
1550 if (idx_ == final_idx_) {
1553 }
else if (idx_ == sk_->h_ && sk_->r_ > 0) {
1559 template<
typename T,
typename A>
1560 typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++(
int) {
1561 const_iterator tmp(*
this);
1566 template<
typename T,
typename A>
1567 bool var_opt_sketch<T, A>::const_iterator::operator==(
const const_iterator& other)
const {
1568 if (sk_ != other.sk_)
return false;
1569 if (sk_ ==
nullptr)
return true;
1570 return idx_ == other.idx_;
1573 template<
typename T,
typename A>
1574 bool var_opt_sketch<T, A>::const_iterator::operator!=(
const const_iterator& other)
const {
1575 return !operator==(other);
1578 template<
typename T,
typename A>
1579 auto var_opt_sketch<T, A>::const_iterator::operator*() const -> reference {
1581 if (idx_ < sk_->h_) {
1582 wt = sk_->weights_[idx_];
1586 return value_type(sk_->data_[idx_], wt);
1589 template<
typename T,
typename A>
1590 auto var_opt_sketch<T, A>::const_iterator::operator->() const -> pointer {
1594 template<
typename T,
typename A>
1595 bool var_opt_sketch<T, A>::const_iterator::get_mark()
const {
1596 return sk_->marks_ ==
nullptr ? false : sk_->marks_[idx_];
1602 template<
typename T,
typename A>
1603 var_opt_sketch<T, A>::iterator::iterator(
const var_opt_sketch& sk,
bool is_end,
bool use_r_region) :
1606 r_item_wt_(sk.get_tau()),
1607 final_idx_(sk.h_ + (use_r_region ? 1 + sk.r_ : 0))
1610 idx_ = sk.h_ + 1 + (is_end ? sk.r_ : 0);
1613 idx_ = (is_end ? sk.h_ : 0);
1617 if (idx_ == final_idx_) { sk_ =
nullptr; }
1620 template<
typename T,
typename A>
1621 var_opt_sketch<T, A>::iterator::iterator(
const iterator& other) :
1623 cum_r_weight_(other.cum_r_weight_),
1624 r_item_wt_(other.r_item_wt_),
1626 final_idx_(other.final_idx_)
1629 template<
typename T,
typename A>
1630 typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++() {
1632 if (idx_ > sk_->h_) { cum_r_weight_ += r_item_wt_; }
1636 if (idx_ == final_idx_) {
1639 }
else if (idx_ == sk_->h_ && sk_->r_ > 0) {
1646 template<
typename T,
typename A>
1647 typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++(
int) {
1648 const_iterator tmp(*
this);
1653 template<
typename T,
typename A>
1654 bool var_opt_sketch<T, A>::iterator::operator==(
const iterator& other)
const {
1655 if (sk_ != other.sk_)
return false;
1656 if (sk_ ==
nullptr)
return true;
1657 return idx_ == other.idx_;
1660 template<
typename T,
typename A>
1661 bool var_opt_sketch<T, A>::iterator::operator!=(
const iterator& other)
const {
1662 return !operator==(other);
1665 template<
typename T,
typename A>
1666 auto var_opt_sketch<T, A>::iterator::operator*() -> reference {
1668 if (idx_ < sk_->h_) {
1669 wt = sk_->weights_[idx_];
1670 }
else if (idx_ == final_idx_ - 1) {
1671 wt = sk_->total_wt_r_ - cum_r_weight_;
1675 return value_type(sk_->data_[idx_], wt);
1678 template<
typename T,
typename A>
1679 auto var_opt_sketch<T, A>::iterator::operator->() -> pointer {
1683 template<
typename T,
typename A>
1684 bool var_opt_sketch<T, A>::iterator::get_mark()
const {
1685 return sk_->marks_ ==
nullptr ? false : sk_->marks_[idx_];
1692 template<
typename T,
typename A>
1693 uint32_t var_opt_sketch<T, A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
1694 if (max_size < (resize_target << 1)) {
1697 return resize_target;
1700 template<
typename T,
typename A>
1701 uint32_t var_opt_sketch<T, A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
1702 return (lg_target <= lg_min)
1703 ? lg_min : (lg_rf == 0) ? lg_target
1704 : (lg_target - lg_min) % lg_rf + lg_min;
1707 template<
typename T,
typename A>
1708 double var_opt_sketch<T, A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k,
double sampling_rate) {
1709 const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1710 return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
1713 template<
typename T,
typename A>
1714 double var_opt_sketch<T, A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k,
double sampling_rate) {
1715 const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1716 return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
1719 template<
typename T,
typename A>
1720 bool var_opt_sketch<T, A>::is_power_of_2(uint32_t v) {
1721 return v && !(v & (v - 1));
1724 template<
typename T,
typename A>
1725 uint32_t var_opt_sketch<T, A>::to_log_2(uint32_t v) {
1726 if (is_power_of_2(v)) {
1727 return count_trailing_zeros_in_u32(v);
1729 throw std::invalid_argument(
"Attempt to compute integer log2 of non-positive or non-power of 2");
1734 template<
typename T,
typename A>
1735 uint32_t var_opt_sketch<T, A>::next_int(uint32_t max_value) {
1736 std::uniform_int_distribution<uint32_t> dist(0, max_value - 1);
1737 return dist(random_utils::rand);
1740 template<
typename T,
typename A>
1741 double var_opt_sketch<T, A>::next_double_exclude_zero() {
1742 double r = random_utils::next_double(random_utils::rand);
1744 r = random_utils::next_double(random_utils::rand);
This sketch samples data from a stream of items.
Definition: var_opt_sketch.hpp:67
void update(const T &item, double weight=1.0)
Updates this sketch with the given data item with the given weight.
Definition: var_opt_sketch_impl.hpp:709
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
var_opt_sketch(uint32_t k, resize_factor rf=var_opt_constants::DEFAULT_RESIZE_FACTOR, const A &allocator=A())
Constructor.
Definition: var_opt_sketch_impl.hpp:46
static var_opt_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const A &allocator=A())
This method deserializes a sketch from a given stream.
DataSketches namespace.
Definition: binomial_bounds.hpp:38