20 #ifndef THETA_SKETCH_IMPL_HPP_
21 #define THETA_SKETCH_IMPL_HPP_
28 #include "binomial_bounds.hpp"
29 #include "theta_helpers.hpp"
30 #include "count_zeros.hpp"
31 #include "bit_packing.hpp"
42 return static_cast<double>(get_theta64()) /
48 return get_num_retained() / get_theta();
53 if (!is_estimation_mode())
return get_num_retained();
54 return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
59 if (!is_estimation_mode())
return get_num_retained();
60 return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
67 std::ostringstream os;
68 os <<
"### Theta sketch summary:" << std::endl;
69 os <<
" num retained entries : " << this->get_num_retained() << std::endl;
70 os <<
" seed hash : " << this->get_seed_hash() << std::endl;
71 os <<
" empty? : " << (this->is_empty() ?
"true" :
"false") << std::endl;
72 os <<
" ordered? : " << (this->is_ordered() ?
"true" :
"false") << std::endl;
73 os <<
" estimation mode? : " << (this->is_estimation_mode() ?
"true" :
"false") << std::endl;
74 os <<
" theta (fraction) : " << this->get_theta() << std::endl;
75 os <<
" theta (raw 64-bit) : " << this->get_theta64() << std::endl;
76 os <<
" estimate : " << this->get_estimate() << std::endl;
77 os <<
" lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
78 os <<
" upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
80 os <<
"### End sketch summary" << std::endl;
84 return string<A>(os.str().c_str(), this->get_allocator());
89 os <<
"### Retained entries" << std::endl;
90 for (
const auto& hash: *
this) {
91 os << hash << std::endl;
93 os <<
"### End retained entries" << std::endl;
101 float p, uint64_t theta, uint64_t seed,
const A& allocator):
102 table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
107 return table_.allocator_;
112 return table_.is_empty_;
117 return table_.num_entries_ > 1 ? false :
true;
122 return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
127 return table_.num_entries_;
132 return compute_seed_hash(table_.seed_);
137 return table_.lg_nom_size_;
147 update(&value,
sizeof(value));
152 update(&value,
sizeof(value));
157 update(
static_cast<int32_t
>(value));
162 update(
static_cast<int64_t
>(value));
167 update(
static_cast<int16_t
>(value));
172 update(
static_cast<int64_t
>(value));
177 update(
static_cast<int8_t
>(value));
182 update(
static_cast<int64_t
>(value));
187 update(canonical_double(value));
192 update(
static_cast<double>(value));
197 if (value.empty())
return;
198 update(value.c_str(), value.length());
203 const uint64_t hash = table_.hash_and_screen(data, length);
204 if (hash == 0)
return;
205 auto result = table_.find(hash);
206 if (!result.second) {
207 table_.insert(result.first, hash);
223 return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
228 return iterator(
nullptr, 0, 1 << table_.lg_cur_size_);
233 return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
238 return const_iterator(
nullptr, 0, 1 << table_.lg_cur_size_);
248 os <<
" lg nominal size : " <<
static_cast<int>(table_.lg_nom_size_) << std::endl;
249 os <<
" lg current size : " <<
static_cast<int>(table_.lg_cur_size_) << std::endl;
250 os <<
" resize factor : " << (1 << table_.rf_) << std::endl;
260 return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
266 template<
typename Other>
274 if (!other.is_empty()) {
275 entries_.reserve(other.get_num_retained());
276 std::copy(other.begin(), other.end(), std::back_inserter(entries_));
277 if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
283 std::vector<uint64_t, A>&& entries):
285 is_ordered_(is_ordered || (entries.size() <= 1ULL)),
286 seed_hash_(seed_hash),
288 entries_(std::move(entries))
293 return entries_.get_allocator();
313 return static_cast<uint32_t
>(entries_.size());
323 return iterator(entries_.data(),
static_cast<uint32_t
>(entries_.size()), 0);
328 return iterator(
nullptr, 0,
static_cast<uint32_t
>(entries_.size()));
333 return const_iterator(entries_.data(),
static_cast<uint32_t
>(entries_.size()), 0);
338 return const_iterator(
nullptr, 0,
static_cast<uint32_t
>(entries_.size()));
346 const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
347 write(os, preamble_longs);
348 write(os, UNCOMPRESSED_SERIAL_VERSION);
349 write(os, SKETCH_TYPE);
350 write<uint16_t>(os, 0);
351 const uint8_t flags_byte(
352 (1 << flags::IS_COMPACT) |
353 (1 << flags::IS_READ_ONLY) |
354 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
355 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
357 write(os, flags_byte);
358 write(os, get_seed_hash());
359 if (preamble_longs > 1) {
360 write(os,
static_cast<uint32_t
>(entries_.size()));
361 write<uint32_t>(os, 0);
363 if (this->is_estimation_mode()) write(os, this->theta_);
364 if (entries_.size() > 0) write(os, entries_.data(), entries_.size() *
sizeof(uint64_t));
369 const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
370 const size_t size = header_size_bytes +
sizeof(uint64_t) * preamble_longs
371 +
sizeof(uint64_t) * entries_.size();
372 vector_bytes bytes(size, 0, entries_.get_allocator());
373 uint8_t* ptr = bytes.data() + header_size_bytes;
375 *ptr++ = preamble_longs;
376 *ptr++ = UNCOMPRESSED_SERIAL_VERSION;
377 *ptr++ = SKETCH_TYPE;
378 ptr +=
sizeof(uint16_t);
379 const uint8_t flags_byte(
380 (1 << flags::IS_COMPACT) |
381 (1 << flags::IS_READ_ONLY) |
382 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
383 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
386 ptr += copy_to_mem(get_seed_hash(), ptr);
387 if (preamble_longs > 1) {
388 ptr += copy_to_mem(
static_cast<uint32_t
>(entries_.size()), ptr);
389 ptr +=
sizeof(uint32_t);
391 if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
392 if (entries_.size() > 0) ptr += copy_to_mem(entries_.data(), ptr, entries_.size() *
sizeof(uint64_t));
398 if (!this->is_ordered() || entries_.size() == 0 ||
399 (entries_.size() == 1 && !this->is_estimation_mode()))
return false;
405 if (is_suitable_for_compression())
return serialize_version_4(os);
406 return serialize(os);
411 if (is_suitable_for_compression())
return serialize_version_4(header_size_bytes);
412 return serialize(header_size_bytes);
419 uint64_t previous = 0;
421 for (
const uint64_t entry: entries_) {
422 const uint64_t delta = entry - previous;
426 return count_leading_zeros_in_u64(ored);
430 void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os)
const {
431 const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
432 const uint8_t entry_bits = 64 - compute_min_leading_zeros();
435 const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(
static_cast<uint32_t
>(entries_.size())));
437 write(os, preamble_longs);
438 write(os, COMPRESSED_SERIAL_VERSION);
439 write(os, SKETCH_TYPE);
440 write(os, entry_bits);
441 write(os, num_entries_bytes);
442 const uint8_t flags_byte(
443 (1 << flags::IS_COMPACT) |
444 (1 << flags::IS_READ_ONLY) |
445 (1 << flags::IS_ORDERED)
447 write(os, flags_byte);
448 write(os, get_seed_hash());
449 if (this->is_estimation_mode()) write(os, this->theta_);
450 uint32_t num_entries =
static_cast<uint32_t
>(entries_.size());
451 for (
unsigned i = 0; i < num_entries_bytes; ++i) {
452 write<uint8_t>(os, num_entries & 0xff);
456 uint64_t previous = 0;
458 vector_bytes buffer(entry_bits, 0, entries_.get_allocator());
462 for (i = 0; i + 7 < entries_.size(); i += 8) {
463 for (
unsigned j = 0; j < 8; ++j) {
464 deltas[j] = entries_[i + j] - previous;
465 previous = entries_[i + j];
467 pack_bits_block8(deltas, buffer.data(), entry_bits);
468 write(os, buffer.data(), buffer.size());
472 if (i < entries_.size()) {
474 uint8_t* ptr = buffer.data();
475 for (; i < entries_.size(); ++i) {
476 const uint64_t delta = entries_[i] - previous;
477 previous = entries_[i];
478 offset = pack_bits(delta, entry_bits, ptr, offset);
480 write(os, buffer.data(), ptr - buffer.data());
485 auto compact_theta_sketch_alloc<A>::serialize_version_4(
unsigned header_size_bytes)
const -> vector_bytes {
486 const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
487 const uint8_t entry_bits = 64 - compute_min_leading_zeros();
488 const size_t compressed_bits = entry_bits * entries_.size();
491 const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(
static_cast<uint32_t
>(entries_.size())));
493 const size_t size = header_size_bytes +
sizeof(uint64_t) * preamble_longs + num_entries_bytes
494 + whole_bytes_to_hold_bits(compressed_bits);
495 vector_bytes bytes(size, 0, entries_.get_allocator());
496 uint8_t* ptr = bytes.data() + header_size_bytes;
498 *ptr++ = preamble_longs;
499 *ptr++ = COMPRESSED_SERIAL_VERSION;
500 *ptr++ = SKETCH_TYPE;
502 *ptr++ = num_entries_bytes;
503 const uint8_t flags_byte(
504 (1 << flags::IS_COMPACT) |
505 (1 << flags::IS_READ_ONLY) |
506 (1 << flags::IS_ORDERED)
509 ptr += copy_to_mem(get_seed_hash(), ptr);
510 if (this->is_estimation_mode()) {
511 ptr += copy_to_mem(theta_, ptr);
513 uint32_t num_entries =
static_cast<uint32_t
>(entries_.size());
514 for (
unsigned i = 0; i < num_entries_bytes; ++i) {
515 *ptr++ = num_entries & 0xff;
519 uint64_t previous = 0;
524 for (i = 0; i + 7 < entries_.size(); i += 8) {
525 for (
unsigned j = 0; j < 8; ++j) {
526 deltas[j] = entries_[i + j] - previous;
527 previous = entries_[i + j];
529 pack_bits_block8(deltas, ptr, entry_bits);
535 for (; i < entries_.size(); ++i) {
536 const uint64_t delta = entries_[i] - previous;
537 previous = entries_[i];
538 offset = pack_bits(delta, entry_bits, ptr, offset);
545 const auto preamble_longs = read<uint8_t>(is);
546 const auto serial_version = read<uint8_t>(is);
547 const auto type = read<uint8_t>(is);
548 checker<true>::check_sketch_type(type, SKETCH_TYPE);
549 switch (serial_version) {
551 return deserialize_v4(preamble_longs, is, seed, allocator);
553 return deserialize_v3(preamble_longs, is, seed, allocator);
555 return deserialize_v1(preamble_longs, is, seed, allocator);
557 return deserialize_v2(preamble_longs, is, seed, allocator);
559 throw std::invalid_argument(
"unexpected sketch serialization version " + std::to_string(serial_version));
564 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v1(
565 uint8_t, std::istream& is, uint64_t seed,
const A& allocator)
567 const auto seed_hash = compute_seed_hash(seed);
570 const auto num_entries = read<uint32_t>(is);
572 const auto theta = read<uint64_t>(is);
573 std::vector<uint64_t, A> entries(num_entries, 0, allocator);
575 if (!is_empty) read(is, entries.data(),
sizeof(uint64_t) * entries.size());
576 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
577 return compact_theta_sketch_alloc(is_empty,
true, seed_hash, theta, std::move(entries));
581 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v2(
582 uint8_t preamble_longs, std::istream& is, uint64_t seed,
const A& allocator)
586 const uint16_t seed_hash = read<uint16_t>(is);
587 checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
588 if (preamble_longs == 1) {
589 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
590 std::vector<uint64_t, A> entries(0, 0, allocator);
592 }
else if (preamble_longs == 2) {
593 const uint32_t num_entries = read<uint32_t>(is);
595 std::vector<uint64_t, A> entries(num_entries, 0, allocator);
596 if (num_entries == 0) {
599 read(is, entries.data(), entries.size() *
sizeof(uint64_t));
600 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
602 }
else if (preamble_longs == 3) {
603 const uint32_t num_entries = read<uint32_t>(is);
605 const auto theta = read<uint64_t>(is);
607 std::vector<uint64_t, A> entries(num_entries, 0, allocator);
609 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
610 return compact_theta_sketch_alloc(
true,
true, seed_hash, theta, std::move(entries));
612 read(is, entries.data(),
sizeof(uint64_t) * entries.size());
613 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
614 return compact_theta_sketch_alloc(
false,
true, seed_hash, theta, std::move(entries));
617 throw std::invalid_argument(std::to_string(preamble_longs) +
" longs of premable, but expected 1, 2, or 3");
622 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v3(
623 uint8_t preamble_longs, std::istream& is, uint64_t seed,
const A& allocator)
626 const auto flags_byte = read<uint8_t>(is);
627 const auto seed_hash = read<uint16_t>(is);
628 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
629 if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
631 uint32_t num_entries = 0;
633 if (preamble_longs == 1) {
636 num_entries = read<uint32_t>(is);
638 if (preamble_longs > 2) theta = read<uint64_t>(is);
641 std::vector<uint64_t, A> entries(num_entries, 0, allocator);
642 if (!is_empty) read(is, entries.data(),
sizeof(uint64_t) * entries.size());
643 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
644 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
645 return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
649 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v4(
650 uint8_t preamble_longs, std::istream& is, uint64_t seed,
const A& allocator)
652 const auto entry_bits = read<uint8_t>(is);
653 const auto num_entries_bytes = read<uint8_t>(is);
654 const auto flags_byte = read<uint8_t>(is);
655 const auto seed_hash = read<uint16_t>(is);
656 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
657 if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
659 if (preamble_longs > 1) theta = read<uint64_t>(is);
660 uint32_t num_entries = 0;
661 for (
unsigned i = 0; i < num_entries_bytes; ++i) {
662 num_entries |= read<uint8_t>(is) << (i << 3);
664 vector_bytes buffer(entry_bits, 0, allocator);
665 std::vector<uint64_t, A> entries(num_entries, 0, allocator);
669 for (i = 0; i + 7 < num_entries; i += 8) {
670 read(is, buffer.data(), buffer.size());
671 unpack_bits_block8(&entries[i], buffer.data(), entry_bits);
674 if (i < num_entries) read(is, buffer.data(), whole_bytes_to_hold_bits((num_entries - i) * entry_bits));
675 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
676 const uint8_t* ptr = buffer.data();
678 for (; i < num_entries; ++i) {
679 offset = unpack_bits(entries[i], entry_bits, ptr, offset);
682 uint64_t previous = 0;
683 for (i = 0; i < num_entries; ++i) {
684 entries[i] += previous;
685 previous = entries[i];
687 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
688 return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
693 auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed,
false);
694 if (data.entry_bits == 64) {
695 const uint64_t* entries =
reinterpret_cast<const uint64_t*
>(data.entries_start_ptr);
696 return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta,
697 std::vector<uint64_t, A>(entries, entries + data.num_entries, allocator));
699 std::vector<uint64_t, A> entries(data.num_entries, 0, allocator);
700 const uint8_t* ptr =
reinterpret_cast<const uint8_t*
>(data.entries_start_ptr);
703 for (i = 0; i + 7 < data.num_entries; i += 8) {
704 unpack_bits_block8(&entries[i], ptr, data.entry_bits);
705 ptr += data.entry_bits;
709 for (; i < data.num_entries; ++i) {
710 offset = unpack_bits(entries[i], data.entry_bits, ptr, offset);
713 uint64_t previous = 0;
714 for (i = 0; i < data.num_entries; ++i) {
715 entries[i] += previous;
716 previous = entries[i];
718 return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::move(entries));
725 wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(
const data_type& data):
741 return data_.is_empty;
746 return data_.is_ordered;
756 return data_.num_entries;
761 return data_.seed_hash;
766 return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, 0);
771 return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, data_.num_entries);
778 void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os)
const {
779 os <<
"### Retained entries" << std::endl;
780 for (
const auto hash: *
this) {
781 os << hash << std::endl;
783 os <<
"### End retained entries" << std::endl;
787 template<
typename Allocator>
788 wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::const_iterator(
789 const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index):
791 entry_bits_(entry_bits),
792 num_entries_(num_entries),
795 is_block_mode_(num_entries_ >= 8),
799 if (entry_bits == 64) {
800 ptr_ =
reinterpret_cast<const uint64_t*
>(ptr) + index;
801 }
else if (index < num_entries) {
802 if (is_block_mode_) {
803 unpack_bits_block8(buffer_,
reinterpret_cast<const uint8_t*
>(ptr_), entry_bits_);
804 ptr_ =
reinterpret_cast<const uint8_t*
>(ptr_) + entry_bits_;
805 for (
int i = 0; i < 8; ++i) {
806 buffer_[i] += previous_;
807 previous_ = buffer_[i];
810 offset_ = unpack_bits(buffer_[0], entry_bits_,
reinterpret_cast<const uint8_t*&
>(ptr_), offset_);
811 buffer_[0] += previous_;
812 previous_ = buffer_[0];
817 template<
typename Allocator>
818 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++() -> const_iterator& {
819 if (entry_bits_ == 64) {
820 ptr_ =
reinterpret_cast<const uint64_t*
>(ptr_) + 1;
824 if (index_ < num_entries_) {
825 if (is_block_mode_) {
829 if (index_ + 8 < num_entries_) {
830 unpack_bits_block8(buffer_,
reinterpret_cast<const uint8_t*
>(ptr_), entry_bits_);
831 ptr_ =
reinterpret_cast<const uint8_t*
>(ptr_) + entry_bits_;
832 for (
int i = 0; i < 8; ++i) {
833 buffer_[i] += previous_;
834 previous_ = buffer_[i];
837 is_block_mode_ =
false;
838 offset_ = unpack_bits(buffer_[0], entry_bits_,
reinterpret_cast<const uint8_t*&
>(ptr_), offset_);
839 buffer_[0] += previous_;
840 previous_ = buffer_[0];
844 offset_ = unpack_bits(buffer_[0], entry_bits_,
reinterpret_cast<const uint8_t*&
>(ptr_), offset_);
845 buffer_[0] += previous_;
846 previous_ = buffer_[0];
852 template<
typename Allocator>
853 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++(
int) -> const_iterator {
854 const_iterator tmp(*
this);
859 template<
typename Allocator>
860 bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator!=(
const const_iterator& other)
const {
861 if (entry_bits_ == 64)
return ptr_ != other.ptr_;
862 return index_ != other.index_;
865 template<
typename Allocator>
866 bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(
const const_iterator& other)
const {
867 if (entry_bits_ == 64)
return ptr_ == other.ptr_;
868 return index_ == other.index_;
871 template<
typename Allocator>
872 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
873 if (entry_bits_ == 64)
return *
reinterpret_cast<const uint64_t*
>(ptr_);
874 return buffer_[buf_i_];
877 template<
typename Allocator>
878 auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
879 if (entry_bits_ == 64)
return reinterpret_cast<const uint64_t*
>(ptr_);
880 return buffer_ + buf_i_;
double get_estimate() const
Definition: theta_sketch_impl.hpp:47
double get_lower_bound(uint8_t num_std_devs) const
Returns the approximate lower error bound given a number of standard deviations.
Definition: theta_sketch_impl.hpp:52
virtual string< Allocator > to_string(bool print_items=false) const
Provides a human-readable summary of this sketch as a string.
Definition: theta_sketch_impl.hpp:64
double get_upper_bound(uint8_t num_std_devs) const
Returns the approximate upper error bound given a number of standard deviations.
Definition: theta_sketch_impl.hpp:58
double get_theta() const
Definition: theta_sketch_impl.hpp:41
bool is_estimation_mode() const
Definition: theta_sketch_impl.hpp:36
Compact Theta sketch.
Definition: theta_sketch.hpp:359
compact_theta_sketch_alloc(const Other &other, bool ordered)
Copy constructor.
Definition: theta_sketch_impl.hpp:267
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: theta_sketch_impl.hpp:345
virtual uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:307
virtual uint32_t get_num_retained() const
Definition: theta_sketch_impl.hpp:312
virtual bool is_empty() const
Definition: theta_sketch_impl.hpp:297
virtual bool is_ordered() const
Definition: theta_sketch_impl.hpp:302
virtual uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:317
virtual iterator end()
Iterator pointing past the valid range.
Definition: theta_sketch_impl.hpp:327
static compact_theta_sketch_alloc deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
virtual Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:292
void serialize_compressed(std::ostream &os) const
This method serializes the sketch into a given stream in a compressed binary form.
Definition: theta_sketch_impl.hpp:404
virtual iterator begin()
Iterator over hash values in this sketch.
Definition: theta_sketch_impl.hpp:322
Theta base builder.
Definition: theta_update_sketch_base.hpp:97
Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
Definition: theta_sketch.hpp:127
Update Theta sketch builder.
Definition: theta_sketch.hpp:509
update_theta_sketch_alloc build() const
Definition: theta_sketch_impl.hpp:259
Update Theta sketch.
Definition: theta_sketch.hpp:175
void trim()
Remove retained entries in excess of the nominal size k (if any)
Definition: theta_sketch_impl.hpp:212
virtual uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:121
virtual bool is_empty() const
Definition: theta_sketch_impl.hpp:111
virtual bool is_ordered() const
Definition: theta_sketch_impl.hpp:116
virtual uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:131
virtual Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:106
void reset()
Reset the sketch to the initial empty state.
Definition: theta_sketch_impl.hpp:217
update_theta_sketch_alloc(const update_theta_sketch_alloc &other)=default
Copy constructor.
Wrapped Compact Theta sketch.
Definition: theta_sketch.hpp:526
uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:750
uint32_t get_num_retained() const
Definition: theta_sketch_impl.hpp:755
bool is_empty() const
Definition: theta_sketch_impl.hpp:740
bool is_ordered() const
Definition: theta_sketch_impl.hpp:745
uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:760
static const wrapped_compact_theta_sketch_alloc wrap(const void *bytes, size_t size, uint64_t seed=DEFAULT_SEED, bool dump_on_error=false)
This method wraps a serialized compact sketch as an array of bytes.
Definition: theta_sketch_impl.hpp:730
Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:735
const_iterator begin() const
Const iterator over hash values in this sketch.
Definition: theta_sketch_impl.hpp:765
const_iterator end() const
Const iterator pointing past the valid range.
Definition: theta_sketch_impl.hpp:770
const uint64_t MAX_THETA
max theta - signed max for compatibility with Java
Definition: theta_constants.hpp:36
DataSketches namespace.
Definition: binomial_bounds.hpp:38