20 #ifndef THETA_SKETCH_HPP_
21 #define THETA_SKETCH_HPP_
23 #include "theta_update_sketch_base.hpp"
24 #include "compact_theta_sketch_parser.hpp"
29 template<
typename A>
class theta_sketch_alloc;
30 template<
typename A>
class update_theta_sketch_alloc;
31 template<
typename A>
class compact_theta_sketch_alloc;
32 template<
typename A>
class wrapped_compact_theta_sketch_alloc;
44 template<
typename Allocator = std::allocator<uint64_t>>
118 virtual string<Allocator>
to_string(
bool print_items =
false)
const;
121 virtual void print_specifics(std::ostringstream& os)
const = 0;
122 virtual void print_items(std::ostringstream& os)
const = 0;
126 template<
typename Allocator = std::allocator<uint64_t>>
129 using Entry = uint64_t;
130 using ExtractKey = trivial_extract_key;
131 using iterator = theta_iterator<Entry, ExtractKey>;
132 using const_iterator = theta_const_iterator<Entry, ExtractKey>;
147 virtual iterator
end() = 0;
153 virtual const_iterator
begin()
const = 0;
160 virtual const_iterator
end()
const = 0;
163 virtual void print_items(std::ostringstream& os)
const;
174 template<
typename Allocator = std::allocator<uint64_t>>
178 using Entry =
typename Base::Entry;
179 using ExtractKey =
typename Base::ExtractKey;
180 using iterator =
typename Base::iterator;
181 using const_iterator =
typename Base::const_iterator;
182 using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
183 using resize_factor =
typename theta_table::resize_factor;
231 resize_factor
get_rf()
const;
237 void update(
const std::string& value);
243 void update(uint64_t value);
249 void update(int64_t value);
256 void update(uint32_t value);
263 void update(int32_t value);
270 void update(uint16_t value);
277 void update(int16_t value);
284 void update(uint8_t value);
291 void update(int8_t value);
298 void update(
double value);
320 void update(
const void* data,
size_t length);
339 virtual iterator
begin();
340 virtual iterator
end();
341 virtual const_iterator
begin()
const;
342 virtual const_iterator
end()
const;
349 uint64_t theta, uint64_t seed,
const Allocator& allocator);
351 virtual void print_specifics(std::ostringstream& os)
const;
358 template<
typename Allocator = std::allocator<uint64_t>>
362 using iterator =
typename Base::iterator;
363 using const_iterator =
typename Base::const_iterator;
364 using AllocBytes =
typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
365 using vector_bytes = std::vector<uint8_t, AllocBytes>;
367 static const uint8_t UNCOMPRESSED_SERIAL_VERSION = 3;
368 static const uint8_t COMPRESSED_SERIAL_VERSION = 4;
369 static const uint8_t SKETCH_TYPE = 3;
382 template<
typename Other>
433 vector_bytes
serialize(
unsigned header_size_bytes = 0)
const;
454 virtual iterator
begin();
455 virtual iterator
end();
456 virtual const_iterator
begin()
const;
457 virtual const_iterator
end()
const;
467 uint64_t seed = DEFAULT_SEED,
const Allocator& allocator = Allocator());
478 uint64_t seed = DEFAULT_SEED,
const Allocator& allocator = Allocator());
481 enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
487 std::vector<uint64_t, Allocator> entries_;
489 bool is_suitable_for_compression()
const;
490 uint8_t compute_min_leading_zeros()
const;
491 void serialize_version_4(std::ostream& os)
const;
492 vector_bytes serialize_version_4(
unsigned header_size_bytes = 0)
const;
494 static compact_theta_sketch_alloc deserialize_v1(uint8_t preamble_longs, std::istream& is, uint64_t seed,
const Allocator& allocator);
495 static compact_theta_sketch_alloc deserialize_v2(uint8_t preamble_longs, std::istream& is, uint64_t seed,
const Allocator& allocator);
496 static compact_theta_sketch_alloc deserialize_v3(uint8_t preamble_longs, std::istream& is, uint64_t seed,
const Allocator& allocator);
497 static compact_theta_sketch_alloc deserialize_v4(uint8_t preamble_longs, std::istream& is, uint64_t seed,
const Allocator& allocator);
499 virtual void print_specifics(std::ostringstream& os)
const;
501 template<
typename E,
typename EK,
typename P,
typename S,
typename CS,
typename A>
friend class theta_union_base;
502 template<
typename E,
typename EK,
typename P,
typename S,
typename CS,
typename A>
friend class theta_intersection_base;
503 template<
typename E,
typename EK,
typename CS,
typename A>
friend class theta_set_difference_base;
508 template<
typename Allocator>
515 builder(
const Allocator& allocator = Allocator());
525 template<
typename Allocator = std::allocator<uint64_t>>
528 class const_iterator;
541 const_iterator
begin()
const;
548 const_iterator
end()
const;
561 virtual void print_specifics(std::ostringstream& os)
const;
562 virtual void print_items(std::ostringstream& os)
const;
565 using data_type = compact_theta_sketch_parser<true>::compact_theta_sketch_data;
571 template<
typename Allocator>
574 using iterator_category = std::input_iterator_tag;
575 using value_type =
const uint64_t;
576 using difference_type = void;
577 using pointer = value_type*;
578 using reference = uint64_t;
580 const_iterator(
const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index);
581 const_iterator& operator++();
582 const_iterator operator++(
int);
583 bool operator==(
const const_iterator& other)
const;
584 bool operator!=(
const const_iterator& other)
const;
585 reference operator*()
const;
586 pointer operator->()
const;
591 uint32_t num_entries_;
602 #include "theta_sketch_impl.hpp"
Abstract base class for Theta sketch.
Definition: theta_sketch.hpp:45
double get_estimate() const
Definition: theta_sketch_impl.hpp:47
double get_lower_bound(uint8_t num_std_devs) const
Returns the approximate lower error bound given a number of standard deviations.
Definition: theta_sketch_impl.hpp:52
virtual bool is_ordered() const =0
virtual bool is_empty() const =0
virtual string< Allocator > to_string(bool print_items=false) const
Provides a human-readable summary of this sketch as a string.
Definition: theta_sketch_impl.hpp:64
virtual uint32_t get_num_retained() const =0
double get_upper_bound(uint8_t num_std_devs) const
Returns the approximate upper error bound given a number of standard deviations.
Definition: theta_sketch_impl.hpp:58
double get_theta() const
Definition: theta_sketch_impl.hpp:41
virtual uint16_t get_seed_hash() const =0
bool is_estimation_mode() const
Definition: theta_sketch_impl.hpp:36
virtual Allocator get_allocator() const =0
virtual uint64_t get_theta64() const =0
Compact Theta sketch.
Definition: theta_sketch.hpp:359
compact_theta_sketch_alloc(const Other &other, bool ordered)
Copy constructor.
Definition: theta_sketch_impl.hpp:267
compact_theta_sketch_alloc(const compact_theta_sketch_alloc &other)=default
Copy constructor.
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: theta_sketch_impl.hpp:345
virtual uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:307
static compact_theta_sketch_alloc deserialize(const void *bytes, size_t size, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
virtual uint32_t get_num_retained() const
Definition: theta_sketch_impl.hpp:312
virtual bool is_empty() const
Definition: theta_sketch_impl.hpp:297
virtual bool is_ordered() const
Definition: theta_sketch_impl.hpp:302
virtual uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:317
virtual iterator end()
Iterator pointing past the valid range.
Definition: theta_sketch_impl.hpp:327
static compact_theta_sketch_alloc deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
virtual Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:292
compact_theta_sketch_alloc(compact_theta_sketch_alloc &&other) noexcept=default
Move constructor.
compact_theta_sketch_alloc & operator=(compact_theta_sketch_alloc &&other)=default
Move assignment.
void serialize_compressed(std::ostream &os) const
This method serializes the sketch into a given stream in a compressed binary form.
Definition: theta_sketch_impl.hpp:404
virtual iterator begin()
Iterator over hash values in this sketch.
Definition: theta_sketch_impl.hpp:322
compact_theta_sketch_alloc & operator=(const compact_theta_sketch_alloc &other)=default
Copy assignment.
Theta base builder.
Definition: theta_update_sketch_base.hpp:97
virtual const_iterator begin() const =0
Const iterator over hash values in this sketch.
virtual const_iterator end() const =0
Const iterator pointing past the valid range.
virtual iterator end()=0
Iterator pointing past the valid range.
virtual iterator begin()=0
Iterator over hash values in this sketch.
Update Theta sketch builder.
Definition: theta_sketch.hpp:509
Update Theta sketch.
Definition: theta_sketch.hpp:175
void trim()
Remove retained entries in excess of the nominal size k (if any)
Definition: theta_sketch_impl.hpp:212
virtual uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:121
update_theta_sketch_alloc & operator=(const update_theta_sketch_alloc &other)=default
Copy assignment.
virtual uint32_t get_num_retained() const
Definition: theta_sketch_impl.hpp:126
resize_factor get_rf() const
Definition: theta_sketch_impl.hpp:141
void update(const std::string &value)
Update this sketch with a given string.
Definition: theta_sketch_impl.hpp:196
virtual bool is_empty() const
Definition: theta_sketch_impl.hpp:111
virtual bool is_ordered() const
Definition: theta_sketch_impl.hpp:116
virtual uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:131
update_theta_sketch_alloc(update_theta_sketch_alloc &&other) noexcept=default
Move constructor.
virtual iterator end()
Iterator pointing past the valid range.
Definition: theta_sketch_impl.hpp:227
uint8_t get_lg_k() const
Definition: theta_sketch_impl.hpp:136
virtual Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:106
void reset()
Reset the sketch to the initial empty state.
Definition: theta_sketch_impl.hpp:217
virtual iterator begin()
Iterator over hash values in this sketch.
Definition: theta_sketch_impl.hpp:222
compact_theta_sketch_alloc< Allocator > compact(bool ordered=true) const
Converts this sketch to a compact sketch (ordered or unordered).
Definition: theta_sketch_impl.hpp:242
update_theta_sketch_alloc & operator=(update_theta_sketch_alloc &&other)=default
Move assignment.
update_theta_sketch_alloc(const update_theta_sketch_alloc &other)=default
Copy constructor.
Wrapped Compact Theta sketch.
Definition: theta_sketch.hpp:526
uint64_t get_theta64() const
Definition: theta_sketch_impl.hpp:750
uint32_t get_num_retained() const
Definition: theta_sketch_impl.hpp:755
bool is_empty() const
Definition: theta_sketch_impl.hpp:740
bool is_ordered() const
Definition: theta_sketch_impl.hpp:745
uint16_t get_seed_hash() const
Definition: theta_sketch_impl.hpp:760
static const wrapped_compact_theta_sketch_alloc wrap(const void *bytes, size_t size, uint64_t seed=DEFAULT_SEED, bool dump_on_error=false)
This method wraps a serialized compact sketch as an array of bytes.
Definition: theta_sketch_impl.hpp:730
Allocator get_allocator() const
Definition: theta_sketch_impl.hpp:735
const_iterator begin() const
Const iterator over hash values in this sketch.
Definition: theta_sketch_impl.hpp:765
const_iterator end() const
Const iterator pointing past the valid range.
Definition: theta_sketch_impl.hpp:770
DataSketches namespace.
Definition: binomial_bounds.hpp:38