20 #ifndef TUPLE_SKETCH_HPP_
21 #define TUPLE_SKETCH_HPP_
26 #include "theta_update_sketch_base.hpp"
31 template<
typename S,
typename A>
class tuple_sketch;
32 template<
typename S,
typename U,
typename P,
typename A>
class update_tuple_sketch;
33 template<
typename S,
typename A>
class compact_tuple_sketch;
34 template<
typename A>
class theta_sketch_alloc;
36 template<
typename K,
typename V>
37 struct pair_extract_key {
38 K& operator()(std::pair<K, V>& entry)
const {
41 const K& operator()(
const std::pair<K, V>& entry)
const {
52 typename Allocator = std::allocator<Summary>
56 using Entry = std::pair<uint64_t, Summary>;
57 using ExtractKey = pair_extract_key<uint64_t, Summary>;
58 using iterator = theta_iterator<Entry, ExtractKey>;
59 using const_iterator = theta_const_iterator<Entry, ExtractKey>;
87 double get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries)
const ;
108 double get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries)
const ;
155 string<Allocator>
to_string(
bool print_items =
false)
const;
168 virtual iterator
end() = 0;
174 virtual const_iterator
begin()
const = 0;
181 virtual const_iterator
end()
const = 0;
184 virtual void print_specifics(std::ostringstream& os)
const = 0;
188 static void check_sketch_type(uint8_t actual, uint8_t expected);
189 static void check_serial_version(uint8_t actual, uint8_t expected);
190 static void check_seed_hash(uint16_t actual, uint16_t expected);
196 template<
typename Summary,
typename Update>
197 struct default_tuple_update_policy {
198 Summary create()
const {
201 void update(Summary& summary,
const Update& update)
const {
213 typename Update = Summary,
214 typename Policy = default_tuple_update_policy<Summary, Update>,
215 typename Allocator = std::allocator<Summary>
220 using Entry =
typename Base::Entry;
221 using ExtractKey =
typename Base::ExtractKey;
222 using iterator =
typename Base::iterator;
223 using const_iterator =
typename Base::const_iterator;
224 using AllocEntry =
typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
225 using tuple_map = theta_update_sketch_base<Entry, ExtractKey, AllocEntry>;
226 using resize_factor =
typename tuple_map::resize_factor;
252 resize_factor
get_rf()
const;
259 template<
typename FwdUpdate>
260 inline void update(
const std::string& key, FwdUpdate&& value);
267 template<
typename FwdUpdate>
268 inline void update(uint64_t key, FwdUpdate&& value);
275 template<
typename FwdUpdate>
276 inline void update(int64_t key, FwdUpdate&& value);
284 template<
typename FwdUpdate>
285 inline void update(uint32_t key, FwdUpdate&& value);
293 template<
typename FwdUpdate>
294 inline void update(int32_t key, FwdUpdate&& value);
302 template<
typename FwdUpdate>
303 inline void update(uint16_t key, FwdUpdate&& value);
311 template<
typename FwdUpdate>
312 inline void update(int16_t key, FwdUpdate&& value);
320 template<
typename FwdUpdate>
321 inline void update(uint8_t key, FwdUpdate&& value);
329 template<
typename FwdUpdate>
330 inline void update(int8_t key, FwdUpdate&& value);
338 template<
typename FwdUpdate>
339 inline void update(
double key, FwdUpdate&& value);
347 template<
typename FwdUpdate>
348 inline void update(
float key, FwdUpdate&& value);
364 template<
typename FwdUpdate>
365 void update(
const void* key,
size_t length, FwdUpdate&& value);
390 template<
typename Predicate>
393 virtual iterator
begin();
394 virtual iterator
end();
395 virtual const_iterator
begin()
const;
396 virtual const_iterator
end()
const;
403 update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
float p, uint64_t theta, uint64_t seed,
const Policy& policy,
const Allocator& allocator);
405 virtual void print_specifics(std::ostringstream& os)
const;
414 typename Allocator = std::allocator<Summary>
419 using Entry =
typename Base::Entry;
420 using ExtractKey =
typename Base::ExtractKey;
421 using iterator =
typename Base::iterator;
422 using const_iterator =
typename Base::const_iterator;
423 using AllocEntry =
typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
424 using AllocU64 =
typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
425 using AllocBytes =
typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
426 using vector_bytes = std::vector<uint8_t, AllocBytes>;
427 using comparator = compare_by_key<ExtractKey>;
429 static const uint8_t SERIAL_VERSION_LEGACY = 1;
430 static const uint8_t SERIAL_VERSION = 3;
431 static const uint8_t SKETCH_FAMILY = 9;
432 static const uint8_t SKETCH_TYPE = 1;
433 static const uint8_t SKETCH_TYPE_LEGACY = 5;
434 enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
498 template<typename Predicate>
508 template<typename Sketch, typename Predicate>
516 template<typename SerDe =
serde<Summary>>
517 void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
528 template<typename SerDe =
serde<Summary>>
529 vector_bytes
serialize(
unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
531 virtual iterator
begin();
532 virtual iterator
end();
533 virtual const_iterator
begin() const;
534 virtual const_iterator
end() const;
544 template<typename SerDe =
serde<Summary>>
546 const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
557 template<typename SerDe =
serde<Summary>>
559 const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
566 std::vector<Entry, AllocEntry> entries_;
573 template<typename SerDe, typename SS = Summary, typename std::enable_if<std::is_arithmetic<SS>::value,
int>::type = 0>
581 template<typename SerDe, typename SS = Summary, typename std::enable_if<!std::is_arithmetic<SS>::value,
int>::type = 0>
585 class deleter_of_summaries {
587 deleter_of_summaries(uint32_t num,
bool destroy,
const Allocator& allocator):
588 allocator_(allocator), num_(num), destroy_(destroy) {}
589 void set_destroy(
bool destroy) { destroy_ = destroy; }
590 void operator() (Summary* ptr) {
591 if (ptr !=
nullptr) {
593 for (uint32_t i = 0; i < num_; ++i) ptr[i].~Summary();
595 allocator_.deallocate(ptr, num_);
599 Allocator allocator_;
604 virtual void print_specifics(std::ostringstream& os)
const;
606 template<
typename E,
typename EK,
typename P,
typename S,
typename CS,
typename A>
friend class theta_union_base;
607 template<
typename E,
typename EK,
typename P,
typename S,
typename CS,
typename A>
friend class theta_intersection_base;
608 template<
typename E,
typename EK,
typename CS,
typename A>
friend class theta_set_difference_base;
613 template<
typename Derived,
typename Policy,
typename Allocator>
623 template<
typename S,
typename U,
typename P,
typename A>
632 builder(
const P& policy = P(),
const A& allocator = A());
643 #include "tuple_sketch_impl.hpp"
Compact Tuple sketch.
Definition: tuple_sketch.hpp:416
virtual uint64_t get_theta64() const
Definition: tuple_sketch_impl.hpp:339
virtual uint32_t get_num_retained() const
Definition: tuple_sketch_impl.hpp:344
compact_tuple_sketch filter(const Predicate &predicate) const
Produces a Compact Tuple sketch from this sketch by applying a given predicate to each entry.
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: tuple_sketch_impl.hpp:401
virtual bool is_empty() const
Definition: tuple_sketch_impl.hpp:329
virtual bool is_ordered() const
Definition: tuple_sketch_impl.hpp:334
virtual uint16_t get_seed_hash() const
Definition: tuple_sketch_impl.hpp:349
virtual iterator end()
Iterator pointing past the valid range.
Definition: tuple_sketch_impl.hpp:602
compact_tuple_sketch(const Base &other, bool ordered)
Copy constructor.
Definition: tuple_sketch_impl.hpp:287
virtual Allocator get_allocator() const
Definition: tuple_sketch_impl.hpp:324
static compact_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const SerDe &sd=SerDe(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
virtual iterator begin()
Iterator over entries in this sketch.
Definition: tuple_sketch_impl.hpp:597
compact_tuple_sketch(const compact_tuple_sketch &other)=default
Copy constructor.
size_t get_serialized_size_summaries_bytes(const SerDe &sd) const
Computes size needed to serialize summaries in the sketch.
Theta base builder.
Definition: theta_update_sketch_base.hpp:97
Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
Definition: theta_sketch.hpp:127
Tuple base builder.
Definition: tuple_sketch.hpp:614
Base class for Tuple sketch.
Definition: tuple_sketch.hpp:54
double get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate upper error bound given a number of standard deviations over an arbitrary num...
Definition: tuple_sketch_impl.hpp:57
virtual const_iterator begin() const =0
Const iterator over entries in this sketch.
double get_estimate() const
Definition: tuple_sketch_impl.hpp:40
virtual bool is_ordered() const =0
virtual const_iterator end() const =0
Const iterator pointing past the valid range.
virtual bool is_empty() const =0
double get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate lower error bound given a number of standard deviations over an arbitrary num...
Definition: tuple_sketch_impl.hpp:45
string< Allocator > to_string(bool print_items=false) const
Provides a human-readable summary of this sketch as a string.
Definition: tuple_sketch_impl.hpp:69
virtual uint32_t get_num_retained() const =0
double get_theta() const
Definition: tuple_sketch_impl.hpp:34
virtual iterator end()=0
Iterator pointing past the valid range.
virtual uint16_t get_seed_hash() const =0
virtual iterator begin()=0
Iterator over entries in this sketch.
bool is_estimation_mode() const
Definition: tuple_sketch_impl.hpp:29
virtual Allocator get_allocator() const =0
virtual uint64_t get_theta64() const =0
Update Tuple sketch builder.
Definition: tuple_sketch.hpp:624
Update Tuple sketch.
Definition: tuple_sketch.hpp:217
void update(int16_t key, FwdUpdate &&value)
Update this sketch with a given signed 16-bit integer.
void trim()
Remove retained entries in excess of the nominal size k (if any)
Definition: tuple_sketch_impl.hpp:227
void update(uint64_t key, FwdUpdate &&value)
Update this sketch with a given unsigned 64-bit integer.
virtual uint64_t get_theta64() const
Definition: tuple_sketch_impl.hpp:120
virtual uint32_t get_num_retained() const
Definition: tuple_sketch_impl.hpp:125
resize_factor get_rf() const
Definition: tuple_sketch_impl.hpp:140
void update(int32_t key, FwdUpdate &&value)
Update this sketch with a given signed 32-bit integer.
virtual bool is_empty() const
Definition: tuple_sketch_impl.hpp:110
virtual bool is_ordered() const
Definition: tuple_sketch_impl.hpp:115
void update(float key, FwdUpdate &&value)
Update this sketch with a given floating point value.
virtual uint16_t get_seed_hash() const
Definition: tuple_sketch_impl.hpp:130
virtual iterator end()
Iterator pointing past the valid range.
Definition: tuple_sketch_impl.hpp:242
void update(int8_t key, FwdUpdate &&value)
Update this sketch with a given signed 8-bit integer.
compact_tuple_sketch< Summary, Allocator > compact(bool ordered=true) const
Converts this sketch to a compact sketch (ordered or unordered).
Definition: tuple_sketch_impl.hpp:257
uint8_t get_lg_k() const
Definition: tuple_sketch_impl.hpp:135
void update(const std::string &key, FwdUpdate &&value)
Update this sketch with a given string.
void update(uint8_t key, FwdUpdate &&value)
Update this sketch with a given unsigned 8-bit integer.
void update(uint32_t key, FwdUpdate &&value)
Update this sketch with a given unsigned 32-bit integer.
virtual Allocator get_allocator() const
Definition: tuple_sketch_impl.hpp:105
compact_tuple_sketch< Summary, Allocator > filter(const Predicate &predicate) const
Produces a Compact Tuple sketch from this sketch by applying a given predicate to each entry.
void update(int64_t key, FwdUpdate &&value)
Update this sketch with a given signed 64-bit integer.
void reset()
Reset the sketch to the initial empty state.
Definition: tuple_sketch_impl.hpp:232
void update(const void *key, size_t length, FwdUpdate &&value)
Update this sketch with given data of any type.
virtual iterator begin()
Iterator over entries in this sketch.
Definition: tuple_sketch_impl.hpp:237
void update(double key, FwdUpdate &&value)
Update this sketch with a given double-precision floating point value.
void update(uint16_t key, FwdUpdate &&value)
Update this sketch with a given unsigned 16-bit integer.
DataSketches namespace.
Definition: binomial_bounds.hpp:38
Interface for serializing and deserializing items.
Definition: serde.hpp:34