20 #ifndef REQ_SKETCH_HPP_
21 #define REQ_SKETCH_HPP_
25 #include "req_common.hpp"
26 #include "req_compactor.hpp"
27 #include "quantiles_sorted_view.hpp"
28 #include "optional.hpp"
76 typename Comparator = std::less<T>,
77 typename Allocator = std::allocator<T>
82 using comparator = Comparator;
83 using allocator_type = Allocator;
84 using Compactor = req_compactor<T, Comparator, Allocator>;
85 using AllocCompactor =
typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
86 using vector_double =
typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
103 explicit req_sketch(uint16_t k,
bool hra =
true,
const Comparator& comparator = Comparator(),
104 const Allocator& allocator = Allocator());
140 template<
typename TT,
typename CC,
typename AA>
142 const Allocator& allocator = Allocator());
148 uint16_t
get_k()
const;
166 uint64_t
get_n()
const;
184 template<
typename FwdT>
191 template<
typename FwdSk>
192 void merge(FwdSk&& other);
232 double get_rank(
const T& item,
bool inclusive =
true)
const;
253 vector_double
get_PMF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
277 vector_double
get_CDF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
318 static double get_RSE(uint16_t k,
double rank,
bool hra, uint64_t n);
326 template<
typename TT = T,
typename SerDe = serde<T>,
typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type = 0>
335 template<
typename TT = T,
typename SerDe = serde<T>,
typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type = 0>
343 template<
typename SerDe = serde<T>>
344 void serialize(std::ostream& os,
const SerDe& sd = SerDe())
const;
348 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;
358 template<
typename SerDe = serde<T>>
359 vector_bytes
serialize(
unsigned header_size_bytes = 0,
const SerDe& sd = SerDe())
const;
369 template<
typename SerDe = serde<T>>
371 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
382 template<
typename SerDe = serde<T>>
384 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
391 string<Allocator>
to_string(
bool print_levels =
false,
bool print_items =
false)
const;
393 class const_iterator;
400 const_iterator
begin()
const;
408 const_iterator
end()
const;
417 Comparator comparator_;
418 Allocator allocator_;
421 uint32_t max_nom_size_;
422 uint32_t num_retained_;
424 std::vector<Compactor, AllocCompactor> compactors_;
425 optional<T> min_item_;
426 optional<T> max_item_;
429 void setup_sorted_view()
const;
430 void reset_sorted_view();
432 static const bool LAZY_COMPRESSION =
false;
434 static const uint8_t SERIAL_VERSION = 1;
435 static const uint8_t FAMILY = 17;
436 static const size_t PREAMBLE_SIZE_BYTES = 8;
437 enum flags { RESERVED1, RESERVED2, IS_EMPTY, IS_HIGH_RANK, RAW_ITEMS, IS_LEVEL_ZERO_SORTED };
439 static constexpr
double FIXED_RSE_FACTOR = 0.084;
440 static double relative_rse_factor();
442 uint8_t get_num_levels()
const;
444 void update_max_nom_size();
445 void update_num_retained();
448 static double get_rank_lb(uint16_t k, uint8_t num_levels,
double rank, uint8_t num_std_dev, uint64_t n,
bool hra);
449 static double get_rank_ub(uint16_t k, uint8_t num_levels,
double rank, uint8_t num_std_dev, uint64_t n,
bool hra);
450 static bool is_exact_rank(uint16_t k, uint8_t num_levels,
double rank, uint64_t n,
bool hra);
454 optional<T>&& min_item, optional<T>&& max_item,
455 std::vector<Compactor, AllocCompactor>&& compactors,
const Comparator& comparator);
457 static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
458 static void check_serial_version(uint8_t serial_version);
459 static void check_family_id(uint8_t family_id);
461 template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value,
int>::type = 0>
462 static inline bool check_update_item(
const TT& item) {
463 return !std::isnan(item);
466 template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value,
int>::type = 0>
467 static inline bool check_update_item(
const TT&) {
472 template<
typename TT,
typename CC,
typename AA>
friend class req_sketch;
475 template<
typename T,
typename C,
typename A>
476 class req_sketch<T, C, A>::const_iterator {
478 using iterator_category = std::input_iterator_tag;
479 using value_type = std::pair<const T&, const uint64_t>;
480 using difference_type = void;
481 using pointer =
const return_value_holder<value_type>;
482 using reference =
const value_type;
484 const_iterator& operator++();
485 const_iterator& operator++(
int);
486 bool operator==(
const const_iterator& other)
const;
487 bool operator!=(
const const_iterator& other)
const;
488 reference operator*()
const;
489 pointer operator->()
const;
491 using LevelsIterator =
typename std::vector<Compactor, AllocCompactor>::const_iterator;
492 LevelsIterator levels_it_;
493 LevelsIterator levels_end_;
494 const T* compactor_it_;
495 friend class req_sketch<T, C, A>;
496 const_iterator(LevelsIterator
begin, LevelsIterator
end);
501 #include "req_sketch_impl.hpp"
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition: quantiles_sorted_view.hpp:38
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition: quantiles_sorted_view.hpp:93
Relative Error Quantiles Sketch.
Definition: req_sketch.hpp:79
Comparator get_comparator() const
Returns an instance of the comparator for this sketch.
Definition: req_sketch_impl.hpp:224
quantiles_sorted_view< T, Comparator, Allocator > get_sorted_view() const
Gets the sorted view of this sketch.
Definition: req_sketch_impl.hpp:269
static req_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
req_sketch(const req_sketch< TT, CC, AA > &other, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
Type converting constructor.
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
vector_double get_CDF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analo...
Definition: req_sketch_impl.hpp:251
uint32_t get_num_retained() const
Returns the number of retained items in the sketch.
Definition: req_sketch_impl.hpp:159
vector_double get_PMF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of sp...
Definition: req_sketch_impl.hpp:244
void merge(FwdSk &&other)
Merges another sketch into this one.
Definition: req_sketch_impl.hpp:188
typename quantiles_sorted_view< T, Comparator, Allocator >::quantile_return_type quantile_return_type
Quantile return type.
Definition: req_sketch.hpp:92
void update(FwdT &&item)
Updates this sketch with the given data item.
Definition: req_sketch_impl.hpp:170
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: req_sketch_impl.hpp:369
string< Allocator > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition: req_sketch_impl.hpp:631
uint16_t get_k() const
Returns configured parameter K.
Definition: req_sketch_impl.hpp:139
bool is_empty() const
Returns true if this sketch is empty.
Definition: req_sketch_impl.hpp:149
const T & get_max_item() const
Returns the max item of the stream.
Definition: req_sketch_impl.hpp:218
req_sketch(uint16_t k, bool hra=true, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
Constructor.
double get_rank_upper_bound(double rank, uint8_t num_std_dev) const
Returns an approximate upper bound of the given normalized rank.
Definition: req_sketch_impl.hpp:289
double get_rank(const T &item, bool inclusive=true) const
Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
Definition: req_sketch_impl.hpp:234
static double get_RSE(uint16_t k, double rank, bool hra, uint64_t n)
Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,...
Definition: req_sketch_impl.hpp:294
double get_rank_lower_bound(double rank, uint8_t num_std_dev) const
Returns an approximate lower bound of the given normalized rank.
Definition: req_sketch_impl.hpp:284
Allocator get_allocator() const
Returns an instance of the allocator for this sketch.
Definition: req_sketch_impl.hpp:229
req_sketch & operator=(const req_sketch &other)
Copy assignment.
Definition: req_sketch_impl.hpp:81
quantile_return_type get_quantile(double rank, bool inclusive=true) const
Returns an approximate quantile of the given normalized rank.
Definition: req_sketch_impl.hpp:258
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition: req_sketch_impl.hpp:724
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition: req_sketch_impl.hpp:334
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition: req_sketch_impl.hpp:729
static req_sketch deserialize(const void *bytes, size_t size, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition: req_sketch_impl.hpp:164
const T & get_min_item() const
Returns the min item of the stream.
Definition: req_sketch_impl.hpp:212
bool is_HRA() const
Returns configured parameter High Rank Accuracy.
Definition: req_sketch_impl.hpp:144
uint64_t get_n() const
Returns the length of the input stream.
Definition: req_sketch_impl.hpp:154
DataSketches namespace.
Definition: binomial_bounds.hpp:38