20 #ifndef _QUANTILES_SKETCH_HPP_
21 #define _QUANTILES_SKETCH_HPP_
27 #include "quantiles_sorted_view.hpp"
28 #include "common_defs.hpp"
30 #include "optional.hpp"
35 namespace quantiles_constants {
150 template <
typename T,
151 typename Comparator = std::less<T>,
152 typename Allocator = std::allocator<T>>
155 using value_type = T;
156 using allocator_type = Allocator;
157 using comparator = Comparator;
159 using vector_double =
typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
168 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
203 template<
typename From,
typename FC,
typename FA>
205 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
211 template<
typename FwdT>
218 template<
typename FwdSk>
219 void merge(FwdSk&& other);
231 uint16_t
get_k()
const;
237 uint64_t
get_n()
const;
289 quantile_return_type
get_quantile(
double rank,
bool inclusive =
true)
const;
305 double get_rank(
const T& item,
bool inclusive =
true)
const;
329 vector_double
get_PMF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
356 vector_double
get_CDF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
364 template<
typename SerDe = serde<T>,
typename TT = T,
typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type = 0>
373 template<
typename SerDe = serde<T>,
typename TT = T,
typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type = 0>
381 template<
typename SerDe = serde<T>>
382 void serialize(std::ostream& os,
const SerDe& sd = SerDe())
const;
386 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;
397 template<
typename SerDe = serde<T>>
398 vector_bytes
serialize(
unsigned header_size_bytes = 0,
const SerDe& sd = SerDe())
const;
408 template<
typename SerDe = serde<T>>
410 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
421 template<
typename SerDe = serde<T>>
423 const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
449 string<Allocator>
to_string(
bool print_levels =
false,
bool print_items =
false)
const;
451 class const_iterator;
458 const_iterator
begin()
const;
466 const_iterator
end()
const;
475 using Level = std::vector<T, Allocator>;
476 using VectorLevels = std::vector<Level, typename std::allocator_traits<Allocator>::template rebind_alloc<Level>>;
491 static const size_t EMPTY_SIZE_BYTES = 8;
492 static const uint8_t SERIAL_VERSION_1 = 1;
493 static const uint8_t SERIAL_VERSION_2 = 2;
494 static const uint8_t SERIAL_VERSION = 3;
495 static const uint8_t FAMILY = 8;
497 enum flags { RESERVED0, RESERVED1, IS_EMPTY, IS_COMPACT, IS_SORTED };
499 static const uint8_t PREAMBLE_LONGS_SHORT = 1;
500 static const uint8_t PREAMBLE_LONGS_FULL = 2;
501 static const size_t DATA_START = 16;
503 Comparator comparator_;
504 Allocator allocator_;
505 bool is_base_buffer_sorted_;
508 uint64_t bit_pattern_;
510 VectorLevels levels_;
511 optional<T> min_item_;
512 optional<T> max_item_;
515 void setup_sorted_view()
const;
516 void reset_sorted_view();
521 Level&& base_buffer, VectorLevels&& levels,
522 optional<T>&& min_item, optional<T>&& max_item,
523 bool is_sorted,
const Comparator& comparator = Comparator(),
const Allocator& allocator = Allocator());
525 void grow_base_buffer();
526 void process_full_base_buffer();
529 bool grow_levels_if_needed();
532 template<
typename FwdV>
533 static void in_place_propagate_carry(uint8_t starting_level, FwdV&& buf_size_k,
534 Level& buf_size_2k,
bool apply_as_update,
536 static void zip_buffer(Level& buf_in, Level& buf_out);
537 static void merge_two_size_k_buffers(Level& arr_in_1, Level& arr_in_2, Level& arr_out,
const Comparator& comparator);
539 template<
typename SerDe>
540 static Level deserialize_array(std::istream& is, uint32_t num_items, uint32_t capcacity,
const SerDe&
serde,
const Allocator& allocator);
542 template<
typename SerDe>
543 static std::pair<Level, size_t> deserialize_array(
const void* bytes,
size_t size, uint32_t num_items, uint32_t capcacity,
const SerDe&
serde,
const Allocator& allocator);
545 static void check_k(uint16_t k);
546 static void check_serial_version(uint8_t serial_version);
547 static void check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version);
548 static void check_family_id(uint8_t family_id);
550 static uint32_t compute_retained_items(uint16_t k, uint64_t n);
551 static uint32_t compute_base_buffer_items(uint16_t k, uint64_t n);
552 static uint64_t compute_bit_pattern(uint16_t k, uint64_t n);
553 static uint32_t count_valid_levels(uint64_t bit_pattern);
554 static uint8_t compute_levels_needed(uint16_t k, uint64_t n);
560 template<
typename FwdSk>
569 template<
typename FwdSk>
572 template<
typename FwdV>
573 static void zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride);
582 static uint8_t lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit);
584 template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value,
int>::type = 0>
585 static inline bool check_update_item(TT item) {
586 return !std::isnan(item);
589 template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value,
int>::type = 0>
590 static inline bool check_update_item(TT) {
595 template<
typename From,
typename FC,
typename FA>
friend class quantiles_sketch;
599 template<
typename T,
typename C,
typename A>
600 class quantiles_sketch<T, C, A>::const_iterator {
602 using iterator_category = std::input_iterator_tag;
603 using value_type = std::pair<const T&, const uint64_t>;
604 using difference_type = void;
605 using pointer =
const return_value_holder<value_type>;
606 using reference =
const value_type;
608 const_iterator& operator++();
609 const_iterator& operator++(
int);
610 bool operator==(
const const_iterator& other)
const;
611 bool operator!=(
const const_iterator& other)
const;
612 reference operator*()
const;
613 pointer operator->()
const;
615 friend class quantiles_sketch<T, C, A>;
616 using Level = std::vector<T, A>;
617 using AllocLevel =
typename std::allocator_traits<A>::template rebind_alloc<Level>;
619 std::vector<Level, AllocLevel> levels_;
623 uint64_t bit_pattern_;
626 const_iterator(
const Level& base_buffer,
const std::vector<Level, AllocLevel>& levels, uint16_t k, uint64_t n,
bool is_end);
631 #include "quantiles_sketch_impl.hpp"
This is a stochastic streaming sketch that enables near-real time analysis of the approximate distrib...
Definition: quantiles_sketch.hpp:153
Comparator get_comparator() const
Returns an instance of the comparator for this sketch.
Definition: quantiles_sketch_impl.hpp:681
quantiles_sorted_view< T, Comparator, Allocator > get_sorted_view() const
Gets the sorted view of this sketch.
Definition: quantiles_sketch_impl.hpp:723
quantiles_sketch(uint16_t k=quantiles_constants::DEFAULT_K, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
Constructor.
quantiles_sketch & operator=(const quantiles_sketch &other)
Copy assignment.
Definition: quantiles_sketch_impl.hpp:89
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
vector_double get_CDF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analo...
Definition: quantiles_sketch_impl.hpp:769
uint32_t get_num_retained() const
Returns the number of retained items (samples) in the sketch.
Definition: quantiles_sketch_impl.hpp:664
double get_normalized_rank_error(bool is_pmf) const
Gets the normalized rank error for this sketch.
Definition: quantiles_sketch_impl.hpp:711
vector_double get_PMF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of sp...
Definition: quantiles_sketch_impl.hpp:762
void merge(FwdSk &&other)
Merges another sketch into this one.
Definition: quantiles_sketch_impl.hpp:235
void update(FwdT &&item)
Updates this sketch with the given data item.
Definition: quantiles_sketch_impl.hpp:212
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: quantiles_sketch_impl.hpp:277
string< Allocator > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition: quantiles_sketch_impl.hpp:595
uint16_t get_k() const
Returns configured parameter k.
Definition: quantiles_sketch_impl.hpp:644
bool is_empty() const
Returns true if this sketch is empty.
Definition: quantiles_sketch_impl.hpp:654
const T & get_max_item() const
Returns the max item of the stream.
Definition: quantiles_sketch_impl.hpp:675
static quantiles_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
double get_rank(const T &item, bool inclusive=true) const
Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
Definition: quantiles_sketch_impl.hpp:755
static quantiles_sketch deserialize(const void *bytes, size_t size, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
allocator_type get_allocator() const
Returns the allocator for this sketch.
Definition: quantiles_sketch_impl.hpp:686
quantile_return_type get_quantile(double rank, bool inclusive=true) const
Returns an approximation to the data item associated with the given rank of a hypothetical sorted ver...
Definition: quantiles_sketch_impl.hpp:744
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition: quantiles_sketch_impl.hpp:866
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition: quantiles_sketch_impl.hpp:693
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition: quantiles_sketch_impl.hpp:659
quantiles_sketch(const quantiles_sketch< From, FC, FA > &other, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
Type converting constructor.
const T & get_min_item() const
Returns the min item of the stream.
Definition: quantiles_sketch_impl.hpp:669
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition: quantiles_sketch_impl.hpp:871
uint64_t get_n() const
Returns the length of the input stream.
Definition: quantiles_sketch_impl.hpp:649
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition: quantiles_sorted_view.hpp:38
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition: quantiles_sorted_view.hpp:93
const uint16_t MAX_K
maximum value of parameter K
Definition: quantiles_sketch.hpp:41
const uint16_t MIN_K
minimum value of parameter K
Definition: quantiles_sketch.hpp:39
const uint16_t DEFAULT_K
default value of parameter K
Definition: quantiles_sketch.hpp:37
DataSketches namespace.
Definition: binomial_bounds.hpp:38
Interface for serializing and deserializing items.
Definition: serde.hpp:34