20#ifndef KLL_SKETCH_HPP_
21#define KLL_SKETCH_HPP_
26#include "common_defs.hpp"
28#include "quantiles_sorted_view.hpp"
29#include "optional.hpp"
34namespace kll_constants {
37 const uint8_t DEFAULT_M = 8;
39 const uint16_t
MIN_K = DEFAULT_M;
41 const uint16_t
MAX_K = (1 << 16) - 1;
168 typename C = std::less<T>,
169 typename A = std::allocator<T>
173 using value_type = T;
174 using comparator = C;
175 using allocator_type = A;
176 using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
177 using vector_double =
typename quantiles_sorted_view<T, C, A>::vector_double;
228 template<
typename TT,
typename CC,
typename AA>
237 template<
typename FwdT>
246 template<
typename FwdSk>
247 void merge(FwdSk&& other);
259 uint16_t
get_k()
const;
265 uint64_t
get_n()
const;
333 double get_rank(
const T& item,
bool inclusive =
true)
const;
357 vector_double
get_PMF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
384 vector_double
get_CDF(
const T* split_points, uint32_t size,
bool inclusive =
true)
const;
401 template<
typename TT = T,
typename SerDe = serde<T>,
typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type = 0>
410 template<
typename TT = T,
typename SerDe = serde<T>,
typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type = 0>
423 template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type = 0>
437 template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type = 0>
445 template<
typename SerDe = serde<T>>
446 void serialize(std::ostream& os,
const SerDe& sd = SerDe())
const;
450 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
461 template<
typename SerDe = serde<T>>
462 vector_bytes
serialize(
unsigned header_size_bytes = 0,
const SerDe& sd = SerDe())
const;
472 template<
typename SerDe = serde<T>>
474 const C& comparator = C(),
const A& allocator = A());
485 template<
typename SerDe = serde<T>>
487 const C& comparator = C(),
const A& allocator = A());
503 string<A>
to_string(
bool print_levels =
false,
bool print_items =
false)
const;
505 class const_iterator;
512 const_iterator
begin()
const;
520 const_iterator
end()
const;
539 static const size_t EMPTY_SIZE_BYTES = 8;
540 static const size_t DATA_START_SINGLE_ITEM = 8;
541 static const size_t DATA_START = 20;
543 static const uint8_t SERIAL_VERSION_1 = 1;
544 static const uint8_t SERIAL_VERSION_2 = 2;
545 static const uint8_t FAMILY = 15;
547 enum flags { IS_EMPTY, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM };
549 static const uint8_t PREAMBLE_INTS_SHORT = 2;
550 static const uint8_t PREAMBLE_INTS_FULL = 5;
558 bool is_level_zero_sorted_;
562 uint32_t items_size_;
563 optional<T> min_item_;
564 optional<T> max_item_;
569 kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
570 std::unique_ptr<T, items_deleter> items, uint32_t items_size, optional<T>&& min_item,
571 optional<T>&& max_item,
bool is_level_zero_sorted,
const C& comparator);
574 inline void update_min_max(
const T& item);
575 inline uint32_t internal_update();
579 void compress_while_updating(
void);
581 uint8_t find_level_to_compact()
const;
582 void add_empty_top_level_to_completely_full_sketch();
583 void sort_level_zero();
585 template<
typename O>
void merge_higher_levels(O&& other, uint64_t final_n);
587 template<
typename FwdSk>
588 void populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
590 void assert_correct_total_weight()
const;
591 uint32_t safe_level_size(uint8_t level)
const;
592 uint32_t get_num_retained_above_level_zero()
const;
594 static void check_m(uint8_t m);
595 static void check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte);
596 static void check_serial_version(uint8_t serial_version);
597 static void check_family_id(uint8_t family_id);
599 void check_sorting()
const;
601 template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value,
int>::type = 0>
602 static inline bool check_update_item(TT item) {
603 return !std::isnan(item);
606 template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value,
int>::type = 0>
607 static inline bool check_update_item(TT) {
612 template<
typename TT,
typename CC,
typename AA>
friend class kll_sketch;
614 void setup_sorted_view()
const;
615 void reset_sorted_view();
618template<
typename T,
typename C,
typename A>
619class kll_sketch<T, C, A>::const_iterator {
621 using iterator_category = std::input_iterator_tag;
622 using value_type = std::pair<const T&, const uint64_t>;
623 using difference_type = void;
624 using pointer =
const return_value_holder<value_type>;
625 using reference =
const value_type;
627 friend class kll_sketch<T, C, A>;
628 const_iterator& operator++();
629 const_iterator& operator++(
int);
630 bool operator==(
const const_iterator& other)
const;
631 bool operator!=(
const const_iterator& other)
const;
632 reference operator*()
const;
633 pointer operator->()
const;
636 const uint32_t* levels;
637 const uint8_t num_levels;
641 const_iterator(
const T* items,
const uint32_t* levels,
const uint8_t num_levels);
646#include "kll_sketch_impl.hpp"
Implementation of a very compact quantiles sketch with lazy compaction scheme and nearly optimal accu...
Definition kll_sketch.hpp:171
C get_comparator() const
Returns an instance of the comparator for this sketch.
Definition kll_sketch_impl.hpp:272
quantiles_sorted_view< T, C, A > get_sorted_view() const
Gets the sorted view of this sketch.
Definition kll_sketch_impl.hpp:769
kll_sketch & operator=(const kll_sketch &other)
Copy assignment.
Definition kll_sketch_impl.hpp:103
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
vector_double get_CDF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analo...
Definition kll_sketch_impl.hpp:296
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition kll_sketch_impl.hpp:965
uint32_t get_num_retained() const
Returns the number of retained items (samples) in the sketch.
Definition kll_sketch_impl.hpp:250
static kll_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const C &comparator=C(), const A &allocator=A())
This method deserializes a sketch from a given stream.
vector_double get_PMF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of sp...
Definition kll_sketch_impl.hpp:289
void merge(FwdSk &&other)
Merges another sketch into this one.
Definition kll_sketch_impl.hpp:210
T get_max_item() const
Returns the max item of the stream.
Definition kll_sketch_impl.hpp:266
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition kll_sketch_impl.hpp:970
void update(FwdT &&item)
Updates this sketch with the given data item.
Definition kll_sketch_impl.hpp:181
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition kll_sketch_impl.hpp:368
string< A > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition kll_sketch_impl.hpp:913
uint16_t get_k() const
Returns configured parameter k.
Definition kll_sketch_impl.hpp:240
bool is_empty() const
Returns true if this sketch is empty.
Definition kll_sketch_impl.hpp:235
double get_rank(const T &item, bool inclusive=true) const
Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
Definition kll_sketch_impl.hpp:282
A get_allocator() const
Returns an instance of the allocator for this sketch.
Definition kll_sketch_impl.hpp:277
T get_min_item() const
Returns the min item of the stream.
Definition kll_sketch_impl.hpp:260
typename quantiles_sorted_view< T, C, A >::quantile_return_type quantile_return_type
Quantile return type.
Definition kll_sketch.hpp:183
quantile_return_type get_quantile(double rank, bool inclusive=true) const
Returns an item from the sketch that is the best approximation to an item from the original stream wi...
Definition kll_sketch_impl.hpp:303
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition kll_sketch_impl.hpp:321
static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n)
Returns upper bound on the serialized size of a sketch given a parameter k and stream length.
Definition kll_sketch_impl.hpp:349
double get_normalized_rank_error(bool pmf) const
Gets the approximate rank error of this sketch normalized as a fraction between zero and one.
Definition kll_sketch_impl.hpp:314
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition kll_sketch_impl.hpp:255
uint64_t get_n() const
Returns the length of the input stream.
Definition kll_sketch_impl.hpp:245
static kll_sketch deserialize(const void *bytes, size_t size, const SerDe &sd=SerDe(), const C &comparator=C(), const A &allocator=A())
This method deserializes a sketch from a given array of bytes.
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition quantiles_sorted_view.hpp:38
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition quantiles_sorted_view.hpp:93
const uint16_t MAX_K
max value of parameter K
Definition kll_sketch.hpp:41
const uint16_t MIN_K
min value of parameter K
Definition kll_sketch.hpp:39
const uint16_t DEFAULT_K
default value of parameter K
Definition kll_sketch.hpp:36
DataSketches namespace.
Definition binomial_bounds.hpp:38