20#ifndef FREQUENT_ITEMS_SKETCH_HPP_
21#define FREQUENT_ITEMS_SKETCH_HPP_
29#include "reverse_purge_hash_map.hpp"
30#include "common_defs.hpp"
55 typename W = uint64_t,
56 typename H = std::hash<T>,
57 typename E = std::equal_to<T>,
58 typename A = std::allocator<T>
61 static_assert(std::is_arithmetic<W>::value,
"Arithmetic type expected");
64 static const uint8_t LG_MIN_MAP_SIZE = 3;
78 const E& equal = E(),
const A& allocator = A());
88 void update(
const T& item, W weight = 1);
98 void update(T&& item, W weight = 1);
183 static double get_epsilon(uint8_t lg_max_map_size);
192 static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight);
195 using vector_row =
typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>>;
250 template<
typename SerDe = serde<T>>
258 template<
typename SerDe = serde<T>>
259 void serialize(std::ostream& os,
const SerDe& sd = SerDe())
const;
263 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
274 template<
typename SerDe = serde<T>>
275 vector_bytes
serialize(
unsigned header_size_bytes = 0,
const SerDe& sd = SerDe())
const;
285 template<
typename SerDe = serde<T>>
287 const E& equal = E(),
const A& allocator = A());
298 template<
typename SerDe = serde<T>>
300 const E& equal = E(),
const A& allocator = A());
306 string<A>
to_string(
bool print_items =
false)
const;
309 static const uint8_t SERIAL_VERSION = 1;
310 static const uint8_t FAMILY_ID = 10;
311 static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
312 static const uint8_t PREAMBLE_LONGS_NONEMPTY = 4;
313 static constexpr double EPSILON_FACTOR = 3.5;
316 enum flags { IS_EMPTY_1 = 0, IS_EMPTY_2 = 2 };
319 reverse_purge_hash_map<T, W, H, E, A> map;
320 static void check_preamble_longs(uint8_t preamble_longs,
bool is_empty);
321 static void check_serial_version(uint8_t serial_version);
322 static void check_family_id(uint8_t family_id);
323 static void check_size(uint8_t lg_cur_size, uint8_t lg_max_size);
326 template<typename WW = W, typename std::enable_if<std::is_integral<WW>::value && std::is_signed<WW>::value,
int>::type = 0>
327 static inline void check_weight(WW weight);
330 template<typename WW = W, typename std::enable_if<std::is_integral<WW>::value && std::is_unsigned<WW>::value,
int>::type = 0>
331 static inline void check_weight(WW weight);
334 template<typename WW = W, typename std::enable_if<std::is_floating_point<WW>::value,
int>::type = 0>
335 static inline void check_weight(WW weight);
342template<
typename T,
typename W,
typename H,
typename E,
typename A>
345 row(
const T* item, W weight, W offset):
346 item(item), weight(weight), offset(offset) {}
363#include "frequent_items_sketch_impl.hpp"
Row in the output from get_frequent_items.
Definition frequent_items_sketch.hpp:343
W get_upper_bound() const
Definition frequent_items_sketch.hpp:354
W get_estimate() const
Definition frequent_items_sketch.hpp:350
const T & get_item() const
Definition frequent_items_sketch.hpp:348
W get_lower_bound() const
Definition frequent_items_sketch.hpp:352
Frequent Items sketch.
Definition frequent_items_sketch.hpp:60
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
W get_upper_bound(const T &item) const
Returns the guaranteed upper bound weight (frequency) of the given item.
Definition frequent_items_sketch_impl.hpp:118
void merge(const frequent_items_sketch &other)
This function merges the other sketch into this one.
Definition frequent_items_sketch_impl.hpp:68
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition frequent_items_sketch_impl.hpp:165
vector_row get_frequent_items(frequent_items_error_type err_type) const
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an erro...
Definition frequent_items_sketch_impl.hpp:144
bool is_empty() const
Definition frequent_items_sketch_impl.hpp:90
string< A > to_string(bool print_items=false) const
Returns a human readable summary of this sketch.
Definition frequent_items_sketch_impl.hpp:432
void update(const T &item, W weight=1)
Update this sketch with an item and a positive weight (frequency count).
Definition frequent_items_sketch_impl.hpp:52
static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight)
Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_st...
Definition frequent_items_sketch_impl.hpp:138
W get_total_weight() const
Returns the sum of the weights (frequencies) in the stream seen so far by the sketch.
Definition frequent_items_sketch_impl.hpp:100
static frequent_items_sketch deserialize(const void *bytes, size_t size, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A())
This method deserializes a sketch from a given array of bytes.
double get_epsilon() const
Returns epsilon value of this sketch.
Definition frequent_items_sketch_impl.hpp:128
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition frequent_items_sketch_impl.hpp:212
W get_maximum_error() const
Definition frequent_items_sketch_impl.hpp:123
W get_estimate(const T &item) const
Returns the estimate of the weight (frequency) of the given item.
Definition frequent_items_sketch_impl.hpp:105
static frequent_items_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A())
This method deserializes a sketch from a given stream.
W get_lower_bound(const T &item) const
Returns the guaranteed lower bound weight (frequency) of the given item.
Definition frequent_items_sketch_impl.hpp:113
uint32_t get_num_active_items() const
Definition frequent_items_sketch_impl.hpp:95
DataSketches namespace.
Definition binomial_bounds.hpp:38
frequent_items_error_type
Frequent items error type.
Definition frequent_items_sketch.hpp:36
@ NO_FALSE_NEGATIVES
include an item in the result list if get_upper_bound(item) > threshold
Definition frequent_items_sketch.hpp:38
@ NO_FALSE_POSITIVES
include an item in the result list if get_lower_bound(item) > threshold
Definition frequent_items_sketch.hpp:37