20 #ifndef FREQUENT_ITEMS_SKETCH_HPP_
21 #define FREQUENT_ITEMS_SKETCH_HPP_
27 #include <type_traits>
29 #include "reverse_purge_hash_map.hpp"
30 #include "common_defs.hpp"
50 typename W = uint64_t,
51 typename H = std::hash<T>,
52 typename E = std::equal_to<T>,
53 typename A = std::allocator<T>
56 static_assert(std::is_arithmetic<W>::value,
"Arithmetic type expected");
59 static const uint8_t LG_MIN_MAP_SIZE = 3;
73 const E& equal = E(),
const A& allocator = A());
81 void update(
const T& item, W weight = 1);
89 void update(T&& item, W weight = 1);
170 static double get_epsilon(uint8_t lg_max_map_size);
179 static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight);
182 using vector_row =
typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>>;
237 template<
typename SerDe = serde<T>>
245 template<
typename SerDe = serde<T>>
246 void serialize(std::ostream& os,
const SerDe& sd = SerDe())
const;
250 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
261 template<
typename SerDe = serde<T>>
262 vector_bytes
serialize(
unsigned header_size_bytes = 0,
const SerDe& sd = SerDe())
const;
272 template<
typename SerDe = serde<T>>
274 const E& equal = E(),
const A& allocator = A());
285 template<
typename SerDe = serde<T>>
287 const E& equal = E(),
const A& allocator = A());
293 string<A>
to_string(
bool print_items =
false)
const;
296 static const uint8_t SERIAL_VERSION = 1;
297 static const uint8_t FAMILY_ID = 10;
298 static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
299 static const uint8_t PREAMBLE_LONGS_NONEMPTY = 4;
300 static constexpr
double EPSILON_FACTOR = 3.5;
303 enum flags { IS_EMPTY_1 = 0, IS_EMPTY_2 = 2 };
306 reverse_purge_hash_map<T, W, H, E, A> map;
307 static void check_preamble_longs(uint8_t preamble_longs,
bool is_empty);
308 static void check_serial_version(uint8_t serial_version);
309 static void check_family_id(uint8_t family_id);
310 static void check_size(uint8_t lg_cur_size, uint8_t lg_max_size);
313 template<typename WW = W, typename std::enable_if<std::is_integral<WW>::value && std::is_signed<WW>::value,
int>::type = 0>
314 static inline void check_weight(WW weight);
317 template<typename WW = W, typename std::enable_if<std::is_integral<WW>::value && std::is_unsigned<WW>::value,
int>::type = 0>
318 static inline void check_weight(WW weight);
321 template<typename WW = W, typename std::enable_if<std::is_floating_point<WW>::value,
int>::type = 0>
322 static inline void check_weight(WW weight);
329 template<
typename T,
typename W,
typename H,
typename E,
typename A>
332 row(
const T* item, W weight, W offset):
333 item(item), weight(weight), offset(offset) {}
350 #include "frequent_items_sketch_impl.hpp"
Row in the output from get_frequent_items.
Definition: frequent_items_sketch.hpp:330
W get_upper_bound() const
Definition: frequent_items_sketch.hpp:341
W get_estimate() const
Definition: frequent_items_sketch.hpp:337
W get_lower_bound() const
Definition: frequent_items_sketch.hpp:339
const T & get_item() const
Definition: frequent_items_sketch.hpp:335
Frequent Items sketch.
Definition: frequent_items_sketch.hpp:55
vector_bytes serialize(unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
This method serializes the sketch as a vector of bytes.
frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size=LG_MIN_MAP_SIZE, const E &equal=E(), const A &allocator=A())
Construct this sketch with parameters lg_max_map_size and lg_start_map_size.
Definition: frequent_items_sketch_impl.hpp:37
W get_upper_bound(const T &item) const
Returns the guaranteed upper bound weight (frequency) of the given item.
Definition: frequent_items_sketch_impl.hpp:118
void merge(const frequent_items_sketch &other)
This function merges the other sketch into this one.
Definition: frequent_items_sketch_impl.hpp:68
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: frequent_items_sketch_impl.hpp:165
vector_row get_frequent_items(frequent_items_error_type err_type) const
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an erro...
Definition: frequent_items_sketch_impl.hpp:144
bool is_empty() const
Definition: frequent_items_sketch_impl.hpp:90
string< A > to_string(bool print_items=false) const
Returns a human readable summary of this sketch.
Definition: frequent_items_sketch_impl.hpp:433
void update(const T &item, W weight=1)
Update this sketch with an item and a positive weight (frequency count).
Definition: frequent_items_sketch_impl.hpp:52
static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight)
Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_st...
Definition: frequent_items_sketch_impl.hpp:138
W get_total_weight() const
Returns the sum of the weights (frequencies) in the stream seen so far by the sketch.
Definition: frequent_items_sketch_impl.hpp:100
static frequent_items_sketch deserialize(const void *bytes, size_t size, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A())
This method deserializes a sketch from a given array of bytes.
double get_epsilon() const
Returns epsilon value of this sketch.
Definition: frequent_items_sketch_impl.hpp:128
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition: frequent_items_sketch_impl.hpp:212
W get_maximum_error() const
Definition: frequent_items_sketch_impl.hpp:123
W get_estimate(const T &item) const
Returns the estimate of the weight (frequency) of the given item.
Definition: frequent_items_sketch_impl.hpp:105
static frequent_items_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A())
This method deserializes a sketch from a given stream.
W get_lower_bound(const T &item) const
Returns the guaranteed lower bound weight (frequency) of the given item.
Definition: frequent_items_sketch_impl.hpp:113
uint32_t get_num_active_items() const
Definition: frequent_items_sketch_impl.hpp:95
DataSketches namespace.
Definition: binomial_bounds.hpp:38
frequent_items_error_type
Frequent items error type.
Definition: frequent_items_sketch.hpp:36
@ NO_FALSE_NEGATIVES
include an item in the result list if get_upper_bound(item) > threshold
Definition: frequent_items_sketch.hpp:38
@ NO_FALSE_POSITIVES
include an item in the result list if get_lower_bound(item) > threshold
Definition: frequent_items_sketch.hpp:37