20 #ifndef COUNT_MIN_HPP_
21 #define COUNT_MIN_HPP_
24 #include "common_defs.hpp"
36 typename Allocator = std::allocator<W>>
38 static_assert(std::is_arithmetic<W>::value,
"Arithmetic type expected");
40 using allocator_type = Allocator;
41 using const_iterator =
typename std::vector<W, Allocator>::const_iterator;
53 count_min_sketch(uint8_t num_hashes, uint32_t num_buckets, uint64_t seed = DEFAULT_SEED,
const Allocator& allocator = Allocator());
214 void update(
const void* item,
size_t size, W weight);
221 void update(uint64_t item, W weight = 1);
228 void update(int64_t item, W weight = 1);
235 void update(
const std::string& item, W weight = 1);
262 const_iterator
begin()
const;
270 const_iterator
end()
const;
331 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;
340 vector_bytes
serialize(
unsigned header_size_bytes = 0)
const;
367 Allocator _allocator;
369 uint32_t _num_buckets;
370 std::vector<W, Allocator> _sketch_array;
373 std::vector<uint64_t> hash_seeds;
375 enum flags {IS_EMPTY};
376 static const uint8_t PREAMBLE_LONGS_SHORT = 2;
377 static const uint8_t PREAMBLE_LONGS_FULL = 3;
378 static const uint8_t SERIAL_VERSION_1 = 1;
379 static const uint8_t FAMILY_ID = 18;
380 static const uint8_t NULL_8 = 0;
381 static const uint32_t NULL_32 = 0;
389 static void check_header_validity(uint8_t preamble_longs, uint8_t serial_version, uint8_t family_id, uint8_t flags_byte);
397 std::vector<uint64_t> get_hashes(
const void* item,
size_t size)
const;
403 #include "count_min_impl.hpp"
C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
Definition: count_min.hpp:37
static count_min_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: count_min_impl.hpp:270
static uint32_t suggest_num_buckets(double relative_error)
Suggests the number of buckets required to achieve the given relative error.
Definition: count_min_impl.hpp:86
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition: count_min_impl.hpp:265
static uint8_t suggest_num_hashes(double confidence)
Suggests the number of hash functions required to achieve the given confidence.
Definition: count_min_impl.hpp:98
double get_relative_error() const
Definition: count_min_impl.hpp:76
uint32_t get_num_buckets() const
Definition: count_min_impl.hpp:66
bool is_empty() const
Returns true if this sketch is empty.
Definition: count_min_impl.hpp:443
W get_total_weight() const
Definition: count_min_impl.hpp:81
uint8_t get_num_hashes() const
Definition: count_min_impl.hpp:61
allocator_type get_allocator() const
W get_upper_bound(const void *item, size_t size) const
Query the sketch for the upper bound of a given item.
Definition: count_min_impl.hpp:209
W get_lower_bound(const void *item, size_t size) const
Query the sketch for the lower bound of a given item.
Definition: count_min_impl.hpp:226
W get_estimate(uint64_t item) const
Specific get_estimate function for uint64_t type see generic get_estimate function.
Definition: count_min_impl.hpp:143
count_min_sketch(uint8_t num_hashes, uint32_t num_buckets, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
Creates an instance of the sketch given parameters _num_hashes, _num_buckets and hash seed,...
Definition: count_min_impl.hpp:35
uint64_t get_seed() const
Definition: count_min_impl.hpp:71
void merge(const count_min_sketch &other_sketch)
Merges another count_min_sketch into this count_min_sketch.
Definition: count_min_impl.hpp:231
void update(const void *item, size_t size, W weight)
Update this sketch with given data of any type.
Definition: count_min_impl.hpp:184
string< Allocator > to_string() const
Returns a string describing the sketch.
Definition: count_min_impl.hpp:448
size_t get_serialized_size_bytes() const
Computes size needed to serialize the current state of the sketch.
Definition: count_min_impl.hpp:341
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition: count_min_impl.hpp:260
static count_min_sketch deserialize(const void *bytes, size_t size, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
DataSketches namespace.
Definition: binomial_bounds.hpp:38