20 #ifndef CPC_SKETCH_HPP_
21 #define CPC_SKETCH_HPP_
28 #include "u32_table.hpp"
29 #include "cpc_common.hpp"
30 #include "cpc_compressor.hpp"
31 #include "cpc_confidence.hpp"
32 #include "common_defs.hpp"
// Forward declaration of the cpc_sketch_alloc class template.
// A is its single type parameter; further down this file it is aliased as allocator_type.
37 template<
typename A>
class cpc_sketch_alloc;
// Forward declaration of the cpc_union_alloc class template, which takes a single
// type parameter A.
38 template<
typename A>
class cpc_union_alloc;
// Declaration of the cpc_init function template: takes no arguments and returns void.
// The reference summary at the end of this file describes it as "Allocation and
// initialization of global decompression (decoding) tables."
51 template<
typename A>
void cpc_init();
// Allocator type: an alias for the template argument A.
66 using allocator_type = A;
// std::vector of uint8_t whose allocator is A rebound to uint8_t through std::allocator_traits.
67 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
// std::vector of uint64_t whose allocator is A rebound to uint64_t through std::allocator_traits.
68 using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
// Overload set of update(); every overload returns void.
// String overload: "Update this sketch with a given string." (per the reference
// summary at the end of this file).
112 void update(
const std::string& value);
// Overload taking an unsigned 64-bit integer.
118 void update(uint64_t value);
// Overload taking a signed 64-bit integer.
124 void update(int64_t value);
// Overload taking an unsigned 32-bit integer.
131 void update(uint32_t value);
// Overload taking a signed 32-bit integer.
138 void update(int32_t value);
// Overload taking an unsigned 16-bit integer.
145 void update(uint16_t value);
// Overload taking a signed 16-bit integer.
152 void update(int16_t value);
// Overload taking an unsigned 8-bit integer.
159 void update(uint8_t value);
// Overload taking a signed 8-bit integer.
166 void update(int8_t value);
// Overload taking a double.
173 void update(
double value);
// Overload taking an untyped pointer plus a size.
// NOTE(review): presumably `size` is the length in bytes of the data at `value` — confirm
// against the implementation.
195 void update(
const void* value,
size_t size);
// Const member returning a vector_bytes (byte vector using the rebound allocator).
// header_size_bytes defaults to 0.
// NOTE(review): presumably the result is the serialized sketch preceded by
// header_size_bytes of caller-reserved space — confirm against the implementation.
217 vector_bytes
serialize(
unsigned header_size_bytes = 0)
const;
// Const accessor returning a 32-bit unsigned count.
// NOTE(review): presumably returns the num_coupons member declared further down — confirm.
250 uint32_t get_num_coupons()
const;
// Const check returning bool.
// NOTE(review): the criteria that make a sketch "valid" are not visible here — see the
// implementation file.
254 bool validate()
const;
// Class-level constant: serial version, fixed at 1.
257 static const uint8_t SERIAL_VERSION = 1;
// Class-level constant: family identifier, fixed at 16.
258 static const uint8_t FAMILY = 16;
// Unscoped enum with no explicit initializers, so the enumerators are 0..4 in declaration order.
// NOTE(review): presumably these are bit positions within a flags byte of the
// serialized form — confirm against the serialization code.
260 enum flags { IS_BIG_ENDIAN, IS_COMPRESSED, HAS_HIP, HAS_TABLE, HAS_WINDOW };
// Data members (state of the sketch).
// 32-bit coupon counter.
276 uint32_t num_coupons;
// Table of 32-bit values, parameterized on the allocator type A.
278 u32_table<A> surprising_value_table;
// Byte vector holding the sliding window.
279 vector_bytes sliding_window;
// NOTE(review): presumably the column offset of the sliding window — confirm.
280 uint8_t window_offset;
// NOTE(review): presumably the index of the first column that still carries
// information — confirm.
281 uint8_t first_interesting_column;
// Accumulator for the HIP estimate (see get_hip_estimate / update_hip declared below).
284 double hip_est_accum;
// Constructor that receives the sketch state explicitly instead of starting empty.
// `table` and `window` are rvalue references, so the caller hands over those containers.
// NOTE(review): presumably used when rebuilding a sketch from serialized or
// uncompressed state — confirm against the callers.
287 cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column, u32_table<A>&& table,
288 vector_bytes&& window,
bool has_hip,
double kxp,
double hip_est_accum, uint64_t seed);
// Internal update helpers. Each takes a single 32-bit row_col value.
// NOTE(review): presumably row_col packs a row index and a column index into one
// word — confirm the encoding in the implementation.
290 inline void row_col_update(uint32_t row_col);
// Update path named for the sparse representation.
291 inline void update_sparse(uint32_t row_col);
// Update path named for the windowed representation.
292 inline void update_windowed(uint32_t row_col);
// Update of the HIP-related state for a given row_col.
293 inline void update_hip(uint32_t row_col);
// Takes no arguments; by its name, switches the sketch from the sparse to the
// windowed representation.
294 void promote_sparse_to_windowed();
// Takes a read-only pointer to 64-bit words of a bit matrix.
// NOTE(review): presumably recomputes the kxp value from that matrix — confirm.
296 void refresh_kxp(
const uint64_t* bit_matrix);
// Const member returning the HIP estimate as a double.
302 double get_hip_estimate()
const;
// Const member returning the ICON estimate as a double.
303 double get_icon_estimate()
const;
// Const member returning a `flavor` value for this sketch.
305 inline flavor determine_flavor()
const;
// Static overload returning a `flavor` value from lg_k and a 64-bit value c alone.
// NOTE(review): presumably c is a coupon count — confirm.
306 static inline flavor determine_flavor(uint8_t lg_k, uint64_t c);
// Static helper returning an 8-bit offset computed from lg_k and c.
308 static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
// Const member returning a vector_u64 (vector of 64-bit words) by value.
// NOTE(review): presumably one word per row of the bit matrix — confirm.
311 vector_u64 build_bit_matrix()
const;
// Static helper returning an 8-bit count derived from the coupon count and three
// presence flags (HIP, table, window).
// NOTE(review): presumably the number of 32-bit preamble words in the serialized
// form — confirm.
313 static uint8_t get_preamble_ints(uint32_t num_coupons,
bool has_hip,
bool has_table,
bool has_window);
// Const member taking an output stream; by its name, writes the HIP-related fields to it.
314 inline void write_hip(std::ostream& os)
const;
// Const member taking a destination pointer and returning a size_t.
// NOTE(review): presumably copies the HIP-related fields to dst and returns the number
// of bytes written; the caller would then have to supply a large enough buffer — confirm.
315 inline size_t copy_hip_to_mem(
void* dst)
const;
// Static check of an lg_k value; returns void.
// NOTE(review): how an out-of-range value is reported is not visible here — see the
// implementation file.
317 static void check_lg_k(uint8_t lg_k);
// Grants cpc_compressor<A> access to this class's non-public members.
319 friend cpc_compressor<A>;
325 #include "cpc_sketch_impl.hpp"
High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch.
Definition: cpc_sketch.hpp:64
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: cpc_sketch_impl.hpp:408
double get_estimate() const
Definition: cpc_sketch_impl.hpp:75
cpc_sketch_alloc(uint8_t lg_k=cpc_constants::DEFAULT_LG_K, uint64_t seed=DEFAULT_SEED, const A &allocator=A())
Creates an instance of the sketch given the lg_k parameter and hash seed.
Definition: cpc_sketch_impl.hpp:44
static cpc_sketch_alloc< A > deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const A &allocator=A())
This method deserializes a sketch from a given stream.
Definition: cpc_sketch_impl.hpp:519
void update(const std::string &value)
Update this sketch with a given string.
Definition: cpc_sketch_impl.hpp:109
bool is_empty() const
Definition: cpc_sketch_impl.hpp:70
static size_t get_max_serialized_size_bytes(uint8_t lg_k)
The actual size of a compressed CPC sketch has a small random variance, but the following empirically...
Definition: cpc_sketch_impl.hpp:713
uint8_t get_lg_k() const
Definition: cpc_sketch_impl.hpp:65
A get_allocator() const
Definition: cpc_sketch_impl.hpp:60
double get_lower_bound(unsigned kappa) const
Returns the approximate lower error bound given a parameter kappa (1, 2 or 3).
Definition: cpc_sketch_impl.hpp:91
double get_upper_bound(unsigned kappa) const
Returns the approximate upper error bound given a parameter kappa (1, 2 or 3).
Definition: cpc_sketch_impl.hpp:100
string< A > to_string() const
Returns a human-readable summary of this sketch.
Definition: cpc_sketch_impl.hpp:383
High performance C++ implementation of Compressed Probabilistic Counting (CPC) Union.
Definition: cpc_union.hpp:40
const uint8_t DEFAULT_LG_K
default log2 of K
Definition: cpc_common.hpp:36
DataSketches namespace.
Definition: binomial_bounds.hpp:38
void cpc_init()
Allocation and initialization of global decompression (decoding) tables.
Definition: cpc_sketch_impl.hpp:39