20#ifndef CPC_SKETCH_IMPL_HPP_
21#define CPC_SKETCH_IMPL_HPP_
28#include "cpc_confidence.hpp"
29#include "kxp_byte_lookup.hpp"
30#include "inv_pow2_table.hpp"
31#include "cpc_util.hpp"
32#include "icon_estimator.hpp"
34#include "count_zeros.hpp"
49surprising_value_table(2, 6 + lg_k, allocator),
50sliding_window(allocator),
52first_interesting_column(0),
61 return sliding_window.get_allocator();
71 return num_coupons == 0;
76 if (!was_merged) {
return get_hip_estimate(); }
77 return get_icon_estimate();
86double cpc_sketch_alloc<A>::get_icon_estimate()
const {
87 return compute_icon_estimate(lg_k, num_coupons);
92 if (kappa < 1 || kappa > 3) {
93 throw std::invalid_argument(
"kappa must be 1, 2 or 3");
95 if (!was_merged) {
return get_hip_confidence_lb<A>(*
this, kappa); }
96 return get_icon_confidence_lb<A>(*
this, kappa);
101 if (kappa < 1 || kappa > 3) {
102 throw std::invalid_argument(
"kappa must be 1, 2 or 3");
104 if (!was_merged) {
return get_hip_confidence_ub<A>(*
this, kappa); }
105 return get_icon_confidence_ub<A>(*
this, kappa);
110 if (value.empty()) {
return; }
111 update(value.c_str(), value.length());
116 update(&value,
sizeof(value));
121 update(&value,
sizeof(value));
126 update(
static_cast<int32_t
>(value));
131 update(
static_cast<int64_t
>(value));
136 update(
static_cast<int16_t
>(value));
141 update(
static_cast<int64_t
>(value));
146 update(
static_cast<int8_t
>(value));
151 update(
static_cast<int64_t
>(value));
161 ldu.double_value = 0.0;
162 }
else if (std::isnan(value)) {
163 ldu.long_value = 0x7ff8000000000000L;
165 ldu.double_value = value;
167 update(&ldu,
sizeof(ldu));
172 update(
static_cast<double>(value));
175static inline uint32_t row_col_from_two_hashes(uint64_t hash0, uint64_t hash1, uint8_t lg_k) {
176 if (lg_k > 26) {
throw std::logic_error(
"lg_k > 26"); }
177 const uint32_t k = 1 << lg_k;
178 uint8_t col = count_leading_zeros_in_u64(hash1);
179 if (col > 63) { col = 63; }
180 const uint32_t row = hash0 & (k - 1);
181 uint32_t row_col = (row << 6) | col;
184 if (row_col == UINT32_MAX) { row_col ^= 1 << 6; }
191 MurmurHash3_x64_128(value, size, seed, hashes);
192 row_col_update(row_col_from_two_hashes(hashes.h1, hashes.h2, lg_k));
197 const uint8_t col = row_col & 63;
198 if (col < first_interesting_column) {
return; }
200 if (sliding_window.size() == 0) {
201 update_sparse(row_col);
203 update_windowed(row_col);
208void cpc_sketch_alloc<A>::update_sparse(uint32_t row_col) {
209 const uint32_t k = 1 << lg_k;
210 const uint64_t c32pre =
static_cast<uint64_t
>(num_coupons) << 5;
211 if (c32pre >= 3 * k) {
throw std::logic_error(
"c32pre >= 3 * k"); }
212 bool is_novel = surprising_value_table.maybe_insert(row_col);
216 const uint64_t c32post =
static_cast<uint64_t
>(num_coupons) << 5;
217 if (c32post >= 3 * k) { promote_sparse_to_windowed(); }
223void cpc_sketch_alloc<A>::update_windowed(uint32_t row_col) {
224 if (window_offset > 56) {
throw std::logic_error(
"wrong window offset"); }
225 const uint32_t k = 1 << lg_k;
226 const uint64_t c32pre =
static_cast<uint64_t
>(num_coupons) << 5;
227 if (c32pre < 3 * k) {
throw std::logic_error(
"c32pre < 3 * k"); }
228 const uint64_t c8pre =
static_cast<uint64_t
>(num_coupons) << 3;
229 const uint64_t w8pre =
static_cast<uint64_t
>(window_offset) << 3;
230 if (c8pre >= (27 + w8pre) * k) {
throw std::logic_error(
"c8pre is wrong"); }
232 bool is_novel =
false;
233 const uint8_t col = row_col & 63;
235 if (col < window_offset) {
236 is_novel = surprising_value_table.maybe_delete(row_col);
237 }
else if (col < window_offset + 8) {
238 if (col < window_offset) {
throw std::logic_error(
"col < window_offset"); }
239 const uint32_t row = row_col >> 6;
240 const uint8_t old_bits = sliding_window[row];
241 const uint8_t new_bits = old_bits | (1 << (col - window_offset));
242 if (new_bits != old_bits) {
243 sliding_window[row] = new_bits;
247 if (col < window_offset + 8) {
throw std::logic_error(
"col < window_offset + 8"); }
248 is_novel = surprising_value_table.maybe_insert(row_col);
254 const uint64_t c8post =
static_cast<uint64_t
>(num_coupons) << 3;
255 if (c8post >= (27 + w8pre) * k) {
257 if (window_offset < 1 || window_offset > 56) {
throw std::logic_error(
"wrong window offset"); }
258 const uint64_t w8post =
static_cast<uint64_t
>(window_offset) << 3;
259 if (c8post >= (27 + w8post) * k) {
throw std::logic_error(
"c8pre is wrong"); }
266void cpc_sketch_alloc<A>::update_hip(uint32_t row_col) {
267 const uint32_t k = 1 << lg_k;
268 const uint8_t col = row_col & 63;
269 const double one_over_p =
static_cast<double>(k) / kxp;
270 hip_est_accum += one_over_p;
271 kxp -= INVERSE_POWERS_OF_2[col + 1];
276void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
277 const uint32_t k = 1 << lg_k;
278 const uint64_t c32 =
static_cast<uint64_t
>(num_coupons) << 5;
279 if (!(c32 == 3 * k || (lg_k == 4 && c32 > 3 * k))) {
throw std::logic_error(
"wrong c32"); }
281 sliding_window.resize(k, 0);
283 u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
285 const uint32_t* old_slots = surprising_value_table.get_slots();
286 const uint32_t old_num_slots = 1 << surprising_value_table.get_lg_size();
288 if (window_offset != 0) {
throw std::logic_error(
"window_offset != 0"); }
290 for (uint32_t i = 0; i < old_num_slots; i++) {
291 const uint32_t row_col = old_slots[i];
292 if (row_col != UINT32_MAX) {
293 const uint8_t col = row_col & 63;
295 const uint32_t row = row_col >> 6;
296 sliding_window[row] |= 1 << col;
299 const bool is_novel = new_table.maybe_insert(row_col);
300 if (!is_novel) {
throw std::logic_error(
"is_novel != true"); }
305 surprising_value_table = std::move(new_table);
309void cpc_sketch_alloc<A>::move_window() {
310 const uint8_t new_offset = window_offset + 1;
311 if (new_offset > 56) {
throw std::logic_error(
"new_offset > 56"); }
312 if (new_offset != determine_correct_offset(lg_k, num_coupons)) {
throw std::logic_error(
"new_offset is wrong"); }
314 if (sliding_window.size() == 0) {
throw std::logic_error(
"no sliding window"); }
315 const uint32_t k = 1 << lg_k;
318 vector_u64 bit_matrix = build_bit_matrix();
321 if ((new_offset & 0x7) == 0) { refresh_kxp(bit_matrix.data()); }
323 surprising_value_table.clear();
325 const uint64_t mask_for_clearing_window = (
static_cast<uint64_t
>(0xff) << new_offset) ^ UINT64_MAX;
326 const uint64_t mask_for_flipping_early_zone = (
static_cast<uint64_t
>(1) << new_offset) - 1;
327 uint64_t all_surprises_ored = 0;
329 for (uint32_t i = 0; i < k; i++) {
330 uint64_t pattern = bit_matrix[i];
331 sliding_window[i] = (pattern >> new_offset) & 0xff;
332 pattern &= mask_for_clearing_window;
335 pattern ^= mask_for_flipping_early_zone;
336 all_surprises_ored |= pattern;
337 while (pattern != 0) {
338 const uint8_t col = count_trailing_zeros_in_u64(pattern);
339 pattern = pattern ^ (
static_cast<uint64_t
>(1) << col);
340 const uint32_t row_col = (i << 6) | col;
341 const bool is_novel = surprising_value_table.maybe_insert(row_col);
342 if (!is_novel) {
throw std::logic_error(
"is_novel != true"); }
346 window_offset = new_offset;
348 first_interesting_column = count_trailing_zeros_in_u64(all_surprises_ored);
349 if (first_interesting_column > new_offset) { first_interesting_column = new_offset; }
357void cpc_sketch_alloc<A>::refresh_kxp(
const uint64_t* bit_matrix) {
358 const uint32_t k = 1 << lg_k;
362 std::fill(byte_sums, byte_sums + 8, 0);
364 for (
size_t i = 0; i < k; i++) {
365 uint64_t word = bit_matrix[i];
366 for (
unsigned j = 0; j < 8; j++) {
367 const uint8_t
byte = word & 0xff;
368 byte_sums[j] += KXP_BYTE_TABLE[byte];
374 for (
int j = 7; j >= 0; j--) {
375 const double factor = INVERSE_POWERS_OF_2[8 * j];
376 total += factor * byte_sums[j];
386 std::ostringstream os;
387 os <<
"### CPC sketch summary:" << std::endl;
388 os <<
" lg_k : " << std::to_string(lg_k) << std::endl;
389 os <<
" seed hash : " << std::hex << compute_seed_hash(seed) << std::dec << std::endl;
390 os <<
" C : " << num_coupons << std::endl;
391 os <<
" flavor : " << determine_flavor() << std::endl;
392 os <<
" merged : " << (was_merged ?
"true" :
"false") << std::endl;
394 os <<
" HIP estimate : " << hip_est_accum << std::endl;
395 os <<
" kxp : " << kxp << std::endl;
397 os <<
" interesting col: " << std::to_string(first_interesting_column) << std::endl;
398 os <<
" table entries : " << surprising_value_table.get_num_items() << std::endl;
399 os <<
" window : " << (sliding_window.size() == 0 ?
"not " :
"") <<
"allocated" << std::endl;
400 if (sliding_window.size() > 0) {
401 os <<
" window offset : " << std::to_string(window_offset) << std::endl;
403 os <<
"### End sketch summary" << std::endl;
404 return string<A>(os.str().c_str(), sliding_window.get_allocator());
409 compressed_state<A> compressed(A(sliding_window.get_allocator()));
410 compressed.table_data_words = 0;
411 compressed.table_num_entries = 0;
412 compressed.window_data_words = 0;
413 get_compressor<A>().compress(*
this, compressed);
414 const bool has_hip = !was_merged;
415 const bool has_table = compressed.table_data.size() > 0;
416 const bool has_window = compressed.window_data.size() > 0;
417 const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
418 write(os, preamble_ints);
419 const uint8_t serial_version = SERIAL_VERSION;
420 write(os, serial_version);
421 const uint8_t family = FAMILY;
424 write(os, first_interesting_column);
425 const uint8_t flags_byte(
426 (1 << flags::IS_COMPRESSED)
427 | (has_hip ? 1 << flags::HAS_HIP : 0)
428 | (has_table ? 1 << flags::HAS_TABLE : 0)
429 | (has_window ? 1 << flags::HAS_WINDOW : 0)
431 write(os, flags_byte);
432 const uint16_t seed_hash(compute_seed_hash(seed));
433 write(os, seed_hash);
435 write(os, num_coupons);
436 if (has_table && has_window) {
438 write(os, compressed.table_num_entries);
441 if (has_hip) { write_hip(os); }
444 write(os, compressed.table_data_words);
447 write(os, compressed.window_data_words);
450 if (has_hip && !(has_table && has_window)) { write_hip(os); }
452 write(os, compressed.window_data.data(), compressed.window_data_words *
sizeof(uint32_t));
455 write(os, compressed.table_data.data(), compressed.table_data_words *
sizeof(uint32_t));
462 compressed_state<A> compressed(sliding_window.get_allocator());
463 compressed.table_data_words = 0;
464 compressed.table_num_entries = 0;
465 compressed.window_data_words = 0;
466 get_compressor<A>().compress(*
this, compressed);
467 const bool has_hip = !was_merged;
468 const bool has_table = compressed.table_data.size() > 0;
469 const bool has_window = compressed.window_data.size() > 0;
470 const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
471 const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) *
sizeof(uint32_t);
472 vector_bytes bytes(size, 0, sliding_window.get_allocator());
473 uint8_t* ptr = bytes.data() + header_size_bytes;
474 ptr += copy_to_mem(preamble_ints, ptr);
475 const uint8_t serial_version = SERIAL_VERSION;
476 ptr += copy_to_mem(serial_version, ptr);
477 const uint8_t family = FAMILY;
478 ptr += copy_to_mem(family, ptr);
479 ptr += copy_to_mem(lg_k, ptr);
480 ptr += copy_to_mem(first_interesting_column, ptr);
481 const uint8_t flags_byte(
482 (1 << flags::IS_COMPRESSED)
483 | (has_hip ? 1 << flags::HAS_HIP : 0)
484 | (has_table ? 1 << flags::HAS_TABLE : 0)
485 | (has_window ? 1 << flags::HAS_WINDOW : 0)
487 ptr += copy_to_mem(flags_byte, ptr);
488 const uint16_t seed_hash = compute_seed_hash(seed);
489 ptr += copy_to_mem(seed_hash, ptr);
491 ptr += copy_to_mem(num_coupons, ptr);
492 if (has_table && has_window) {
494 ptr += copy_to_mem(compressed.table_num_entries, ptr);
497 if (has_hip) { ptr += copy_hip_to_mem(ptr); }
500 ptr += copy_to_mem(compressed.table_data_words, ptr);
503 ptr += copy_to_mem(compressed.window_data_words, ptr);
506 if (has_hip && !(has_table && has_window)) { ptr += copy_hip_to_mem(ptr); }
508 ptr += copy_to_mem(compressed.window_data.data(), ptr, compressed.window_data_words *
sizeof(uint32_t));
511 ptr += copy_to_mem(compressed.table_data.data(), ptr, compressed.table_data_words *
sizeof(uint32_t));
514 if (ptr != bytes.data() + size) {
throw std::logic_error(
"serialized size mismatch"); }
520 const auto preamble_ints = read<uint8_t>(is);
521 const auto serial_version = read<uint8_t>(is);
522 const auto family_id = read<uint8_t>(is);
523 const auto lg_k = read<uint8_t>(is);
524 const auto first_interesting_column = read<uint8_t>(is);
525 const auto flags_byte = read<uint8_t>(is);
526 const auto seed_hash = read<uint16_t>(is);
527 const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
528 const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
529 const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
530 compressed_state<A> compressed(allocator);
531 compressed.table_data_words = 0;
532 compressed.table_num_entries = 0;
533 compressed.window_data_words = 0;
534 uint32_t num_coupons = 0;
536 double hip_est_accum = 0;
537 if (has_table || has_window) {
538 num_coupons = read<uint32_t>(is);
539 if (has_table && has_window) {
540 compressed.table_num_entries = read<uint32_t>(is);
542 kxp = read<double>(is);
543 hip_est_accum = read<double>(is);
547 compressed.table_data_words = read<uint32_t>(is);
550 compressed.window_data_words = read<uint32_t>(is);
552 if (has_hip && !(has_table && has_window)) {
553 kxp = read<double>(is);
554 hip_est_accum = read<double>(is);
557 compressed.window_data.resize(compressed.window_data_words);
558 read(is, compressed.window_data.data(), compressed.window_data_words *
sizeof(uint32_t));
561 compressed.table_data.resize(compressed.table_data_words);
562 read(is, compressed.table_data.data(), compressed.table_data_words *
sizeof(uint32_t));
564 if (!has_window) { compressed.table_num_entries = num_coupons; }
567 uint8_t expected_preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
568 if (preamble_ints != expected_preamble_ints) {
569 throw std::invalid_argument(
"Possible corruption: preamble ints: expected "
570 + std::to_string(expected_preamble_ints) +
", got " + std::to_string(preamble_ints));
572 if (serial_version != SERIAL_VERSION) {
573 throw std::invalid_argument(
"Possible corruption: serial version: expected "
574 + std::to_string(SERIAL_VERSION) +
", got " + std::to_string(serial_version));
576 if (family_id != FAMILY) {
577 throw std::invalid_argument(
"Possible corruption: family: expected "
578 + std::to_string(FAMILY) +
", got " + std::to_string(family_id));
580 if (seed_hash != compute_seed_hash(seed)) {
581 throw std::invalid_argument(
"Incompatible seed hashes: " + std::to_string(seed_hash) +
", "
582 + std::to_string(compute_seed_hash(seed)));
584 uncompressed_state<A> uncompressed(allocator);
585 get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
587 throw std::runtime_error(
"error reading from std::istream");
589 return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
590 std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
595 ensure_minimum_memory(size, 8);
596 const char* ptr =
static_cast<const char*
>(bytes);
597 const char* base =
static_cast<const char*
>(bytes);
598 uint8_t preamble_ints;
599 ptr += copy_from_mem(ptr, preamble_ints);
600 uint8_t serial_version;
601 ptr += copy_from_mem(ptr, serial_version);
603 ptr += copy_from_mem(ptr, family_id);
605 ptr += copy_from_mem(ptr, lg_k);
606 uint8_t first_interesting_column;
607 ptr += copy_from_mem(ptr, first_interesting_column);
609 ptr += copy_from_mem(ptr, flags_byte);
611 ptr += copy_from_mem(ptr, seed_hash);
612 const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
613 const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
614 const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
615 ensure_minimum_memory(size, preamble_ints << 2);
616 compressed_state<A> compressed(allocator);
617 compressed.table_data_words = 0;
618 compressed.table_num_entries = 0;
619 compressed.window_data_words = 0;
620 uint32_t num_coupons = 0;
622 double hip_est_accum = 0;
623 if (has_table || has_window) {
624 check_memory_size(ptr - base +
sizeof(num_coupons), size);
625 ptr += copy_from_mem(ptr, num_coupons);
626 if (has_table && has_window) {
627 check_memory_size(ptr - base +
sizeof(compressed.table_num_entries), size);
628 ptr += copy_from_mem(ptr, compressed.table_num_entries);
630 check_memory_size(ptr - base +
sizeof(kxp) +
sizeof(hip_est_accum), size);
631 ptr += copy_from_mem(ptr, kxp);
632 ptr += copy_from_mem(ptr, hip_est_accum);
636 check_memory_size(ptr - base +
sizeof(compressed.table_data_words), size);
637 ptr += copy_from_mem(ptr, compressed.table_data_words);
640 check_memory_size(ptr - base +
sizeof(compressed.window_data_words), size);
641 ptr += copy_from_mem(ptr, compressed.window_data_words);
643 if (has_hip && !(has_table && has_window)) {
644 check_memory_size(ptr - base +
sizeof(kxp) +
sizeof(hip_est_accum), size);
645 ptr += copy_from_mem(ptr, kxp);
646 ptr += copy_from_mem(ptr, hip_est_accum);
649 compressed.window_data.resize(compressed.window_data_words);
650 check_memory_size(ptr - base + (compressed.window_data_words *
sizeof(uint32_t)), size);
651 ptr += copy_from_mem(ptr, compressed.window_data.data(), compressed.window_data_words *
sizeof(uint32_t));
654 compressed.table_data.resize(compressed.table_data_words);
655 check_memory_size(ptr - base + (compressed.table_data_words *
sizeof(uint32_t)), size);
656 ptr += copy_from_mem(ptr, compressed.table_data.data(), compressed.table_data_words *
sizeof(uint32_t));
658 if (!has_window) compressed.table_num_entries = num_coupons;
660 if (ptr !=
static_cast<const char*
>(bytes) + size)
throw std::logic_error(
"deserialized size mismatch");
662 uint8_t expected_preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
663 if (preamble_ints != expected_preamble_ints) {
664 throw std::invalid_argument(
"Possible corruption: preamble ints: expected "
665 + std::to_string(expected_preamble_ints) +
", got " + std::to_string(preamble_ints));
667 if (serial_version != SERIAL_VERSION) {
668 throw std::invalid_argument(
"Possible corruption: serial version: expected "
669 + std::to_string(SERIAL_VERSION) +
", got " + std::to_string(serial_version));
671 if (family_id != FAMILY) {
672 throw std::invalid_argument(
"Possible corruption: family: expected "
673 + std::to_string(FAMILY) +
", got " + std::to_string(family_id));
675 if (seed_hash != compute_seed_hash(seed)) {
676 throw std::invalid_argument(
"Incompatible seed hashes: " + std::to_string(seed_hash) +
", "
677 + std::to_string(compute_seed_hash(seed)));
679 uncompressed_state<A> uncompressed(allocator);
680 get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
681 return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
682 std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
691static const uint8_t CPC_EMPIRICAL_SIZE_MAX_LGK = 19;
692static const size_t CPC_EMPIRICAL_MAX_SIZE_BYTES[] = {
710static const double CPC_EMPIRICAL_MAX_SIZE_FACTOR = 0.6;
711static const size_t CPC_MAX_PREAMBLE_SIZE_BYTES = 40;
716 if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) {
719 const uint32_t k = 1 << lg_k;
720 return (
int) (CPC_EMPIRICAL_MAX_SIZE_FACTOR * k) + CPC_MAX_PREAMBLE_SIZE_BYTES;
732uint32_t cpc_sketch_alloc<A>::get_num_coupons()
const {
737bool cpc_sketch_alloc<A>::validate()
const {
738 vector_u64 bit_matrix = build_bit_matrix();
739 const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(), 1ULL << lg_k);
740 return num_bits_set == num_coupons;
745 u32_table<A>&& table, vector_bytes&& window,
bool has_hip,
double kxp,
double hip_est_accum, uint64_t seed):
749num_coupons(num_coupons),
750surprising_value_table(std::move(table)),
751sliding_window(std::move(window)),
752window_offset(determine_correct_offset(lg_k, num_coupons)),
753first_interesting_column(first_interesting_column),
755hip_est_accum(hip_est_accum)
759uint8_t cpc_sketch_alloc<A>::get_preamble_ints(uint32_t num_coupons,
bool has_hip,
bool has_table,
bool has_window) {
760 uint8_t preamble_ints = 2;
761 if (num_coupons > 0) {
777 return preamble_ints;
781typename cpc_sketch_alloc<A>::flavor cpc_sketch_alloc<A>::determine_flavor()
const {
782 return determine_flavor(lg_k, num_coupons);
786typename cpc_sketch_alloc<A>::flavor cpc_sketch_alloc<A>::determine_flavor(uint8_t lg_k, uint64_t c) {
787 const uint32_t k = 1 << lg_k;
788 const uint64_t c2 = c << 1;
789 const uint64_t c8 = c << 3;
790 const uint64_t c32 = c << 5;
791 if (c == 0)
return EMPTY;
792 if (c32 < 3 * k)
return SPARSE;
793 if (c2 < k)
return HYBRID;
794 if (c8 < 27 * k)
return PINNED;
799uint8_t cpc_sketch_alloc<A>::determine_correct_offset(uint8_t lg_k, uint64_t c) {
800 const uint32_t k = 1 << lg_k;
801 const int64_t tmp =
static_cast<int64_t
>(c << 3) -
static_cast<int64_t
>(19 * k);
802 if (tmp < 0)
return 0;
803 return static_cast<uint8_t
>(tmp >> (lg_k + 3));
807auto cpc_sketch_alloc<A>::build_bit_matrix() const -> vector_u64 {
808 const uint32_t k = 1 << lg_k;
809 if (window_offset > 56)
throw std::logic_error(
"offset > 56");
813 const uint64_t default_row = (
static_cast<uint64_t
>(1) << window_offset) - 1;
814 vector_u64 matrix(k, default_row, sliding_window.get_allocator());
816 if (num_coupons == 0)
return matrix;
818 if (sliding_window.size() > 0) {
819 for (
size_t i = 0; i < k; i++) {
820 matrix[i] |=
static_cast<uint64_t
>(sliding_window[i]) << window_offset;
824 const uint32_t* slots = surprising_value_table.get_slots();
825 const uint32_t num_slots = 1 << surprising_value_table.get_lg_size();
826 for (
size_t i = 0; i < num_slots; i++) {
827 const uint32_t row_col = slots[i];
828 if (row_col != UINT32_MAX) {
829 const uint8_t col = row_col & 63;
830 const uint32_t row = row_col >> 6;
834 matrix[row] ^=
static_cast<uint64_t
>(1) << col;
841void cpc_sketch_alloc<A>::write_hip(std::ostream& os)
const {
843 write(os, hip_est_accum);
847size_t cpc_sketch_alloc<A>::copy_hip_to_mem(
void* dst)
const {
848 memcpy(dst, &kxp,
sizeof(kxp));
849 memcpy(
static_cast<char*
>(dst) +
sizeof(kxp), &hip_est_accum,
sizeof(hip_est_accum));
850 return sizeof(kxp) +
sizeof(hip_est_accum);
High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch.
Definition cpc_sketch.hpp:64
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition cpc_sketch_impl.hpp:408
double get_estimate() const
Definition cpc_sketch_impl.hpp:75
cpc_sketch_alloc(uint8_t lg_k=cpc_constants::DEFAULT_LG_K, uint64_t seed=DEFAULT_SEED, const A &allocator=A())
Creates an instance of the sketch given the lg_k parameter and hash seed.
Definition cpc_sketch_impl.hpp:44
static cpc_sketch_alloc< A > deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const A &allocator=A())
This method deserializes a sketch from a given stream.
Definition cpc_sketch_impl.hpp:519
void update(const std::string &value)
Update this sketch with a given string.
Definition cpc_sketch_impl.hpp:109
bool is_empty() const
Definition cpc_sketch_impl.hpp:70
static size_t get_max_serialized_size_bytes(uint8_t lg_k)
The actual size of a compressed CPC sketch has a small random variance, but the following empirically...
Definition cpc_sketch_impl.hpp:714
uint8_t get_lg_k() const
Definition cpc_sketch_impl.hpp:65
A get_allocator() const
Definition cpc_sketch_impl.hpp:60
double get_lower_bound(unsigned kappa) const
Returns the approximate lower error bound given a parameter kappa (1, 2 or 3).
Definition cpc_sketch_impl.hpp:91
double get_upper_bound(unsigned kappa) const
Returns the approximate upper error bound given a parameter kappa (1, 2 or 3).
Definition cpc_sketch_impl.hpp:100
string< A > to_string() const
Returns a human-readable summary of this sketch.
Definition cpc_sketch_impl.hpp:383
const uint8_t MIN_LG_K
min log2 of K
Definition cpc_common.hpp:32
const uint8_t MAX_LG_K
max log2 of K
Definition cpc_common.hpp:34
DataSketches namespace.
Definition binomial_bounds.hpp:38
void cpc_init()
Allocation and initialization of global decompression (decoding) tables.
Definition cpc_sketch_impl.hpp:39