20 #ifndef COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
21 #define COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
// Returns how many whole bytes are needed to store the given number of bits:
// the count of complete 8-bit groups, plus one more byte when any bits remain.
//
// T     integral type (any other type is rejected at compile time)
// bits  number of bits to store
template<typename T>
T whole_bytes_to_hold_bits(T bits) {
  static_assert(std::is_integral<T>::value, "integral type expected");
  // quotient plus carry, rather than (bits + 7) / 8, so that values close to
  // the maximum of T do not overflow in the intermediate sum
  const T full_bytes = bits >> 3;
  const bool has_partial_byte = (bits & 7) > 0;
  return full_bytes + has_partial_byte;
}
// Parses the serialized compact theta sketch image of `size` bytes at `ptr` and
// returns a compact_theta_sketch_data built from its header fields plus a
// pointer to the entries inside the same buffer (nothing is copied here).
//
// ptr            start of the serialized image
// size           number of bytes available at ptr
// seed           used to compute the expected seed hash via compute_seed_hash
// dump_on_error  forwarded to check_memory_size
//
// Throws std::out_of_range (from check_memory_size) when the buffer is shorter
// than the bytes about to be used, and std::invalid_argument for an unexpected
// preamble size or an unsupported serial version.
//
// NOTE(review): this listing has gaps. The case labels, several closing braces
// and the declarations of `theta` and `is_empty` are not among the visible
// lines, so the branch structure below is only partly visible.
// NOTE(review): the seven values in each returned initializer list appear to be
// {is_empty, is_ordered, seed_hash, num_entries, theta, entries pointer,
// bits per entry}; confirm against compact_theta_sketch_data.
36 auto compact_theta_sketch_parser<dummy>::parse(
const void* ptr,
size_t size, uint64_t seed,
bool dump_on_error) -> compact_theta_sketch_data {
// require at least 8 bytes before any header byte is read
// (presumably these cover the single-byte header fields; confirm the offsets)
37 check_memory_size(ptr, size, 8, dump_on_error);
38 checker<true>::check_sketch_type(
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_TYPE_BYTE], COMPACT_SKETCH_TYPE);
// the serial version byte selects which layout is decoded
39 uint8_t serial_version =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_SERIAL_VERSION_BYTE];
40 switch(serial_version) {
// ---- layout decoded with the COMPACT_SKETCH_V4_* offsets ----
43 const uint16_t seed_hash =
reinterpret_cast<const uint16_t*
>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
// compare the stored hash with the one derived from the caller's seed
// (presumably check_seed_hash throws on mismatch; confirm)
44 checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
// theta is considered present when the preamble-longs byte is greater than 1
45 const bool has_theta =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 1;
// 16 bytes are required before theta is read
// (presumably inside an `if (has_theta)` that is not visible; confirm)
48 check_memory_size(ptr, size, 16, dump_on_error);
49 theta =
reinterpret_cast<const uint64_t*
>(ptr)[COMPACT_SKETCH_V4_THETA_U64];
// number of bytes used to encode num_entries
51 const uint8_t num_entries_bytes =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_V4_NUM_ENTRIES_BYTES_BYTE];
// the packed data starts at a different offset depending on whether theta is present
52 size_t data_offset_bytes = has_theta ? COMPACT_SKETCH_V4_PACKED_DATA_ESTIMATION_BYTE : COMPACT_SKETCH_V4_PACKED_DATA_EXACT_BYTE;
// the encoded count must fit in the buffer before its bytes are read
53 check_memory_size(ptr, size, data_offset_bytes + num_entries_bytes, dump_on_error);
54 uint32_t num_entries = 0;
55 const uint8_t* num_entries_ptr =
reinterpret_cast<const uint8_t*
>(ptr) + data_offset_bytes;
// assemble num_entries from num_entries_bytes bytes, least significant byte
// first (byte i is shifted left by 8 * i)
// NOTE(review): num_entries_bytes comes from the input and is not
// range-checked in the visible lines. The byte is promoted to int before the
// shift, so a value above 4 gives a shift count of 32 or more, which is
// undefined behavior for a 32-bit int. Consider rejecting values above 4 and
// shifting a uint32_t operand.
56 for (
unsigned i = 0; i < num_entries_bytes; ++i) {
57 num_entries |= (*num_entries_ptr++) << (i << 3);
// the packed entries start right after the encoded count
59 data_offset_bytes += num_entries_bytes;
60 const uint8_t entry_bits =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_V4_ENTRY_BITS_BYTE];
// NOTE(review): entry_bits (uint8_t) * num_entries (uint32_t) is evaluated in
// unsigned int and only then widened to size_t. With a 32-bit unsigned int
// the product can wrap, making the size check below smaller than the data
// actually described. Consider static_cast<size_t>(entry_bits) * num_entries.
61 const size_t expected_bits = entry_bits * num_entries;
62 const size_t expected_size_bytes = data_offset_bytes + whole_bytes_to_hold_bits(expected_bits);
63 check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
// entries pointer is byte-addressed here and entry_bits is passed through
64 return {
false,
true, seed_hash, num_entries, theta,
65 reinterpret_cast<const uint8_t*
>(ptr) + data_offset_bytes, entry_bits};
// ---- layout that carries a flags byte (empty / ordered) ----
69 const uint16_t seed_hash =
reinterpret_cast<const uint16_t*
>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
// empty flag set: return right away with no entries;
// note that this happens before the seed hash is checked
70 if (
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_EMPTY_FLAG)) {
71 return {
true,
true, seed_hash, 0, theta,
nullptr, 64};
73 checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
// in this layout theta is considered present when preamble longs exceed 2
74 const bool has_theta =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 2;
// the buffer must extend through the theta slot before theta is read
76 check_memory_size(ptr, size, (COMPACT_SKETCH_THETA_U64 + 1) *
sizeof(uint64_t), dump_on_error);
77 theta =
reinterpret_cast<const uint64_t*
>(ptr)[COMPACT_SKETCH_THETA_U64];
// preamble longs == 1: exactly one entry, stored at
// COMPACT_SKETCH_SINGLE_ENTRY_U64; 16 bytes are required
79 if (
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] == 1) {
80 check_memory_size(ptr, size, 16, dump_on_error);
81 return {
false,
true, seed_hash, 1, theta,
reinterpret_cast<const uint64_t*
>(ptr) + COMPACT_SKETCH_SINGLE_ENTRY_U64, 64};
// NOTE(review): when has_theta is false, no check_memory_size beyond the
// initial 8 bytes is visible before this 32-bit read. Confirm the read cannot
// go past `size`.
83 const uint32_t num_entries =
reinterpret_cast<const uint32_t*
>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
// entries follow the preamble; the start differs when theta is present
84 const size_t entries_start_u64 = has_theta ? COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 : COMPACT_SKETCH_ENTRIES_EXACT_U64;
85 const uint64_t* entries =
reinterpret_cast<const uint64_t*
>(ptr) + entries_start_u64;
// entries_start_u64 is size_t, so this sum and product are computed in size_t
86 const size_t expected_size_bytes = (entries_start_u64 + num_entries) *
sizeof(uint64_t);
87 check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
// the ordered flag is taken from the flags byte
88 const bool is_ordered =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_ORDERED_FLAG);
89 return {
false, is_ordered, seed_hash, num_entries, theta, entries, 64};
// ---- layout where the seed hash is computed from `seed`, not read from the
// image; no check_seed_hash call is visible in this section ----
92 uint16_t seed_hash = compute_seed_hash(seed);
// NOTE(review): only the initial 8-byte check is visible before the two reads
// below. Confirm they cannot go past `size`.
93 const uint32_t num_entries =
reinterpret_cast<const uint32_t*
>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
94 uint64_t theta =
reinterpret_cast<const uint64_t*
>(ptr)[COMPACT_SKETCH_THETA_U64];
// `is_empty` is defined on a line that is not visible in this listing
96 if (is_empty)
return {
true,
true, seed_hash, 0, theta,
nullptr, 64};
97 const uint64_t* entries =
reinterpret_cast<const uint64_t*
>(ptr) + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64;
// NOTE(review): if COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 is narrower than
// size_t, the addition is done in 32 bits and can wrap for a very large
// num_entries before the multiplication widens it. Confirm the constant's type.
98 const size_t expected_size_bytes = (COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 + num_entries) *
sizeof(uint64_t);
99 check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
100 return {
false,
true, seed_hash, num_entries, theta, entries, 64};
// ---- layout whose shape is selected by the preamble size (1, 2 or 3 longs) ----
103 uint8_t preamble_size =
reinterpret_cast<const uint8_t*
>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE];
104 const uint16_t seed_hash =
reinterpret_cast<const uint16_t*
>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
105 checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
// the body of the preamble_size == 1 branch is not visible in this listing
106 if (preamble_size == 1) {
108 }
else if (preamble_size == 2) {
// NOTE(review): only the initial 8-byte check is visible before this read
109 const uint32_t num_entries =
reinterpret_cast<const uint32_t*
>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
// the body of the num_entries == 0 branch is not visible in this listing
110 if (num_entries == 0) {
// NOTE(review): preamble_size (uint8_t) + num_entries (uint32_t) and the
// following << 3 are evaluated in unsigned int. With a 32-bit unsigned int,
// num_entries of 2^29 or more wraps, so the size check below can pass for a
// buffer that is far too small. Consider
// (static_cast<size_t>(preamble_size) + num_entries) << 3.
113 const size_t expected_size_bytes = (preamble_size + num_entries) << 3;
114 check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
115 const uint64_t* entries =
reinterpret_cast<const uint64_t*
>(ptr) + COMPACT_SKETCH_ENTRIES_EXACT_U64;
118 }
else if (preamble_size == 3) {
// NOTE(review): only the initial 8-byte check is visible before the two reads below
119 const uint32_t num_entries =
reinterpret_cast<const uint32_t*
>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
120 uint64_t theta =
reinterpret_cast<const uint64_t*
>(ptr)[COMPACT_SKETCH_THETA_U64];
// `is_empty` is defined on a line that is not visible in this listing
122 if (is_empty)
return {
true,
true, seed_hash, 0, theta,
nullptr, 64};
123 const uint64_t* entries =
reinterpret_cast<const uint64_t*
>(ptr) + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64;
// NOTE(review): same possible 32-bit wrap in the addition as noted for the
// identical expression earlier in this function. Confirm the constant's type.
124 const size_t expected_size_bytes = (COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 + num_entries) *
sizeof(uint64_t);
125 check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
126 return {
false,
true, seed_hash, num_entries, theta, entries, 64};
// any other preamble size is rejected
// NOTE(review): the message text below misspells "preamble" as "premable"
128 throw std::invalid_argument(std::to_string(preamble_size) +
" longs of premable, but expected 1, 2, or 3");
// serial versions not handled above are rejected
132 throw std::invalid_argument(
"unsupported serial version " + std::to_string(serial_version));
137 void compact_theta_sketch_parser<dummy>::check_memory_size(
const void* ptr,
size_t actual_bytes,
size_t expected_bytes,
bool dump_on_error) {
138 if (actual_bytes < expected_bytes)
throw std::out_of_range(
"at least " + std::to_string(expected_bytes)
139 +
" bytes expected, actual " + std::to_string(actual_bytes)
140 + (dump_on_error ? (
", sketch dump: " + hex_dump(
reinterpret_cast<const uint8_t*
>(ptr), actual_bytes)) :
""));
// Renders `size` bytes starting at `ptr` as text: every byte is written as two
// uppercase hexadecimal digits, zero-padded, with no separator between bytes.
//
// ptr   first byte to render
// size  number of bytes to render
//
// NOTE(review): the declaration of the stream `s` and the return statement are
// not among the visible lines; presumably `s` is a string stream whose contents
// are returned. Confirm.
144 std::string compact_theta_sketch_parser<dummy>::hex_dump(
const uint8_t* ptr,
size_t size) {
// set once: hexadecimal base, '0' as fill character, uppercase digits
146 s << std::hex << std::setfill(
'0') << std::uppercase;
// setw affects only the next insertion, so it is repeated for every byte;
// `& 0xff` produces an int, so the byte is printed as a number rather than
// as a character
147 for (
size_t i = 0; i < size; ++i) s << std::setw(2) << (ptr[i] & 0xff);
const uint64_t MAX_THETA
max theta - signed max for compatibility with Java
Definition: theta_constants.hpp:36
DataSketches namespace.
Definition: binomial_bounds.hpp:38