22 #ifndef CPC_COMPRESSOR_IMPL_HPP_
23 #define CPC_COMPRESSOR_IMPL_HPP_
29 #include "common_defs.hpp"
30 #include "compression_data.hpp"
31 #include "cpc_util.hpp"
32 #include "cpc_common.hpp"
33 #include "count_zeros.hpp"
39 cpc_compressor<A>& get_compressor() {
40 static cpc_compressor<A>* instance =
new cpc_compressor<A>();
41 static int reg_result = std::atexit(destroy_compressor<A>);
48 void destroy_compressor() {
49 delete std::addressof(get_compressor<A>());
53 cpc_compressor<A>::cpc_compressor() {
54 make_decoding_tables();
58 cpc_compressor<A>::~cpc_compressor() {
59 free_decoding_tables();
63 uint8_t* cpc_compressor<A>::make_inverse_permutation(
const uint8_t* permu,
unsigned length) {
64 uint8_t* inverse =
new uint8_t[length];
65 for (
unsigned i = 0; i < length; i++) {
66 inverse[permu[i]] =
static_cast<uint8_t
>(i);
68 for (
unsigned i = 0; i < length; i++) {
69 if (permu[inverse[i]] != i)
throw std::logic_error(
"inverse permutation error");
78 uint16_t* cpc_compressor<A>::make_decoding_table(
const uint16_t* encoding_table,
unsigned num_byte_values) {
79 uint16_t* decoding_table =
new uint16_t[4096];
80 for (
unsigned byte_value = 0; byte_value < num_byte_values; byte_value++) {
81 const uint16_t encoding_entry = encoding_table[byte_value];
82 const uint16_t code_value = encoding_entry & 0xfff;
83 const uint8_t code_length = encoding_entry >> 12;
84 const uint16_t decoding_entry =
static_cast<uint16_t
>((code_length << 8) | byte_value);
85 const uint8_t garbage_length = 12 - code_length;
86 const uint32_t num_copies = 1 << garbage_length;
87 for (uint32_t garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
88 const uint16_t extended_code_value =
static_cast<uint16_t
>(code_value | (garbage_bits << code_length));
89 decoding_table[extended_code_value & 0xfff] = decoding_entry;
92 return decoding_table;
96 void cpc_compressor<A>::validate_decoding_table(
const uint16_t* decoding_table,
const uint16_t* encoding_table)
const {
97 for (
int decode_this = 0; decode_this < 4096; decode_this++) {
98 const int tmp_d = decoding_table[decode_this];
99 const int decoded_byte = tmp_d & 0xff;
100 const int decoded_length = tmp_d >> 8;
102 const int tmp_e = encoding_table[decoded_byte];
103 const int encoded_bit_pattern = tmp_e & 0xfff;
104 const int encoded_length = tmp_e >> 12;
106 if (decoded_length != encoded_length)
throw std::logic_error(
"decoded length error");
107 if (encoded_bit_pattern != (decode_this & ((1 << decoded_length) - 1)))
throw std::logic_error(
"bit pattern error");
112 void cpc_compressor<A>::make_decoding_tables() {
113 length_limited_unary_decoding_table65 = make_decoding_table(length_limited_unary_encoding_table65, 65);
114 validate_decoding_table(
115 length_limited_unary_decoding_table65,
116 length_limited_unary_encoding_table65
119 for (
int i = 0; i < (16 + 6); i++) {
120 decoding_tables_for_high_entropy_byte[i] = make_decoding_table(encoding_tables_for_high_entropy_byte[i], 256);
121 validate_decoding_table(
122 decoding_tables_for_high_entropy_byte[i],
123 encoding_tables_for_high_entropy_byte[i]
127 for (
int i = 0; i < 16; i++) {
128 column_permutations_for_decoding[i] = make_inverse_permutation(column_permutations_for_encoding[i], 56);
133 void cpc_compressor<A>::free_decoding_tables() {
134 delete[] length_limited_unary_decoding_table65;
135 for (
int i = 0; i < (16 + 6); i++) {
136 delete[] decoding_tables_for_high_entropy_byte[i];
138 for (
int i = 0; i < 16; i++) {
139 delete[] column_permutations_for_decoding[i];
144 void cpc_compressor<A>::compress(
const cpc_sketch_alloc<A>& source, compressed_state<A>& result)
const {
145 switch (source.determine_flavor()) {
146 case cpc_sketch_alloc<A>::flavor::EMPTY:
148 case cpc_sketch_alloc<A>::flavor::SPARSE:
149 compress_sparse_flavor(source, result);
150 if (result.window_data.size() > 0)
throw std::logic_error(
"window is not expected");
151 if (result.table_data.size() == 0)
throw std::logic_error(
"table is expected");
153 case cpc_sketch_alloc<A>::flavor::HYBRID:
154 compress_hybrid_flavor(source, result);
155 if (result.window_data.size() > 0)
throw std::logic_error(
"window is not expected");
156 if (result.table_data.size() == 0)
throw std::logic_error(
"table is expected");
158 case cpc_sketch_alloc<A>::flavor::PINNED:
159 compress_pinned_flavor(source, result);
160 if (result.window_data.size() == 0)
throw std::logic_error(
"window is not expected");
162 case cpc_sketch_alloc<A>::flavor::SLIDING:
163 compress_sliding_flavor(source, result);
164 if (result.window_data.size() == 0)
throw std::logic_error(
"window is expected");
166 default:
throw std::logic_error(
"Unknown sketch flavor");
171 void cpc_compressor<A>::uncompress(
const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons)
const {
172 switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
173 case cpc_sketch_alloc<A>::flavor::EMPTY:
174 target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
176 case cpc_sketch_alloc<A>::flavor::SPARSE:
177 uncompress_sparse_flavor(source, target, lg_k);
179 case cpc_sketch_alloc<A>::flavor::HYBRID:
180 uncompress_hybrid_flavor(source, target, lg_k);
182 case cpc_sketch_alloc<A>::flavor::PINNED:
183 if (source.window_data.size() == 0)
throw std::logic_error(
"window is expected");
184 uncompress_pinned_flavor(source, target, lg_k, num_coupons);
186 case cpc_sketch_alloc<A>::flavor::SLIDING:
187 uncompress_sliding_flavor(source, target, lg_k, num_coupons);
189 default: std::logic_error(
"Unknown sketch flavor");
194 void cpc_compressor<A>::compress_sparse_flavor(
const cpc_sketch_alloc<A>& source, compressed_state<A>& result)
const {
195 if (source.sliding_window.size() > 0)
throw std::logic_error(
"unexpected sliding window");
196 vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
197 u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
198 compress_surprising_values(pairs, source.get_lg_k(), result);
202 void cpc_compressor<A>::uncompress_sparse_flavor(
const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k)
const {
203 if (source.window_data.size() > 0)
throw std::logic_error(
"unexpected sliding window");
204 if (source.table_data.size() == 0)
throw std::logic_error(
"table is expected");
205 vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
206 lg_k, source.table_data.get_allocator());
207 target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
213 void cpc_compressor<A>::compress_hybrid_flavor(
const cpc_sketch_alloc<A>& source, compressed_state<A>& result)
const {
214 if (source.sliding_window.size() == 0)
throw std::logic_error(
"no sliding window");
215 if (source.window_offset != 0)
throw std::logic_error(
"window_offset != 0");
216 const uint32_t k = 1 << source.get_lg_k();
217 vector_u32 pairs_from_table = source.surprising_value_table.unwrapping_get_items();
218 const uint32_t num_pairs_from_table =
static_cast<uint32_t
>(pairs_from_table.size());
219 if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
220 const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table;
222 vector_u32 all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
225 pairs_from_table.data(), 0, pairs_from_table.size(),
226 all_pairs.data(), num_pairs_from_table, num_pairs_from_window,
230 compress_surprising_values(all_pairs, source.get_lg_k(), result);
234 void cpc_compressor<A>::uncompress_hybrid_flavor(
const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k)
const {
235 if (source.window_data.size() > 0)
throw std::logic_error(
"window is not expected");
236 if (source.table_data.size() == 0)
throw std::logic_error(
"table is expected");
237 vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
238 lg_k, source.table_data.get_allocator());
243 const uint32_t k = 1 << lg_k;
244 target.window.resize(k, 0);
245 uint32_t next_true_pair = 0;
246 for (uint32_t i = 0; i < source.table_num_entries; i++) {
247 const uint32_t row_col = pairs[i];
248 if (row_col == UINT32_MAX)
throw std::logic_error(
"empty marker is not expected");
249 const uint8_t col = row_col & 63;
251 const uint32_t row = row_col >> 6;
252 target.window[row] |= 1 << col;
254 pairs[next_true_pair++] = row_col;
257 target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
261 void cpc_compressor<A>::compress_pinned_flavor(
const cpc_sketch_alloc<A>& source, compressed_state<A>& result)
const {
262 compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
263 vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
264 if (pairs.size() > 0) {
270 for (
size_t i = 0; i < pairs.size(); i++) {
271 if ((pairs[i] & 63) < 8)
throw std::logic_error(
"(pairs[i] & 63) < 8");
275 if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
276 compress_surprising_values(pairs, source.get_lg_k(), result);
281 void cpc_compressor<A>::uncompress_pinned_flavor(
const compressed_state<A>& source, uncompressed_state<A>& target,
282 uint8_t lg_k, uint32_t num_coupons)
const {
283 if (source.window_data.size() == 0)
throw std::logic_error(
"window is expected");
284 uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
285 const uint32_t num_pairs = source.table_num_entries;
286 if (num_pairs == 0) {
287 target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
289 if (source.table_data.size() == 0)
throw std::logic_error(
"table is expected");
290 vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
291 lg_k, source.table_data.get_allocator());
293 for (uint32_t i = 0; i < num_pairs; i++) {
294 if ((pairs[i] & 63) >= 56)
throw std::logic_error(
"(pairs[i] & 63) >= 56");
297 target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
302 void cpc_compressor<A>::compress_sliding_flavor(
const cpc_sketch_alloc<A>& source, compressed_state<A>& result)
const {
303 compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
304 vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
305 if (pairs.size() > 0) {
309 const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
310 if (pseudo_phase >= 16)
throw std::logic_error(
"unexpected pseudo phase for sliding flavor");
311 const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
313 const uint8_t offset = source.window_offset;
314 if (offset > 56)
throw std::out_of_range(
"offset out of range");
316 for (
size_t i = 0; i < pairs.size(); i++) {
317 const uint32_t row_col = pairs[i];
318 const uint32_t row = row_col >> 6;
319 uint8_t col = row_col & 63;
321 col = (col + 56 - offset) & 63;
322 if (col >= 56)
throw std::out_of_range(
"col out of range");
324 col = permutation[col];
325 pairs[i] = (row << 6) | col;
328 if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
329 compress_surprising_values(pairs, source.get_lg_k(), result);
334 void cpc_compressor<A>::uncompress_sliding_flavor(
const compressed_state<A>& source, uncompressed_state<A>& target,
335 uint8_t lg_k, uint32_t num_coupons)
const {
336 if (source.window_data.size() == 0)
throw std::logic_error(
"window is expected");
337 uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
338 const uint32_t num_pairs = source.table_num_entries;
339 if (num_pairs == 0) {
340 target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
342 if (source.table_data.size() == 0)
throw std::logic_error(
"table is expected");
343 vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
344 lg_k, source.table_data.get_allocator());
346 const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
347 if (pseudo_phase >= 16)
throw std::logic_error(
"unexpected pseudo phase for sliding flavor");
348 const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
350 uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
351 if (offset > 56)
throw std::out_of_range(
"offset out of range");
353 for (uint32_t i = 0; i < num_pairs; i++) {
354 const uint32_t row_col = pairs[i];
355 const uint32_t row = row_col >> 6;
356 uint8_t col = row_col & 63;
358 col = permutation[col];
360 col = (col + (offset + 8)) & 63;
361 pairs[i] = (row << 6) | col;
364 target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
369 void cpc_compressor<A>::compress_surprising_values(
const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result)
const {
370 const uint32_t k = 1 << lg_k;
371 const uint32_t num_pairs =
static_cast<uint32_t
>(pairs.size());
372 const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
373 const uint64_t table_len = safe_length_for_compressed_pair_buf(k, num_pairs, num_base_bits);
374 result.table_data.resize(table_len);
376 uint32_t csv_length = low_level_compress_pairs(pairs.data(),
static_cast<uint32_t
>(pairs.size()), num_base_bits, result.table_data.data());
382 result.table_data_words = csv_length;
383 result.table_num_entries = num_pairs;
387 auto cpc_compressor<A>::uncompress_surprising_values(
const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
388 uint8_t lg_k,
const A& allocator)
const -> vector_u32 {
389 const uint32_t k = 1 << lg_k;
390 vector_u32 pairs(num_pairs, 0, allocator);
391 const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
392 low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
397 void cpc_compressor<A>::compress_sliding_window(
const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target)
const {
398 const uint32_t k = 1 << lg_k;
399 const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
400 target.window_data.resize(window_buf_len);
401 const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
402 size_t data_words = low_level_compress_bytes(window, k, encoding_tables_for_high_entropy_byte[pseudo_phase], target.window_data.data());
408 target.window_data_words =
static_cast<uint32_t
>(data_words);
412 void cpc_compressor<A>::uncompress_sliding_window(
const uint32_t* data, uint32_t data_words, vector_bytes& window,
413 uint8_t lg_k, uint32_t num_coupons)
const {
414 const uint32_t k = 1 << lg_k;
416 const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
417 low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
421 size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits) {
426 const size_t ybits = num_pairs * (1 + num_base_bits) + (k >> num_base_bits);
427 const size_t xbits = 12 * num_pairs;
428 const size_t padding = num_base_bits > 10 ? 0 : 10 - num_base_bits;
429 return divide_longs_rounding_up(xbits + ybits + padding, 32);
439 size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint32_t k) {
440 const size_t bits = 12 * k + 11;
441 return divide_longs_rounding_up(bits, 32);
445 uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint32_t c) {
446 const uint32_t k = 1 << lg_k;
449 if (1000 * c < 2375 * k) {
450 if ( 4 * c < 3 * k)
return 16 + 0;
451 else if ( 10 * c < 11 * k)
return 16 + 1;
452 else if ( 100 * c < 132 * k)
return 16 + 2;
453 else if ( 3 * c < 5 * k)
return 16 + 3;
454 else if (1000 * c < 1965 * k)
return 16 + 4;
455 else if (1000 * c < 2275 * k)
return 16 + 5;
459 if (lg_k < 4)
throw std::logic_error(
"lgK < 4");
460 const size_t tmp = c >> (lg_k - 4);
461 const uint8_t phase = tmp & 15;
462 if (phase >= 16)
throw std::out_of_range(
"wrong phase");
467 static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, uint32_t& wordindex) {
469 wordarr[wordindex++] = bitbuf & 0xffffffff;
470 bitbuf = bitbuf >> 32;
475 static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits,
const uint32_t* wordarr, uint32_t& wordindex, uint8_t minbits) {
476 if (bufbits < minbits) {
477 bitbuf |=
static_cast<uint64_t
>(wordarr[wordindex++]) << bufbits;
485 uint32_t cpc_compressor<A>::low_level_compress_bytes(
486 const uint8_t* byte_array,
487 uint32_t num_bytes_to_encode,
488 const uint16_t* encoding_table,
489 uint32_t* compressed_words
493 uint32_t next_word_index = 0;
495 for (uint32_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
496 const uint16_t code_info = encoding_table[byte_array[byte_index]];
497 const uint64_t code_val = code_info & 0xfff;
498 const uint8_t code_len = code_info >> 12;
499 bitbuf |= (code_val << bufbits);
501 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
506 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
509 if (bufbits >= 32)
throw std::logic_error(
"bufbits >= 32");
510 compressed_words[next_word_index++] = bitbuf & 0xffffffff;
511 bitbuf = 0; bufbits = 0;
513 return next_word_index;
517 void cpc_compressor<A>::low_level_uncompress_bytes(
519 uint32_t num_bytes_to_decode,
520 const uint16_t* decoding_table,
521 const uint32_t* compressed_words,
522 uint32_t num_compressed_words
524 uint32_t word_index = 0;
528 if (byte_array ==
nullptr)
throw std::logic_error(
"byte_array == NULL");
529 if (decoding_table ==
nullptr)
throw std::logic_error(
"decoding_table == NULL");
530 if (compressed_words ==
nullptr)
throw std::logic_error(
"compressed_words == NULL");
532 for (uint32_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
533 maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12);
535 const size_t peek12 = bitbuf & 0xfff;
536 const uint16_t lookup = decoding_table[peek12];
537 const uint8_t code_word_length = lookup >> 8;
538 const uint8_t decoded_byte = lookup & 0xff;
539 byte_array[byte_index] = decoded_byte;
540 bitbuf >>= code_word_length;
541 bufbits -= code_word_length;
545 if (word_index > num_compressed_words)
throw std::logic_error(
"word_index > num_compressed_words");
548 static inline uint64_t read_unary(
549 const uint32_t* compressed_words,
550 uint32_t& next_word_index,
555 static inline void write_unary(
556 uint32_t* compressed_words,
557 uint32_t& next_word_index_ptr,
558 uint64_t& bit_buf_ptr,
559 uint8_t& buf_bits_ptr,
568 uint32_t cpc_compressor<A>::low_level_compress_pairs(
569 const uint32_t* pair_array,
570 uint32_t num_pairs_to_encode,
571 uint8_t num_base_bits,
572 uint32_t* compressed_words
576 uint32_t next_word_index = 0;
577 const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
578 uint32_t predicted_row_index = 0;
579 uint8_t predicted_col_index = 0;
581 for (uint32_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
582 const uint32_t row_col = pair_array[pair_index];
583 const uint32_t row_index = row_col >> 6;
584 const uint8_t col_index = row_col & 63;
586 if (row_index != predicted_row_index) predicted_col_index = 0;
588 if (row_index < predicted_row_index)
throw std::logic_error(
"row_index < predicted_row_index");
589 if (col_index < predicted_col_index)
throw std::logic_error(
"col_index < predicted_col_index");
591 const uint32_t y_delta = row_index - predicted_row_index;
592 const uint8_t x_delta = col_index - predicted_col_index;
594 predicted_row_index = row_index;
595 predicted_col_index = col_index + 1;
597 const uint16_t code_info = length_limited_unary_encoding_table65[x_delta];
598 const uint64_t code_val = code_info & 0xfff;
599 const uint8_t code_len =
static_cast<uint8_t
>(code_info >> 12);
600 bitbuf |= code_val << bufbits;
602 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
604 const uint64_t golomb_lo = y_delta & golomb_lo_mask;
605 const uint64_t golomb_hi = y_delta >> num_base_bits;
607 write_unary(compressed_words, next_word_index, bitbuf, bufbits, golomb_hi);
609 bitbuf |= golomb_lo << bufbits;
610 bufbits += num_base_bits;
611 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
615 const uint8_t padding = (num_base_bits > 10) ? 0 : 10 - num_base_bits;
617 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
620 if (bufbits >= 32)
throw std::logic_error(
"bufbits >= 32");
621 compressed_words[next_word_index++] = bitbuf & 0xffffffff;
622 bitbuf = 0; bufbits = 0;
625 return next_word_index;
629 void cpc_compressor<A>::low_level_uncompress_pairs(
630 uint32_t* pair_array,
631 uint32_t num_pairs_to_decode,
632 uint8_t num_base_bits,
633 const uint32_t* compressed_words,
634 uint32_t num_compressed_words
636 uint32_t word_index = 0;
639 const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
640 uint32_t predicted_row_index = 0;
641 uint8_t predicted_col_index = 0;
648 for (uint32_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
649 maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12);
650 const size_t peek12 = bitbuf & 0xfff;
651 const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
652 const uint8_t code_word_length = lookup >> 8;
653 const int8_t x_delta = lookup & 0xff;
654 bitbuf >>= code_word_length;
655 bufbits -= code_word_length;
657 const uint64_t golomb_hi = read_unary(compressed_words, word_index, bitbuf, bufbits);
659 maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, num_base_bits);
660 const uint64_t golomb_lo = bitbuf & golomb_lo_mask;
661 bitbuf >>= num_base_bits;
662 bufbits -= num_base_bits;
663 const int64_t y_delta = (golomb_hi << num_base_bits) | golomb_lo;
666 if (y_delta > 0) predicted_col_index = 0;
667 const uint32_t row_index =
static_cast<uint32_t
>(predicted_row_index + y_delta);
668 const uint8_t col_index = predicted_col_index + x_delta;
669 const uint32_t row_col = (row_index << 6) | col_index;
670 pair_array[pair_index] = row_col;
671 predicted_row_index = row_index;
672 predicted_col_index = col_index + 1;
674 if (word_index > num_compressed_words)
throw std::logic_error(
"word_index > num_compressed_words");
678 const uint32_t* compressed_words,
679 uint32_t& next_word_index,
683 if (compressed_words ==
nullptr)
throw std::logic_error(
"compressed_words == NULL");
686 maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, next_word_index, 8);
688 const uint8_t peek8 = bitbuf & 0xff;
689 const uint8_t trailing_zeros = byte_trailing_zeros_table[peek8];
691 if (trailing_zeros > 8)
throw std::out_of_range(
"trailing_zeros out of range");
692 if (trailing_zeros < 8) {
693 bufbits -= 1 + trailing_zeros;
694 bitbuf >>= 1 + trailing_zeros;
695 return subtotal + trailing_zeros;
705 uint32_t* compressed_words,
706 uint32_t& next_word_index,
711 if (compressed_words ==
nullptr)
throw std::logic_error(
"compressed_words == NULL");
712 if (bufbits > 31)
throw std::out_of_range(
"bufbits out of range");
714 uint64_t remaining = value;
716 while (remaining >= 16) {
721 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
724 if (remaining > 15)
throw std::out_of_range(
"remaining out of range");
726 const uint64_t the_unary_code = 1ULL << remaining;
727 bitbuf |= the_unary_code << bufbits;
728 bufbits +=
static_cast<uint8_t
>(remaining + 1);
729 maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
735 auto cpc_compressor<A>::tricky_get_pairs_from_window(
const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
736 uint32_t empty_space,
const A& allocator) -> vector_u32 {
737 const size_t output_length = empty_space + num_pairs_to_get;
738 vector_u32 pairs(output_length, 0, allocator);
739 size_t pair_index = empty_space;
740 for (
unsigned row_index = 0; row_index < k; row_index++) {
741 uint8_t
byte = window[row_index];
743 const uint8_t col_index = byte_trailing_zeros_table[byte];
744 byte =
byte ^ (1 << col_index);
745 pairs[pair_index++] = (row_index << 6) | col_index;
748 if (pair_index != output_length)
throw std::logic_error(
"pair_index != output_length");
755 uint8_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint32_t k, uint64_t count) {
756 if (k < 1)
throw std::invalid_argument(
"golomb_choose_number_of_base_bits: k < 1");
757 if (count < 1)
throw std::invalid_argument(
"golomb_choose_number_of_base_bits: count < 1");
758 const uint64_t quotient = (k - count) / count;
759 if (quotient == 0)
return 0;
760 else return floor_log2_of_long(quotient);
DataSketches namespace.
Definition: binomial_bounds.hpp:38