datasketches-cpp
cpc_compressor_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 // author Kevin Lang, Oath Research
21 
22 #ifndef CPC_COMPRESSOR_IMPL_HPP_
23 #define CPC_COMPRESSOR_IMPL_HPP_
24 
25 #include <cstdlib>
26 #include <memory>
27 #include <stdexcept>
28 
29 #include "common_defs.hpp"
30 #include "compression_data.hpp"
31 #include "cpc_util.hpp"
32 #include "cpc_common.hpp"
33 #include "count_zeros.hpp"
34 
35 namespace datasketches {
36 
37 // construct on first use
38 template<typename A>
39 cpc_compressor<A>& get_compressor() {
40  static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
41  static int reg_result = std::atexit(destroy_compressor<A>); // just to clean up a little more nicely; don't worry if it fails
42  unused(reg_result);
43  return *instance;
44 }
45 
46 // register to call compressor destructor at exit
47 template<typename A>
48 void destroy_compressor() {
49  delete std::addressof(get_compressor<A>());
50 }
51 
52 template<typename A>
53 cpc_compressor<A>::cpc_compressor() {
54  make_decoding_tables();
55 }
56 
57 template<typename A>
58 cpc_compressor<A>::~cpc_compressor() {
59  free_decoding_tables();
60 }
61 
62 template<typename A>
63 uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, unsigned length) {
64  uint8_t* inverse = new uint8_t[length]; // use new for global initialization
65  for (unsigned i = 0; i < length; i++) {
66  inverse[permu[i]] = static_cast<uint8_t>(i);
67  }
68  for (unsigned i = 0; i < length; i++) {
69  if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
70  }
71  return inverse;
72 }
73 
74 /* Given an encoding table that maps unsigned bytes to codewords
75  of length at most 12, this builds a size-4096 decoding table */
76 // The second argument is typically 256, but can be other values such as 65.
77 template<typename A>
78 uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values) {
79  uint16_t* decoding_table = new uint16_t[4096]; // use new for global initialization
80  for (unsigned byte_value = 0; byte_value < num_byte_values; byte_value++) {
81  const uint16_t encoding_entry = encoding_table[byte_value];
82  const uint16_t code_value = encoding_entry & 0xfff;
83  const uint8_t code_length = encoding_entry >> 12;
84  const uint16_t decoding_entry = static_cast<uint16_t>((code_length << 8) | byte_value);
85  const uint8_t garbage_length = 12 - code_length;
86  const uint32_t num_copies = 1 << garbage_length;
87  for (uint32_t garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
88  const uint16_t extended_code_value = static_cast<uint16_t>(code_value | (garbage_bits << code_length));
89  decoding_table[extended_code_value & 0xfff] = decoding_entry;
90  }
91  }
92  return decoding_table;
93 }
94 
95 template<typename A>
96 void cpc_compressor<A>::validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const {
97  for (int decode_this = 0; decode_this < 4096; decode_this++) {
98  const int tmp_d = decoding_table[decode_this];
99  const int decoded_byte = tmp_d & 0xff;
100  const int decoded_length = tmp_d >> 8;
101 
102  const int tmp_e = encoding_table[decoded_byte];
103  const int encoded_bit_pattern = tmp_e & 0xfff;
104  const int encoded_length = tmp_e >> 12;
105 
106  if (decoded_length != encoded_length) throw std::logic_error("decoded length error");
107  if (encoded_bit_pattern != (decode_this & ((1 << decoded_length) - 1))) throw std::logic_error("bit pattern error");
108  }
109 }
110 
111 template<typename A>
112 void cpc_compressor<A>::make_decoding_tables() {
113  length_limited_unary_decoding_table65 = make_decoding_table(length_limited_unary_encoding_table65, 65);
114  validate_decoding_table(
115  length_limited_unary_decoding_table65,
116  length_limited_unary_encoding_table65
117  );
118 
119  for (int i = 0; i < (16 + 6); i++) {
120  decoding_tables_for_high_entropy_byte[i] = make_decoding_table(encoding_tables_for_high_entropy_byte[i], 256);
121  validate_decoding_table(
122  decoding_tables_for_high_entropy_byte[i],
123  encoding_tables_for_high_entropy_byte[i]
124  );
125  }
126 
127  for (int i = 0; i < 16; i++) {
128  column_permutations_for_decoding[i] = make_inverse_permutation(column_permutations_for_encoding[i], 56);
129  }
130 }
131 
132 template<typename A>
133 void cpc_compressor<A>::free_decoding_tables() {
134  delete[] length_limited_unary_decoding_table65;
135  for (int i = 0; i < (16 + 6); i++) {
136  delete[] decoding_tables_for_high_entropy_byte[i];
137  }
138  for (int i = 0; i < 16; i++) {
139  delete[] column_permutations_for_decoding[i];
140  }
141 }
142 
143 template<typename A>
144 void cpc_compressor<A>::compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
145  switch (source.determine_flavor()) {
146  case cpc_sketch_alloc<A>::flavor::EMPTY:
147  break;
148  case cpc_sketch_alloc<A>::flavor::SPARSE:
149  compress_sparse_flavor(source, result);
150  if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
151  if (result.table_data.size() == 0) throw std::logic_error("table is expected");
152  break;
153  case cpc_sketch_alloc<A>::flavor::HYBRID:
154  compress_hybrid_flavor(source, result);
155  if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
156  if (result.table_data.size() == 0) throw std::logic_error("table is expected");
157  break;
158  case cpc_sketch_alloc<A>::flavor::PINNED:
159  compress_pinned_flavor(source, result);
160  if (result.window_data.size() == 0) throw std::logic_error("window is not expected");
161  break;
162  case cpc_sketch_alloc<A>::flavor::SLIDING:
163  compress_sliding_flavor(source, result);
164  if (result.window_data.size() == 0) throw std::logic_error("window is expected");
165  break;
166  default: throw std::logic_error("Unknown sketch flavor");
167  }
168 }
169 
170 template<typename A>
171 void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
172  switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
173  case cpc_sketch_alloc<A>::flavor::EMPTY:
174  target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
175  break;
176  case cpc_sketch_alloc<A>::flavor::SPARSE:
177  uncompress_sparse_flavor(source, target, lg_k);
178  break;
179  case cpc_sketch_alloc<A>::flavor::HYBRID:
180  uncompress_hybrid_flavor(source, target, lg_k);
181  break;
182  case cpc_sketch_alloc<A>::flavor::PINNED:
183  if (source.window_data.size() == 0) throw std::logic_error("window is expected");
184  uncompress_pinned_flavor(source, target, lg_k, num_coupons);
185  break;
186  case cpc_sketch_alloc<A>::flavor::SLIDING:
187  uncompress_sliding_flavor(source, target, lg_k, num_coupons);
188  break;
189  default: std::logic_error("Unknown sketch flavor");
190  }
191 }
192 
193 template<typename A>
194 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
195  if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
196  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
197  u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
198  compress_surprising_values(pairs, source.get_lg_k(), result);
199 }
200 
201 template<typename A>
202 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
203  if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
204  if (source.table_data.size() == 0) throw std::logic_error("table is expected");
205  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
206  lg_k, source.table_data.get_allocator());
207  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
208 }
209 
210 // This is complicated because it effectively builds a Sparse version
211 // of a Pinned sketch before compressing it. Hence the name Hybrid.
212 template<typename A>
213 void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
214  if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
215  if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
216  const uint32_t k = 1 << source.get_lg_k();
217  vector_u32 pairs_from_table = source.surprising_value_table.unwrapping_get_items();
218  const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
219  if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
220  const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
221 
222  vector_u32 all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
223 
224  u32_table<A>::merge(
225  pairs_from_table.data(), 0, pairs_from_table.size(),
226  all_pairs.data(), num_pairs_from_table, num_pairs_from_window,
227  all_pairs.data(), 0
228  ); // note the overlapping subarray trick
229 
230  compress_surprising_values(all_pairs, source.get_lg_k(), result);
231 }
232 
233 template<typename A>
234 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
235  if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
236  if (source.table_data.size() == 0) throw std::logic_error("table is expected");
237  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
238  lg_k, source.table_data.get_allocator());
239 
240  // In the hybrid flavor, some of these pairs actually
241  // belong in the window, so we will separate them out,
242  // moving the "true" pairs to the bottom of the array.
243  const uint32_t k = 1 << lg_k;
244  target.window.resize(k, 0); // important: zero the memory
245  uint32_t next_true_pair = 0;
246  for (uint32_t i = 0; i < source.table_num_entries; i++) {
247  const uint32_t row_col = pairs[i];
248  if (row_col == UINT32_MAX) throw std::logic_error("empty marker is not expected");
249  const uint8_t col = row_col & 63;
250  if (col < 8) {
251  const uint32_t row = row_col >> 6;
252  target.window[row] |= 1 << col; // set the window bit
253  } else {
254  pairs[next_true_pair++] = row_col; // move true pair down
255  }
256  }
257  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
258 }
259 
260 template<typename A>
261 void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
262  compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
263  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
264  if (pairs.size() > 0) {
265  // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
266  // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
267  // we can simply subtract 8 from the pairs themselves.
268 
269  // shift the columns over by 8 positions before compressing (because of the window)
270  for (size_t i = 0; i < pairs.size(); i++) {
271  if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
272  pairs[i] -= 8;
273  }
274 
275  if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
276  compress_surprising_values(pairs, source.get_lg_k(), result);
277  }
278 }
279 
280 template<typename A>
281 void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
282  uint8_t lg_k, uint32_t num_coupons) const {
283  if (source.window_data.size() == 0) throw std::logic_error("window is expected");
284  uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
285  const uint32_t num_pairs = source.table_num_entries;
286  if (num_pairs == 0) {
287  target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
288  } else {
289  if (source.table_data.size() == 0) throw std::logic_error("table is expected");
290  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
291  lg_k, source.table_data.get_allocator());
292  // undo the compressor's 8-column shift
293  for (uint32_t i = 0; i < num_pairs; i++) {
294  if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
295  pairs[i] += 8;
296  }
297  target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
298  }
299 }
300 
301 template<typename A>
302 void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
303  compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
304  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
305  if (pairs.size() > 0) {
306  // Here we apply a complicated transformation to the column indices, which
307  // changes the implied ordering of the pairs, so we must do it before sorting.
308 
309  const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
310  if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
311  const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
312 
313  const uint8_t offset = source.window_offset;
314  if (offset > 56) throw std::out_of_range("offset out of range");
315 
316  for (size_t i = 0; i < pairs.size(); i++) {
317  const uint32_t row_col = pairs[i];
318  const uint32_t row = row_col >> 6;
319  uint8_t col = row_col & 63;
320  // first rotate the columns into a canonical configuration: new = ((old - (offset+8)) + 64) mod 64
321  col = (col + 56 - offset) & 63;
322  if (col >= 56) throw std::out_of_range("col out of range");
323  // then apply the permutation
324  col = permutation[col];
325  pairs[i] = (row << 6) | col;
326  }
327 
328  if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
329  compress_surprising_values(pairs, source.get_lg_k(), result);
330  }
331 }
332 
333 template<typename A>
334 void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
335  uint8_t lg_k, uint32_t num_coupons) const {
336  if (source.window_data.size() == 0) throw std::logic_error("window is expected");
337  uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
338  const uint32_t num_pairs = source.table_num_entries;
339  if (num_pairs == 0) {
340  target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
341  } else {
342  if (source.table_data.size() == 0) throw std::logic_error("table is expected");
343  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
344  lg_k, source.table_data.get_allocator());
345 
346  const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
347  if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
348  const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
349 
350  uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
351  if (offset > 56) throw std::out_of_range("offset out of range");
352 
353  for (uint32_t i = 0; i < num_pairs; i++) {
354  const uint32_t row_col = pairs[i];
355  const uint32_t row = row_col >> 6;
356  uint8_t col = row_col & 63;
357  // first undo the permutation
358  col = permutation[col];
359  // then undo the rotation: old = (new + (offset+8)) mod 64
360  col = (col + (offset + 8)) & 63;
361  pairs[i] = (row << 6) | col;
362  }
363 
364  target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
365  }
366 }
367 
368 template<typename A>
369 void cpc_compressor<A>::compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const {
370  const uint32_t k = 1 << lg_k;
371  const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
372  const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
373  const uint64_t table_len = safe_length_for_compressed_pair_buf(k, num_pairs, num_base_bits);
374  result.table_data.resize(table_len);
375 
376  uint32_t csv_length = low_level_compress_pairs(pairs.data(), static_cast<uint32_t>(pairs.size()), num_base_bits, result.table_data.data());
377 
378  // At this point we could free the unused portion of the compression output buffer,
379  // but it is not necessary if it is temporary
380  // Note: realloc caused strange timing spikes for lgK = 11 and 12.
381 
382  result.table_data_words = csv_length;
383  result.table_num_entries = num_pairs;
384 }
385 
386 template<typename A>
387 auto cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
388  uint8_t lg_k, const A& allocator) const -> vector_u32 {
389  const uint32_t k = 1 << lg_k;
390  vector_u32 pairs(num_pairs, 0, allocator);
391  const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
392  low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
393  return pairs;
394 }
395 
396 template<typename A>
397 void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const {
398  const uint32_t k = 1 << lg_k;
399  const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
400  target.window_data.resize(window_buf_len);
401  const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
402  size_t data_words = low_level_compress_bytes(window, k, encoding_tables_for_high_entropy_byte[pseudo_phase], target.window_data.data());
403 
404  // At this point we could free the unused portion of the compression output buffer,
405  // but it is not necessary if it is temporary
406  // Note: realloc caused strange timing spikes for lgK = 11 and 12.
407 
408  target.window_data_words = static_cast<uint32_t>(data_words);
409 }
410 
411 template<typename A>
412 void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window,
413  uint8_t lg_k, uint32_t num_coupons) const {
414  const uint32_t k = 1 << lg_k;
415  window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
416  const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
417  low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
418 }
419 
420 template<typename A>
421 size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits) {
422  // Long ybits = k + numPairs; // simpler and safer UB
423  // The following tighter UB on ybits is based on page 198
424  // of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
425  // Notice that if numBaseBits == 0 it coincides with (k + numPairs).
426  const size_t ybits = num_pairs * (1 + num_base_bits) + (k >> num_base_bits);
427  const size_t xbits = 12 * num_pairs;
428  const size_t padding = num_base_bits > 10 ? 0 : 10 - num_base_bits;
429  return divide_longs_rounding_up(xbits + ybits + padding, 32);
430 }
431 
432 // Explanation of padding: we write
433 // 1) xdelta (huffman, provides at least 1 bit, requires 12-bit lookahead)
434 // 2) ydeltaGolombHi (unary, provides at least 1 bit, requires 8-bit lookahead)
435 // 3) ydeltaGolombLo (straight B bits).
436 // So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
437 // so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
438 template<typename A>
439 size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint32_t k) { // measured in 32-bit words
440  const size_t bits = 12 * k + 11; // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
441  return divide_longs_rounding_up(bits, 32);
442 }
443 
444 template<typename A>
445 uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint32_t c) {
446  const uint32_t k = 1 << lg_k;
447  // This mid-range logic produces pseudo-phases. They are used to select encoding tables.
448  // The thresholds were chosen by hand after looking at plots of measured compression.
449  if (1000 * c < 2375 * k) {
450  if ( 4 * c < 3 * k) return 16 + 0; // mid-range table
451  else if ( 10 * c < 11 * k) return 16 + 1; // mid-range table
452  else if ( 100 * c < 132 * k) return 16 + 2; // mid-range table
453  else if ( 3 * c < 5 * k) return 16 + 3; // mid-range table
454  else if (1000 * c < 1965 * k) return 16 + 4; // mid-range table
455  else if (1000 * c < 2275 * k) return 16 + 5; // mid-range table
456  else return 6; // steady-state table employed before its actual phase
457  } else { // This steady-state logic produces true phases. They are used to select
458  // encoding tables, and also column permutations for the "Sliding" flavor.
459  if (lg_k < 4) throw std::logic_error("lgK < 4");
460  const size_t tmp = c >> (lg_k - 4);
461  const uint8_t phase = tmp & 15;
462  if (phase >= 16) throw std::out_of_range("wrong phase");
463  return phase;
464  }
465 }
466 
467 static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, uint32_t& wordindex) {
468  if (bufbits >= 32) {
469  wordarr[wordindex++] = bitbuf & 0xffffffff;
470  bitbuf = bitbuf >> 32;
471  bufbits -= 32;
472  }
473 }
474 
475 static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, uint32_t& wordindex, uint8_t minbits) {
476  if (bufbits < minbits) {
477  bitbuf |= static_cast<uint64_t>(wordarr[wordindex++]) << bufbits;
478  bufbits += 32;
479  }
480 }
481 
482 // This returns the number of compressed words that were actually used.
483 // It is the caller's responsibility to ensure that the compressed_words array is long enough.
484 template<typename A>
485 uint32_t cpc_compressor<A>::low_level_compress_bytes(
486  const uint8_t* byte_array, // input
487  uint32_t num_bytes_to_encode,
488  const uint16_t* encoding_table,
489  uint32_t* compressed_words // output
490 ) const {
491  uint64_t bitbuf = 0; // bits are packed into this first, then are flushed to compressed_words
492  uint8_t bufbits = 0; // number of bits currently in bitbuf; must be between 0 and 31
493  uint32_t next_word_index = 0;
494 
495  for (uint32_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
496  const uint16_t code_info = encoding_table[byte_array[byte_index]];
497  const uint64_t code_val = code_info & 0xfff;
498  const uint8_t code_len = code_info >> 12;
499  bitbuf |= (code_val << bufbits);
500  bufbits += code_len;
501  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
502  }
503 
504  // Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek can't overrun its input.
505  bufbits += 11;
506  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
507 
508  if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer.
509  if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
510  compressed_words[next_word_index++] = bitbuf & 0xffffffff;
511  bitbuf = 0; bufbits = 0; // not really necessary
512  }
513  return next_word_index;
514 }
515 
516 template<typename A>
517 void cpc_compressor<A>::low_level_uncompress_bytes(
518  uint8_t* byte_array, // output
519  uint32_t num_bytes_to_decode,
520  const uint16_t* decoding_table,
521  const uint32_t* compressed_words, // input
522  uint32_t num_compressed_words
523 ) const {
524  uint32_t word_index = 0;
525  uint64_t bitbuf = 0;
526  uint8_t bufbits = 0;
527 
528  if (byte_array == nullptr) throw std::logic_error("byte_array == NULL");
529  if (decoding_table == nullptr) throw std::logic_error("decoding_table == NULL");
530  if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
531 
532  for (uint32_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
533  maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
534 
535  const size_t peek12 = bitbuf & 0xfff; // These 12 bits will include an entire Huffman codeword.
536  const uint16_t lookup = decoding_table[peek12];
537  const uint8_t code_word_length = lookup >> 8;
538  const uint8_t decoded_byte = lookup & 0xff;
539  byte_array[byte_index] = decoded_byte;
540  bitbuf >>= code_word_length;
541  bufbits -= code_word_length;
542  }
543  // Buffer over-run should be impossible unless there is a bug.
544  // However, we might as well check here.
545  if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words");
546 }
547 
548 static inline uint64_t read_unary(
549  const uint32_t* compressed_words,
550  uint32_t& next_word_index,
551  uint64_t& bitbuf,
552  uint8_t& bufbits
553 );
554 
555 static inline void write_unary(
556  uint32_t* compressed_words,
557  uint32_t& next_word_index_ptr,
558  uint64_t& bit_buf_ptr,
559  uint8_t& buf_bits_ptr,
560  uint64_t value
561 );
562 
563 // Here "pairs" refers to row/column pairs that specify
564 // the positions of surprising values in the bit matrix.
565 
566 // returns the number of compressed_words actually used
567 template<typename A>
568 uint32_t cpc_compressor<A>::low_level_compress_pairs(
569  const uint32_t* pair_array, // input
570  uint32_t num_pairs_to_encode,
571  uint8_t num_base_bits,
572  uint32_t* compressed_words // output
573 ) const {
574  uint64_t bitbuf = 0;
575  uint8_t bufbits = 0;
576  uint32_t next_word_index = 0;
577  const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
578  uint32_t predicted_row_index = 0;
579  uint8_t predicted_col_index = 0;
580 
581  for (uint32_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
582  const uint32_t row_col = pair_array[pair_index];
583  const uint32_t row_index = row_col >> 6;
584  const uint8_t col_index = row_col & 63;
585 
586  if (row_index != predicted_row_index) predicted_col_index = 0;
587 
588  if (row_index < predicted_row_index) throw std::logic_error("row_index < predicted_row_index");
589  if (col_index < predicted_col_index) throw std::logic_error("col_index < predicted_col_index");
590 
591  const uint32_t y_delta = row_index - predicted_row_index;
592  const uint8_t x_delta = col_index - predicted_col_index;
593 
594  predicted_row_index = row_index;
595  predicted_col_index = col_index + 1;
596 
597  const uint16_t code_info = length_limited_unary_encoding_table65[x_delta];
598  const uint64_t code_val = code_info & 0xfff;
599  const uint8_t code_len = static_cast<uint8_t>(code_info >> 12);
600  bitbuf |= code_val << bufbits;
601  bufbits += code_len;
602  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
603 
604  const uint64_t golomb_lo = y_delta & golomb_lo_mask;
605  const uint64_t golomb_hi = y_delta >> num_base_bits;
606 
607  write_unary(compressed_words, next_word_index, bitbuf, bufbits, golomb_hi);
608 
609  bitbuf |= golomb_lo << bufbits;
610  bufbits += num_base_bits;
611  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
612  }
613 
614  // Pad the bitstream so that the decompressor's 12-bit peek can't overrun its input.
615  const uint8_t padding = (num_base_bits > 10) ? 0 : 10 - num_base_bits;
616  bufbits += padding;
617  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
618 
619  if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer
620  if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
621  compressed_words[next_word_index++] = bitbuf & 0xffffffff;
622  bitbuf = 0; bufbits = 0; // not really necessary
623  }
624 
625  return next_word_index;
626 }
627 
628 template<typename A>
629 void cpc_compressor<A>::low_level_uncompress_pairs(
630  uint32_t* pair_array, // output
631  uint32_t num_pairs_to_decode,
632  uint8_t num_base_bits,
633  const uint32_t* compressed_words, // input
634  uint32_t num_compressed_words
635 ) const {
636  uint32_t word_index = 0;
637  uint64_t bitbuf = 0;
638  uint8_t bufbits = 0;
639  const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
640  uint32_t predicted_row_index = 0;
641  uint8_t predicted_col_index = 0;
642 
643  // for each pair we need to read:
644  // x_delta (12-bit length-limited unary)
645  // y_delta_hi (unary)
646  // y_delta_lo (basebits)
647 
648  for (uint32_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
649  maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
650  const size_t peek12 = bitbuf & 0xfff;
651  const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
652  const uint8_t code_word_length = lookup >> 8;
653  const int8_t x_delta = lookup & 0xff;
654  bitbuf >>= code_word_length;
655  bufbits -= code_word_length;
656 
657  const uint64_t golomb_hi = read_unary(compressed_words, word_index, bitbuf, bufbits);
658 
659  maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, num_base_bits); // ensure num_base_bits in bit buffer
660  const uint64_t golomb_lo = bitbuf & golomb_lo_mask;
661  bitbuf >>= num_base_bits;
662  bufbits -= num_base_bits;
663  const int64_t y_delta = (golomb_hi << num_base_bits) | golomb_lo;
664 
665  // Now that we have x_delta and y_delta, we can compute the pair's row and column
666  if (y_delta > 0) predicted_col_index = 0;
667  const uint32_t row_index = static_cast<uint32_t>(predicted_row_index + y_delta);
668  const uint8_t col_index = predicted_col_index + x_delta;
669  const uint32_t row_col = (row_index << 6) | col_index;
670  pair_array[pair_index] = row_col;
671  predicted_row_index = row_index;
672  predicted_col_index = col_index + 1;
673  }
674  if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words"); // check for buffer over-run
675 }
676 
677 uint64_t read_unary(
678  const uint32_t* compressed_words,
679  uint32_t& next_word_index,
680  uint64_t& bitbuf,
681  uint8_t& bufbits
682 ) {
683  if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
684  size_t subtotal = 0;
685  while (true) {
686  maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, next_word_index, 8); // ensure 8 bits in bit buffer
687 
688  const uint8_t peek8 = bitbuf & 0xff; // These 8 bits include either all or part of the Unary codeword
689  const uint8_t trailing_zeros = byte_trailing_zeros_table[peek8];
690 
691  if (trailing_zeros > 8) throw std::out_of_range("trailing_zeros out of range");
692  if (trailing_zeros < 8) {
693  bufbits -= 1 + trailing_zeros;
694  bitbuf >>= 1 + trailing_zeros;
695  return subtotal + trailing_zeros;
696  }
697  // The codeword was partial, so read some more
698  subtotal += 8;
699  bufbits -= 8;
700  bitbuf >>= 8;
701  }
702 }
703 
704 void write_unary(
705  uint32_t* compressed_words,
706  uint32_t& next_word_index,
707  uint64_t& bitbuf,
708  uint8_t& bufbits,
709  uint64_t value
710 ) {
711  if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
712  if (bufbits > 31) throw std::out_of_range("bufbits out of range");
713 
714  uint64_t remaining = value;
715 
716  while (remaining >= 16) {
717  remaining -= 16;
718  // Here we output 16 zeros, but we don't need to physically write them into bitbuf
719  // because it already contains zeros in that region.
720  bufbits += 16; // Record the fact that 16 bits of output have occurred.
721  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
722  }
723 
724  if (remaining > 15) throw std::out_of_range("remaining out of range");
725 
726  const uint64_t the_unary_code = 1ULL << remaining;
727  bitbuf |= the_unary_code << bufbits;
728  bufbits += static_cast<uint8_t>(remaining + 1);
729  maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
730 }
731 
732 // The empty space that this leaves at the beginning of the output array
733 // will be filled in later by the caller.
734 template<typename A>
735 auto cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
736  uint32_t empty_space, const A& allocator) -> vector_u32 {
737  const size_t output_length = empty_space + num_pairs_to_get;
738  vector_u32 pairs(output_length, 0, allocator);
739  size_t pair_index = empty_space;
740  for (unsigned row_index = 0; row_index < k; row_index++) {
741  uint8_t byte = window[row_index];
742  while (byte != 0) {
743  const uint8_t col_index = byte_trailing_zeros_table[byte];
744  byte = byte ^ (1 << col_index); // erase the 1
745  pairs[pair_index++] = (row_index << 6) | col_index;
746  }
747  }
748  if (pair_index != output_length) throw std::logic_error("pair_index != output_length");
749  return pairs;
750 }
751 
752 // returns an integer that is between
753 // zero and ceiling(log_2(k)) - 1, inclusive
754 template<typename A>
755 uint8_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint32_t k, uint64_t count) {
756  if (k < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: k < 1");
757  if (count < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: count < 1");
758  const uint64_t quotient = (k - count) / count; // integer division
759  if (quotient == 0) return 0;
760  else return floor_log2_of_long(quotient);
761 }
762 
763 } /* namespace datasketches */
764 
765 #endif
DataSketches namespace.
Definition: binomial_bounds.hpp:38