20 #ifndef KLL_HELPER_IMPL_HPP_
21 #define KLL_HELPER_IMPL_HPP_
26 #include "common_defs.hpp"
30 bool kll_helper::is_even(uint32_t value) {
31 return (value & 1) == 0;
34 bool kll_helper::is_odd(uint32_t value) {
35 return (value & 1) > 0;
38 uint8_t kll_helper::floor_of_log2_of_fraction(uint64_t numer, uint64_t denom) {
39 if (denom > numer)
return 0;
43 if (denom > numer)
return count;
48 uint8_t kll_helper::ub_on_num_levels(uint64_t n) {
50 return 1 + floor_of_log2_of_fraction(n, 1);
53 uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels) {
55 for (uint8_t h = 0; h < num_levels; h++) {
56 total += level_capacity(k, num_levels, h, m);
61 uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
62 if (height >= numLevels)
throw std::invalid_argument(
"height >= numLevels");
63 const uint8_t depth = numLevels - height - 1;
64 return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
67 uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
68 if (depth > 60)
throw std::invalid_argument(
"depth > 60");
69 if (depth <= 30)
return int_cap_aux_aux(k, depth);
70 const uint8_t half = depth / 2;
71 const uint8_t rest = depth - half;
72 const uint16_t tmp = int_cap_aux_aux(k, half);
73 return int_cap_aux_aux(tmp, rest);
76 uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
77 if (depth > 30)
throw std::invalid_argument(
"depth > 30");
78 const uint64_t twok = k << 1;
79 const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
80 const uint64_t result = (tmp + 1) >> 1;
81 if (result > k)
throw std::logic_error(
"result > k");
82 return static_cast<uint16_t
>(result);
85 uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels,
const uint32_t* levels) {
88 for (uint8_t lvl = 0; lvl < num_levels; lvl++) {
89 total += weight * (levels[lvl + 1] - levels[lvl]);
96 void kll_helper::randomly_halve_down(T* buf, uint32_t start, uint32_t length) {
97 if (!is_even(length))
throw std::invalid_argument(
"length must be even");
98 const uint32_t half_length = length / 2;
100 const uint32_t offset = deterministic_offset();
102 const uint32_t offset = random_utils::random_bit();
104 uint32_t j = start + offset;
105 for (uint32_t i = start; i < (start + half_length); i++) {
106 if (i != j) buf[i] = std::move(buf[j]);
111 template <
typename T>
112 void kll_helper::randomly_halve_up(T* buf, uint32_t start, uint32_t length) {
113 if (!is_even(length))
throw std::invalid_argument(
"length must be even");
114 const uint32_t half_length = length / 2;
115 #ifdef KLL_VALIDATION
116 const uint32_t offset = deterministic_offset();
118 const uint32_t offset = random_utils::random_bit();
120 uint32_t j = (start + length) - 1 - offset;
121 for (uint32_t i = (start + length) - 1; i >= (start + half_length); i--) {
122 if (i != j) buf[i] = std::move(buf[j]);
130 template <
typename T,
typename C>
131 void kll_helper::merge_sorted_arrays(T* buf, uint32_t start_a, uint32_t len_a, uint32_t start_b, uint32_t len_b, uint32_t start_c) {
132 const uint32_t len_c = len_a + len_b;
133 const uint32_t lim_a = start_a + len_a;
134 const uint32_t lim_b = start_b + len_b;
135 const uint32_t lim_c = start_c + len_c;
137 uint32_t a = start_a;
138 uint32_t b = start_b;
140 for (uint32_t c = start_c; c < lim_c; c++) {
142 if (b != c) buf[c] = std::move(buf[b]);
144 }
else if (b == lim_b) {
145 if (a != c) buf[c] = std::move(buf[a]);
147 }
else if (C()(buf[a], buf[b])) {
148 if (a != c) buf[c] = std::move(buf[a]);
151 if (b != c) buf[c] = std::move(buf[b]);
155 if (a != lim_a || b != lim_b)
throw std::logic_error(
"inconsistent state");
162 template <
typename T,
typename C>
163 void kll_helper::merge_sorted_arrays(
const T* buf_a, uint32_t start_a, uint32_t len_a,
const T* buf_b, uint32_t start_b, uint32_t len_b, T* buf_c, uint32_t start_c) {
164 const uint32_t len_c = len_a + len_b;
165 const uint32_t lim_a = start_a + len_a;
166 const uint32_t lim_b = start_b + len_b;
167 const uint32_t lim_c = start_c + len_c;
169 uint32_t a = start_a;
170 uint32_t b = start_b;
172 for (uint32_t c = start_c; c < lim_c; c++) {
174 new (&buf_c[c]) T(buf_b[b++]);
175 }
else if (b == lim_b) {
176 new (&buf_c[c]) T(std::move(buf_a[a]));
178 }
else if (C()(buf_a[a], buf_b[b])) {
179 new (&buf_c[c]) T(std::move(buf_a[a]));
182 new (&buf_c[c]) T(buf_b[b++]);
185 if (a != lim_a || b != lim_b)
throw std::logic_error(
"inconsistent state");
205 template <
typename T,
typename C>
206 kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m, uint8_t num_levels_in, T* items,
207 uint32_t* in_levels, uint32_t* out_levels,
bool is_level_zero_sorted)
209 if (num_levels_in == 0)
throw std::invalid_argument(
"num_levels_in == 0");
210 const uint32_t starting_item_count = in_levels[num_levels_in] - in_levels[0];
211 uint8_t current_num_levels = num_levels_in;
212 uint32_t current_item_count = starting_item_count;
213 uint32_t target_item_count = compute_total_capacity(k, m, current_num_levels);
214 bool done_yet =
false;
216 uint8_t current_level = 0;
221 if (current_level == (current_num_levels - 1)) {
222 in_levels[current_level + 2] = in_levels[current_level + 1];
225 const auto raw_beg = in_levels[current_level];
226 const auto raw_lim = in_levels[current_level + 1];
227 const auto raw_pop = raw_lim - raw_beg;
229 if ((current_item_count < target_item_count) || (raw_pop < level_capacity(k, current_num_levels, current_level, m))) {
232 if (raw_beg < out_levels[current_level])
throw std::logic_error(
"wrong move");
233 if (raw_beg != out_levels[current_level])
234 std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
235 out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
240 const auto pop_above = in_levels[current_level + 2] - raw_lim;
241 const bool odd_pop = is_odd(raw_pop);
242 const auto adj_beg = odd_pop ? 1 + raw_beg : raw_beg;
243 const auto adj_pop = odd_pop ? raw_pop - 1 : raw_pop;
244 const auto half_adj_pop = adj_pop / 2;
247 if (out_levels[current_level] != raw_beg)
248 items[out_levels[current_level]] = std::move(items[raw_beg]);
249 out_levels[current_level + 1] = out_levels[current_level] + 1;
251 out_levels[current_level + 1] = out_levels[current_level];
255 if ((current_level == 0) && !is_level_zero_sorted) {
256 std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
259 if (pop_above == 0) {
260 randomly_halve_up(items, adj_beg, adj_pop);
262 randomly_halve_down(items, adj_beg, adj_pop);
263 merge_sorted_arrays<T, C>(items, adj_beg, half_adj_pop, raw_lim, pop_above, adj_beg + half_adj_pop);
267 current_item_count -= half_adj_pop;
270 in_levels[current_level + 1] = in_levels[current_level + 1] - half_adj_pop;
274 if (current_level == (current_num_levels - 1)) {
275 current_num_levels++;
276 target_item_count += level_capacity(k, current_num_levels, 0, m);
283 if (current_level == (current_num_levels - 1)) done_yet =
true;
287 if ((out_levels[current_num_levels] - out_levels[0]) != current_item_count)
throw std::logic_error(
"inconsistent state");
289 for (uint32_t i = current_item_count; i < starting_item_count; i++) items[i].~T();
291 compress_result result;
292 result.final_num_levels = current_num_levels;
293 result.final_capacity = target_item_count;
294 result.final_num_items = current_item_count;
299 void kll_helper::copy_construct(
const T* src,
size_t src_first,
size_t src_last, T* dst,
size_t dst_first) {
300 while (src_first != src_last) {
301 new (&dst[dst_first++]) T(src[src_first++]);
306 void kll_helper::move_construct(T* src,
size_t src_first,
size_t src_last, T* dst,
size_t dst_first,
bool destroy) {
307 while (src_first != src_last) {
308 new (&dst[dst_first++]) T(std::move(src[src_first]));
309 if (destroy) src[src_first].~T();
314 #ifdef KLL_VALIDATION
315 uint32_t kll_helper::deterministic_offset() {
316 const uint32_t result(kll_next_offset);
317 kll_next_offset = 1 - kll_next_offset;
DataSketches namespace.
Definition: binomial_bounds.hpp:38