datasketches-cpp
kll_sketch_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef KLL_SKETCH_IMPL_HPP_
21 #define KLL_SKETCH_IMPL_HPP_
22 
23 #include <iostream>
24 #include <iomanip>
25 #include <sstream>
26 #include <stdexcept>
27 
28 #include "conditional_forward.hpp"
29 #include "count_zeros.hpp"
30 #include "memory_operations.hpp"
31 #include "kll_helper.hpp"
32 
33 namespace datasketches {
34 
35 template<typename T, typename C, typename A>
36 kll_sketch<T, C, A>::kll_sketch(uint16_t k, const C& comparator, const A& allocator):
37 comparator_(comparator),
38 allocator_(allocator),
39 k_(k),
40 m_(kll_constants::DEFAULT_M),
41 min_k_(k),
42 num_levels_(1),
43 is_level_zero_sorted_(false),
44 n_(0),
45 levels_(2, 0, allocator),
46 items_(nullptr),
47 items_size_(k_),
48 min_item_(),
49 max_item_(),
50 sorted_view_(nullptr)
51 {
52  if (k < kll_constants::MIN_K || k > kll_constants::MAX_K) {
53  throw std::invalid_argument("K must be >= " + std::to_string(kll_constants::MIN_K) + " and <= "
54  + std::to_string(kll_constants::MAX_K) + ": " + std::to_string(k));
55  }
56  levels_[0] = levels_[1] = k;
57  items_ = allocator_.allocate(items_size_);
58 }
59 
60 template<typename T, typename C, typename A>
62 comparator_(other.comparator_),
63 allocator_(other.allocator_),
64 k_(other.k_),
65 m_(other.m_),
66 min_k_(other.min_k_),
67 num_levels_(other.num_levels_),
68 is_level_zero_sorted_(other.is_level_zero_sorted_),
69 n_(other.n_),
70 levels_(other.levels_),
71 items_(nullptr),
72 items_size_(other.items_size_),
73 min_item_(other.min_item_),
74 max_item_(other.max_item_),
75 sorted_view_(nullptr)
76 {
77  items_ = allocator_.allocate(items_size_);
78  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
79 }
80 
81 template<typename T, typename C, typename A>
83 comparator_(std::move(other.comparator_)),
84 allocator_(std::move(other.allocator_)),
85 k_(other.k_),
86 m_(other.m_),
87 min_k_(other.min_k_),
88 num_levels_(other.num_levels_),
89 is_level_zero_sorted_(other.is_level_zero_sorted_),
90 n_(other.n_),
91 levels_(std::move(other.levels_)),
92 items_(other.items_),
93 items_size_(other.items_size_),
94 min_item_(std::move(other.min_item_)),
95 max_item_(std::move(other.max_item_)),
96 sorted_view_(nullptr)
97 {
98  other.items_ = nullptr;
99 }
100 
101 template<typename T, typename C, typename A>
103  kll_sketch copy(other);
104  std::swap(comparator_, copy.comparator_);
105  std::swap(allocator_, copy.allocator_);
106  std::swap(k_, copy.k_);
107  std::swap(m_, copy.m_);
108  std::swap(min_k_, copy.min_k_);
109  std::swap(num_levels_, copy.num_levels_);
110  std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
111  std::swap(n_, copy.n_);
112  std::swap(levels_, copy.levels_);
113  std::swap(items_, copy.items_);
114  std::swap(items_size_, copy.items_size_);
115  std::swap(min_item_, copy.min_item_);
116  std::swap(max_item_, copy.max_item_);
117  reset_sorted_view();
118  return *this;
119 }
120 
121 template<typename T, typename C, typename A>
123  std::swap(comparator_, other.comparator_);
124  std::swap(allocator_, other.allocator_);
125  std::swap(k_, other.k_);
126  std::swap(m_, other.m_);
127  std::swap(min_k_, other.min_k_);
128  std::swap(num_levels_, other.num_levels_);
129  std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
130  std::swap(n_, other.n_);
131  std::swap(levels_, other.levels_);
132  std::swap(items_, other.items_);
133  std::swap(items_size_, other.items_size_);
134  std::swap(min_item_, other.min_item_);
135  std::swap(max_item_, other.max_item_);
136  reset_sorted_view();
137  return *this;
138 }
139 
140 template<typename T, typename C, typename A>
142  if (items_ != nullptr) {
143  const uint32_t begin = levels_[0];
144  const uint32_t end = levels_[num_levels_];
145  for (uint32_t i = begin; i < end; i++) items_[i].~T();
146  allocator_.deallocate(items_, items_size_);
147  }
148  reset_sorted_view();
149 }
150 
151 template<typename T, typename C, typename A>
152 template<typename TT, typename CC, typename AA>
153 kll_sketch<T, C, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
154 comparator_(comparator),
155 allocator_(allocator),
156 k_(other.k_),
157 m_(other.m_),
158 min_k_(other.min_k_),
159 num_levels_(other.num_levels_),
160 is_level_zero_sorted_(other.is_level_zero_sorted_),
161 n_(other.n_),
162 levels_(other.levels_, allocator_),
163 items_(nullptr),
164 items_size_(other.items_size_),
165 min_item_(other.min_item_),
166 max_item_(other.max_item_),
167 sorted_view_(nullptr)
168 {
169  static_assert(
170  std::is_constructible<T, TT>::value,
171  "Type converting constructor requires new type to be constructible from existing type"
172  );
173  items_ = allocator_.allocate(items_size_);
174  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
175  check_sorting();
176 }
177 
178 template<typename T, typename C, typename A>
179 template<typename FwdT>
180 void kll_sketch<T, C, A>::update(FwdT&& item) {
181  if (!check_update_item(item)) { return; }
182  update_min_max(static_cast<const T&>(item)); // min and max are always copies
183  const uint32_t index = internal_update();
184  new (&items_[index]) T(std::forward<FwdT>(item));
185  reset_sorted_view();
186 }
187 
188 template<typename T, typename C, typename A>
189 void kll_sketch<T, C, A>::update_min_max(const T& item) {
190  if (is_empty()) {
191  min_item_.emplace(item);
192  max_item_.emplace(item);
193  } else {
194  if (comparator_(item, *min_item_)) *min_item_ = item;
195  if (comparator_(*max_item_, item)) *max_item_ = item;
196  }
197 }
198 
199 template<typename T, typename C, typename A>
200 uint32_t kll_sketch<T, C, A>::internal_update() {
201  if (levels_[0] == 0) compress_while_updating();
202  n_++;
203  is_level_zero_sorted_ = false;
204  return --levels_[0];
205 }
206 
207 template<typename T, typename C, typename A>
208 template<typename FwdSk>
209 void kll_sketch<T, C, A>::merge(FwdSk&& other) {
210  if (other.is_empty()) return;
211  if (m_ != other.m_) {
212  throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
213  }
214  if (is_empty()) {
215  min_item_.emplace(conditional_forward<FwdSk>(*other.min_item_));
216  max_item_.emplace(conditional_forward<FwdSk>(*other.max_item_));
217  } else {
218  if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
219  if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
220  }
221  const uint64_t final_n = n_ + other.n_;
222  for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
223  const uint32_t index = internal_update();
224  new (&items_[index]) T(conditional_forward<FwdSk>(other.items_[i]));
225  }
226  if (other.num_levels_ >= 2) merge_higher_levels(other, final_n);
227  n_ = final_n;
228  if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
229  assert_correct_total_weight();
230  reset_sorted_view();
231 }
232 
233 template<typename T, typename C, typename A>
235  return n_ == 0;
236 }
237 
238 template<typename T, typename C, typename A>
239 uint16_t kll_sketch<T, C, A>::get_k() const {
240  return k_;
241 }
242 
243 template<typename T, typename C, typename A>
244 uint64_t kll_sketch<T, C, A>::get_n() const {
245  return n_;
246 }
247 
248 template<typename T, typename C, typename A>
250  return levels_[num_levels_] - levels_[0];
251 }
252 
253 template<typename T, typename C, typename A>
255  return num_levels_ > 1;
256 }
257 
258 template<typename T, typename C, typename A>
260  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
261  return *min_item_;
262 }
263 
264 template<typename T, typename C, typename A>
266  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
267  return *max_item_;
268 }
269 
270 template<typename T, typename C, typename A>
272  return comparator_;
273 }
274 
275 template<typename T, typename C, typename A>
277  return allocator_;
278 }
279 
280 template<typename T, typename C, typename A>
281 double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
282  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
283  setup_sorted_view();
284  return sorted_view_->get_rank(item, inclusive);
285 }
286 
287 template<typename T, typename C, typename A>
288 auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
289  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
290  setup_sorted_view();
291  return sorted_view_->get_PMF(split_points, size, inclusive);
292 }
293 
294 template<typename T, typename C, typename A>
295 auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
296  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
297  setup_sorted_view();
298  return sorted_view_->get_CDF(split_points, size, inclusive);
299 }
300 
301 template<typename T, typename C, typename A>
302 auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
303  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
304  if ((rank < 0.0) || (rank > 1.0)) {
305  throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
306  }
307  // may have a side effect of sorting level zero if needed
308  setup_sorted_view();
309  return sorted_view_->get_quantile(rank, inclusive);
310 }
311 
312 template<typename T, typename C, typename A>
314  return get_normalized_rank_error(min_k_, pmf);
315 }
316 
317 // implementation for fixed-size arithmetic types (integral and floating point)
318 template<typename T, typename C, typename A>
319 template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
321  if (is_empty()) { return EMPTY_SIZE_BYTES; }
322  if (num_levels_ == 1 && get_num_retained() == 1) {
323  return DATA_START_SINGLE_ITEM + sizeof(TT);
324  }
325  // the last integer in the levels_ array is not serialized because it can be derived
326  return DATA_START + num_levels_ * sizeof(uint32_t) + (get_num_retained() + 2) * sizeof(TT);
327 }
328 
329 // implementation for all other types
330 template<typename T, typename C, typename A>
331 template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
332 size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
333  if (is_empty()) { return EMPTY_SIZE_BYTES; }
334  if (num_levels_ == 1 && get_num_retained() == 1) {
335  return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
336  }
337  // the last integer in the levels_ array is not serialized because it can be derived
338  size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
339  size += sd.size_of_item(*min_item_);
340  size += sd.size_of_item(*max_item_);
341  for (auto it: *this) size += sd.size_of_item(it.first);
342  return size;
343 }
344 
345 // implementation for fixed-size arithmetic types (integral and floating point)
346 template<typename T, typename C, typename A>
347 template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
349  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
350  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, kll_constants::DEFAULT_M, num_levels);
351  // the last integer in the levels_ array is not serialized because it can be derived
352  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * sizeof(TT);
353 }
354 
355 // implementation for all other types
356 template<typename T, typename C, typename A>
357 template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
358 size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
359  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
360  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, kll_constants::DEFAULT_M, num_levels);
361  // the last integer in the levels_ array is not serialized because it can be derived
362  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
363 }
364 
365 template<typename T, typename C, typename A>
366 template<typename SerDe>
367 void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
368  const bool is_single_item = n_ == 1;
369  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
370  write(os, preamble_ints);
371  const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
372  write(os, serial_version);
373  const uint8_t family(FAMILY);
374  write(os, family);
375  const uint8_t flags_byte(
376  (is_empty() ? 1 << flags::IS_EMPTY : 0)
377  | (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
378  | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
379  );
380  write(os, flags_byte);
381  write(os, k_);
382  write(os, m_);
383  const uint8_t unused = 0;
384  write(os, unused);
385  if (is_empty()) return;
386  if (!is_single_item) {
387  write(os, n_);
388  write(os, min_k_);
389  write(os, num_levels_);
390  write(os, unused);
391  write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
392  sd.serialize(os, &*min_item_, 1);
393  sd.serialize(os, &*max_item_, 1);
394  }
395  sd.serialize(os, &items_[levels_[0]], get_num_retained());
396 }
397 
398 template<typename T, typename C, typename A>
399 template<typename SerDe>
400 auto kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
401  const bool is_single_item = n_ == 1;
402  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
403  vector_bytes bytes(size, 0, allocator_);
404  uint8_t* ptr = bytes.data() + header_size_bytes;
405  const uint8_t* end_ptr = ptr + size;
406  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
407  ptr += copy_to_mem(preamble_ints, ptr);
408  const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
409  ptr += copy_to_mem(serial_version, ptr);
410  const uint8_t family(FAMILY);
411  ptr += copy_to_mem(family, ptr);
412  const uint8_t flags_byte(
413  (is_empty() ? 1 << flags::IS_EMPTY : 0)
414  | (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
415  | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
416  );
417  ptr += copy_to_mem(flags_byte, ptr);
418  ptr += copy_to_mem(k_, ptr);
419  ptr += copy_to_mem(m_, ptr);
420  ptr += sizeof(uint8_t); // unused
421  if (!is_empty()) {
422  if (!is_single_item) {
423  ptr += copy_to_mem(n_, ptr);
424  ptr += copy_to_mem(min_k_, ptr);
425  ptr += copy_to_mem(num_levels_, ptr);
426  ptr += sizeof(uint8_t); // unused
427  ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
428  ptr += sd.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
429  ptr += sd.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
430  }
431  const size_t bytes_remaining = end_ptr - ptr;
432  ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
433  }
434  const size_t delta = ptr - bytes.data();
435  if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta)
436  + " != " + std::to_string(size));
437  return bytes;
438 }
439 
440 template<typename T, typename C, typename A>
441 template<typename SerDe>
442 kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd,
443  const C& comparator, const A& allocator) {
444  const auto preamble_ints = read<uint8_t>(is);
445  const auto serial_version = read<uint8_t>(is);
446  const auto family_id = read<uint8_t>(is);
447  const auto flags_byte = read<uint8_t>(is);
448  const auto k = read<uint16_t>(is);
449  const auto m = read<uint8_t>(is);
450  read<uint8_t>(is); // skip unused byte
451 
452  check_m(m);
453  check_preamble_ints(preamble_ints, flags_byte);
454  check_serial_version(serial_version);
455  check_family_id(family_id);
456 
457  if (!is.good()) throw std::runtime_error("error reading from std::istream");
458  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
459  if (is_empty) return kll_sketch(k, comparator, allocator);
460 
461  uint64_t n;
462  uint16_t min_k;
463  uint8_t num_levels;
464  const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM)); // used in serial version 2
465  if (is_single_item) {
466  n = 1;
467  min_k = k;
468  num_levels = 1;
469  } else {
470  n = read<uint64_t>(is);
471  min_k = read<uint16_t>(is);
472  num_levels = read<uint8_t>(is);
473  read<uint8_t>(is); // skip unused byte
474  }
475  vector_u32 levels(num_levels + 1, 0, allocator);
476  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
477  if (is_single_item) {
478  levels[0] = capacity - 1;
479  } else {
480  // the last integer in levels_ is not serialized because it can be derived
481  read(is, levels.data(), sizeof(levels[0]) * num_levels);
482  }
483  levels[num_levels] = capacity;
484  optional<T> tmp; // space to deserialize min and max
485  optional<T> min_item;
486  optional<T> max_item;
487  if (!is_single_item) {
488  sd.deserialize(is, &*tmp, 1);
489  // serde call did not throw, repackage and cleanup
490  min_item.emplace(*tmp);
491  (*tmp).~T();
492  sd.deserialize(is, &*tmp, 1);
493  // serde call did not throw, repackage and cleanup
494  max_item.emplace(*tmp);
495  (*tmp).~T();
496  }
497  A alloc(allocator);
498  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
499  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
500  const auto num_items = levels[num_levels] - levels[0];
501  sd.deserialize(is, &items_buffer.get()[levels[0]], num_items);
502  // serde call did not throw, repackage with destrtuctors
503  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
504  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
505  if (is_single_item) {
506  min_item.emplace(items.get()[levels[0]]);
507  max_item.emplace(items.get()[levels[0]]);
508  }
509  if (!is.good())
510  throw std::runtime_error("error reading from std::istream");
511  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
512  std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
513 }
514 
515 template<typename T, typename C, typename A>
516 template<typename SerDe>
517 kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd,
518  const C& comparator, const A& allocator) {
519  ensure_minimum_memory(size, 8);
520  const char* ptr = static_cast<const char*>(bytes);
521  uint8_t preamble_ints;
522  ptr += copy_from_mem(ptr, preamble_ints);
523  uint8_t serial_version;
524  ptr += copy_from_mem(ptr, serial_version);
525  uint8_t family_id;
526  ptr += copy_from_mem(ptr, family_id);
527  uint8_t flags_byte;
528  ptr += copy_from_mem(ptr, flags_byte);
529  uint16_t k;
530  ptr += copy_from_mem(ptr, k);
531  uint8_t m;
532  ptr += copy_from_mem(ptr, m);
533  ptr += sizeof(uint8_t); // skip unused byte
534 
535  check_m(m);
536  check_preamble_ints(preamble_ints, flags_byte);
537  check_serial_version(serial_version);
538  check_family_id(family_id);
539  ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
540 
541  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
542  if (is_empty) return kll_sketch(k, comparator, allocator);
543 
544  uint64_t n;
545  uint16_t min_k;
546  uint8_t num_levels;
547  const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM)); // used in serial version 2
548  const char* end_ptr = static_cast<const char*>(bytes) + size;
549  if (is_single_item) {
550  n = 1;
551  min_k = k;
552  num_levels = 1;
553  } else {
554  ptr += copy_from_mem(ptr, n);
555  ptr += copy_from_mem(ptr, min_k);
556  ptr += copy_from_mem(ptr, num_levels);
557  ptr += sizeof(uint8_t); // skip unused byte
558  }
559  vector_u32 levels(num_levels + 1, 0, allocator);
560  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
561  if (is_single_item) {
562  levels[0] = capacity - 1;
563  } else {
564  // the last integer in levels_ is not serialized because it can be derived
565  ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
566  }
567  levels[num_levels] = capacity;
568  optional<T> tmp; // space to deserialize min and max
569  optional<T> min_item;
570  optional<T> max_item;
571  if (!is_single_item) {
572  ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
573  // serde call did not throw, repackage and cleanup
574  min_item.emplace(*tmp);
575  (*tmp).~T();
576  ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
577  // serde call did not throw, repackage and cleanup
578  max_item.emplace(*tmp);
579  (*tmp).~T();
580  }
581  A alloc(allocator);
582  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
583  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
584  const auto num_items = levels[num_levels] - levels[0];
585  ptr += sd.deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
586  // serde call did not throw, repackage with destrtuctors
587  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
588  const size_t delta = ptr - static_cast<const char*>(bytes);
589  if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
590  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
591  if (is_single_item) {
592  min_item.emplace(items.get()[levels[0]]);
593  max_item.emplace(items.get()[levels[0]]);
594  }
595  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
596  std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
597 }
598 
599 /*
600  * Gets the normalized rank error given k and pmf.
601  * k - the configuration parameter
602  * pmf - if true, returns the "double-sided" normalized rank error for the get_PMF() function.
603  * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
604  * Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
605  */
606 template<typename T, typename C, typename A>
607 double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
608  return pmf
609  ? 2.446 / pow(k, 0.9433)
610  : 2.296 / pow(k, 0.9723);
611 }
612 
613 // for deserialization
614 template<typename T, typename C, typename A>
615 kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
616  std::unique_ptr<T, items_deleter> items, uint32_t items_size, optional<T>&& min_item,
617  optional<T>&& max_item, bool is_level_zero_sorted, const C& comparator):
618 comparator_(comparator),
619 allocator_(levels.get_allocator()),
620 k_(k),
621 m_(kll_constants::DEFAULT_M),
622 min_k_(min_k),
623 num_levels_(num_levels),
624 is_level_zero_sorted_(is_level_zero_sorted),
625 n_(n),
626 levels_(std::move(levels)),
627 items_(items.release()),
628 items_size_(items_size),
629 min_item_(std::move(min_item)),
630 max_item_(std::move(max_item)),
631 sorted_view_(nullptr)
632 {}
633 
634 // The following code is only valid in the special case of exactly reaching capacity while updating.
635 // It cannot be used while merging, while reducing k, or anything else.
636 template<typename T, typename C, typename A>
637 void kll_sketch<T, C, A>::compress_while_updating(void) {
638  const uint8_t level = find_level_to_compact();
639 
640  // It is important to add the new top level right here. Be aware that this operation
641  // grows the buffer and shifts the data and also the boundaries of the data and grows the
642  // levels array and increments num_levels_
643  if (level == (num_levels_ - 1)) {
644  add_empty_top_level_to_completely_full_sketch();
645  }
646 
647  const uint32_t raw_beg = levels_[level];
648  const uint32_t raw_lim = levels_[level + 1];
649  // +2 is OK because we already added a new top level if necessary
650  const uint32_t pop_above = levels_[level + 2] - raw_lim;
651  const uint32_t raw_pop = raw_lim - raw_beg;
652  const bool odd_pop = kll_helper::is_odd(raw_pop);
653  const uint32_t adj_beg = odd_pop ? raw_beg + 1 : raw_beg;
654  const uint32_t adj_pop = odd_pop ? raw_pop - 1 : raw_pop;
655  const uint32_t half_adj_pop = adj_pop / 2;
656  const uint32_t destroy_beg = levels_[0];
657 
658  // level zero might not be sorted, so we must sort it if we wish to compact it
659  // sort_level_zero() is not used here because of the adjustment for odd number of items
660  if ((level == 0) && !is_level_zero_sorted_) {
661  std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, comparator_);
662  }
663  if (pop_above == 0) {
664  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
665  } else {
666  kll_helper::randomly_halve_down(items_, adj_beg, adj_pop);
667  kll_helper::merge_sorted_arrays<T, C>(items_, adj_beg, half_adj_pop, raw_lim, pop_above, adj_beg + half_adj_pop);
668  }
669  levels_[level + 1] -= half_adj_pop; // adjust boundaries of the level above
670  if (odd_pop) {
671  levels_[level] = levels_[level + 1] - 1; // the current level now contains one item
672  if (levels_[level] != raw_beg) items_[levels_[level]] = std::move(items_[raw_beg]); // namely this leftover guy
673  } else {
674  levels_[level] = levels_[level + 1]; // the current level is now empty
675  }
676 
677  // verify that we freed up half_adj_pop array slots just below the current level
678  if (levels_[level] != (raw_beg + half_adj_pop)) throw std::logic_error("compaction error");
679 
680  // finally, we need to shift up the data in the levels below
681  // so that the freed-up space can be used by level zero
682  if (level > 0) {
683  const uint32_t amount = raw_beg - levels_[0];
684  std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
685  for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
686  }
687  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
688 }
689 
690 template<typename T, typename C, typename A>
691 uint8_t kll_sketch<T, C, A>::find_level_to_compact() const {
692  uint8_t level = 0;
693  while (true) {
694  if (level >= num_levels_) throw std::logic_error("capacity calculation error");
695  const uint32_t pop = levels_[level + 1] - levels_[level];
696  const uint32_t cap = kll_helper::level_capacity(k_, num_levels_, level, m_);
697  if (pop >= cap) {
698  return level;
699  }
700  level++;
701  }
702 }
703 
704 template<typename T, typename C, typename A>
705 void kll_sketch<T, C, A>::add_empty_top_level_to_completely_full_sketch() {
706  const uint32_t cur_total_cap = levels_[num_levels_];
707 
708  // make sure that we are following a certain growth scheme
709  if (levels_[0] != 0) throw std::logic_error("full sketch expected");
710  if (items_size_ != cur_total_cap) throw std::logic_error("current capacity mismatch");
711 
712  // note that merging MIGHT over-grow levels_, in which case we might not have to grow it here
713  const uint8_t new_levels_size = num_levels_ + 2;
714  if (levels_.size() < new_levels_size) {
715  levels_.resize(new_levels_size);
716  }
717 
718  const uint32_t delta_cap = kll_helper::level_capacity(k_, num_levels_ + 1, 0, m_);
719  const uint32_t new_total_cap = cur_total_cap + delta_cap;
720 
721  // move (and shift) the current data into the new buffer
722  T* new_buf = allocator_.allocate(new_total_cap);
723  kll_helper::move_construct<T>(items_, 0, cur_total_cap, new_buf, delta_cap, true);
724  allocator_.deallocate(items_, items_size_);
725  items_ = new_buf;
726  items_size_ = new_total_cap;
727 
728  // this loop includes the old "extra" index at the top
729  for (uint8_t i = 0; i <= num_levels_; i++) {
730  levels_[i] += delta_cap;
731  }
732 
733  if (levels_[num_levels_] != new_total_cap) throw std::logic_error("new capacity mismatch");
734 
735  num_levels_++;
736  levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
737 }
738 
739 template<typename T, typename C, typename A>
740 void kll_sketch<T, C, A>::sort_level_zero() {
741  if (!is_level_zero_sorted_) {
742  std::sort(items_ + levels_[0], items_ + levels_[1], comparator_);
743  is_level_zero_sorted_ = true;
744  }
745 }
746 
747 template<typename T, typename C, typename A>
748 void kll_sketch<T, C, A>::check_sorting() const {
749  // not checking level 0
750  for (uint8_t level = 1; level < num_levels_; ++level) {
751  const auto from = items_ + levels_[level];
752  const auto to = items_ + levels_[level + 1];
753  if (!std::is_sorted(from, to, comparator_)) {
754  throw std::logic_error("levels must be sorted");
755  }
756  }
757 }
758 
759 template<typename T, typename C, typename A>
761  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
762  quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
763  for (uint8_t level = 0; level < num_levels_; ++level) {
764  const auto from = items_ + levels_[level];
765  const auto to = items_ + levels_[level + 1]; // exclusive
766  view.add(from, to, 1ULL << level);
767  }
768  view.convert_to_cummulative();
769  return view;
770 }
771 
772 template<typename T, typename C, typename A>
773 template<typename O>
774 void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
775  const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
776  A alloc(allocator_);
777  auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
778  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
779  const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
780  const size_t work_levels_size = ub + 2; // ub+1 does not work
781  vector_u32 worklevels(work_levels_size, 0, allocator_);
782  vector_u32 outlevels(work_levels_size, 0, allocator_);
783 
784  const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
785 
786  populate_work_arrays(std::forward<O>(other), workbuf.get(), worklevels.data(), provisional_num_levels);
787 
788  const kll_helper::compress_result result = kll_helper::general_compress<T, C>(k_, m_, provisional_num_levels, workbuf.get(),
789  worklevels.data(), outlevels.data(), is_level_zero_sorted_);
790 
791  // ub can sometimes be much bigger
792  if (result.final_num_levels > ub) throw std::logic_error("merge error");
793 
794  // now we need to transfer the results back into "this" sketch
795  if (result.final_capacity != items_size_) {
796  allocator_.deallocate(items_, items_size_);
797  items_size_ = result.final_capacity;
798  items_ = allocator_.allocate(items_size_);
799  }
800  const uint32_t free_space_at_bottom = result.final_capacity - result.final_num_items;
801  kll_helper::move_construct<T>(workbuf.get(), outlevels[0], outlevels[0] + result.final_num_items, items_, free_space_at_bottom, true);
802 
803  const size_t new_levels_size = result.final_num_levels + 1;
804  if (levels_.size() < new_levels_size) {
805  levels_.resize(new_levels_size);
806  }
807  const uint32_t offset = free_space_at_bottom - outlevels[0];
808  for (uint8_t lvl = 0; lvl < levels_.size(); lvl++) { // includes the "extra" index
809  levels_[lvl] = outlevels[lvl] + offset;
810  }
811  num_levels_ = result.final_num_levels;
812 }
813 
814 // this leaves items_ uninitialized (all objects moved out and destroyed)
815 template<typename T, typename C, typename A>
816 template<typename FwdSk>
817 void kll_sketch<T, C, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
818  worklevels[0] = 0;
819 
820  // the level zero data from "other" was already inserted into "this"
821  kll_helper::move_construct<T>(items_, levels_[0], levels_[1], workbuf, 0, true);
822  worklevels[1] = safe_level_size(0);
823 
824  for (uint8_t lvl = 1; lvl < provisional_num_levels; lvl++) {
825  const uint32_t self_pop = safe_level_size(lvl);
826  const uint32_t other_pop = other.safe_level_size(lvl);
827  worklevels[lvl + 1] = worklevels[lvl] + self_pop + other_pop;
828 
829  if ((self_pop > 0) && (other_pop == 0)) {
830  kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
831  } else if ((self_pop == 0) && (other_pop > 0)) {
832  for (auto i = other.levels_[lvl], j = worklevels[lvl]; i < other.levels_[lvl] + other_pop; ++i, ++j) {
833  new (&workbuf[j]) T(conditional_forward<FwdSk>(other.items_[i]));
834  }
835  } else if ((self_pop > 0) && (other_pop > 0)) {
836  kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
837  }
838  }
839 }
840 
841 template<typename T, typename C, typename A>
842 void kll_sketch<T, C, A>::assert_correct_total_weight() const {
843  const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
844  if (total != n_) {
845  throw std::logic_error("Total weight does not match N");
846  }
847 }
848 
849 template<typename T, typename C, typename A>
850 uint32_t kll_sketch<T, C, A>::safe_level_size(uint8_t level) const {
851  if (level >= num_levels_) return 0;
852  return levels_[level + 1] - levels_[level];
853 }
854 
855 template<typename T, typename C, typename A>
856 uint32_t kll_sketch<T, C, A>::get_num_retained_above_level_zero() const {
857  if (num_levels_ == 1) return 0;
858  return levels_[num_levels_] - levels_[1];
859 }
860 
861 template<typename T, typename C, typename A>
862 void kll_sketch<T, C, A>::check_m(uint8_t m) {
863  if (m != kll_constants::DEFAULT_M) {
864  throw std::invalid_argument("Possible corruption: M must be " + std::to_string(kll_constants::DEFAULT_M)
865  + ": " + std::to_string(m));
866  }
867 }
868 
869 template<typename T, typename C, typename A>
870 void kll_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
871  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
872  const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
873  if (is_empty || is_single_item) {
874  if (preamble_ints != PREAMBLE_INTS_SHORT) {
875  throw std::invalid_argument("Possible corruption: preamble ints must be "
876  + std::to_string(PREAMBLE_INTS_SHORT) + " for an empty or single item sketch: " + std::to_string(preamble_ints));
877  }
878  } else {
879  if (preamble_ints != PREAMBLE_INTS_FULL) {
880  throw std::invalid_argument("Possible corruption: preamble ints must be "
881  + std::to_string(PREAMBLE_INTS_FULL) + " for a sketch with more than one item: " + std::to_string(preamble_ints));
882  }
883  }
884 }
885 
886 template<typename T, typename C, typename A>
887 void kll_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
888  if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
889  throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
890  + std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
891  + ", got " + std::to_string(serial_version));
892  }
893 }
894 
895 template<typename T, typename C, typename A>
896 void kll_sketch<T, C, A>::check_family_id(uint8_t family_id) {
897  if (family_id != FAMILY) {
898  throw std::invalid_argument("Possible corruption: family mismatch: expected "
899  + std::to_string(FAMILY) + ", got " + std::to_string(family_id));
900  }
901 }
902 
903 template <typename T, typename C, typename A>
904 string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
905  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
906  // The stream does not support passing an allocator instance, and alternatives are complicated.
907  std::ostringstream os;
908  os << "### KLL sketch summary:" << std::endl;
909  os << " K : " << k_ << std::endl;
910  os << " min K : " << min_k_ << std::endl;
911  os << " M : " << (unsigned int) m_ << std::endl;
912  os << " N : " << n_ << std::endl;
913  os << " Epsilon : " << std::setprecision(3) << get_normalized_rank_error(false) * 100 << "%" << std::endl;
914  os << " Epsilon PMF : " << get_normalized_rank_error(true) * 100 << "%" << std::endl;
915  os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
916  os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
917  os << " Levels : " << (unsigned int) num_levels_ << std::endl;
918  os << " Sorted : " << (is_level_zero_sorted_ ? "true" : "false") << std::endl;
919  os << " Capacity items : " << items_size_ << std::endl;
920  os << " Retained items : " << get_num_retained() << std::endl;
921  if (!is_empty()) {
922  os << " Min item : " << *min_item_ << std::endl;
923  os << " Max item : " << *max_item_ << std::endl;
924  }
925  os << "### End sketch summary" << std::endl;
926 
927  if (print_levels) {
928  os << "### KLL sketch levels:" << std::endl;
929  os << " index: nominal capacity, actual size" << std::endl;
930  for (uint8_t i = 0; i < num_levels_; i++) {
931  os << " " << (unsigned int) i << ": " << kll_helper::level_capacity(k_, num_levels_, i, m_) << ", " << safe_level_size(i) << std::endl;
932  }
933  os << "### End sketch levels" << std::endl;
934  }
935 
936  if (print_items) {
937  os << "### KLL sketch data:" << std::endl;
938  uint8_t level = 0;
939  while (level < num_levels_) {
940  const uint32_t from_index = levels_[level];
941  const uint32_t to_index = levels_[level + 1]; // exclusive
942  if (from_index < to_index) {
943  os << " level " << (unsigned int) level << ":" << std::endl;
944  }
945  for (uint32_t i = from_index; i < to_index; i++) {
946  os << " " << items_[i] << std::endl;
947  }
948  level++;
949  }
950  os << "### End sketch data" << std::endl;
951  }
952  return string<A>(os.str().c_str(), allocator_);
953 }
954 
955 template <typename T, typename C, typename A>
957  return kll_sketch<T, C, A>::const_iterator(items_, levels_.data(), num_levels_);
958 }
959 
960 template <typename T, typename C, typename A>
962  return kll_sketch<T, C, A>::const_iterator(nullptr, levels_.data(), num_levels_);
963 }
964 
965 template<typename T, typename C, typename A>
966 class kll_sketch<T, C, A>::items_deleter {
967  public:
968  items_deleter(uint32_t start, uint32_t num, const A& allocator):
969  allocator_(allocator), start_(start), num_(num) {}
970  void operator() (T* ptr) {
971  if (ptr != nullptr) {
972  for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
973  allocator_.deallocate(ptr, num_);
974  }
975  }
976  private:
977  A allocator_;
978  uint32_t start_;
979  uint32_t num_;
980 };
981 
982 template<typename T, typename C, typename A>
983 void kll_sketch<T, C, A>::setup_sorted_view() const {
984  if (sorted_view_ == nullptr) {
985  using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
986  sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
987  }
988 }
989 
990 template<typename T, typename C, typename A>
991 void kll_sketch<T, C, A>::reset_sorted_view() {
992  if (sorted_view_ != nullptr) {
993  sorted_view_->~quantiles_sorted_view();
994  using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
995  AllocSortedView(allocator_).deallocate(sorted_view_, 1);
996  sorted_view_ = nullptr;
997  }
998 }
999 
1000 // kll_sketch::const_iterator implementation
1001 
1002 template<typename T, typename C, typename A>
1003 kll_sketch<T, C, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
1004 items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
1005 {}
1006 
1007 template<typename T, typename C, typename A>
1008 typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++() {
1009  ++index;
1010  if (index == levels[level + 1]) { // go to the next non-empty level
1011  do {
1012  ++level;
1013  weight *= 2;
1014  } while (level < num_levels && levels[level] == levels[level + 1]);
1015  }
1016  return *this;
1017 }
1018 
1019 template<typename T, typename C, typename A>
1020 typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++(int) {
1021  const_iterator tmp(*this);
1022  operator++();
1023  return tmp;
1024 }
1025 
1026 template<typename T, typename C, typename A>
1027 bool kll_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
1028  return index == other.index;
1029 }
1030 
1031 template<typename T, typename C, typename A>
1032 bool kll_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
1033  return !operator==(other);
1034 }
1035 
1036 template<typename T, typename C, typename A>
1037 auto kll_sketch<T, C, A>::const_iterator::operator*() const -> reference {
1038  return value_type(items[index], weight);
1039 }
1040 
1041 template<typename T, typename C, typename A>
1042 auto kll_sketch<T, C, A>::const_iterator::operator->() const -> pointer {
1043  return **this;
1044 }
1045 
1046 } /* namespace datasketches */
1047 
1048 #endif
Implementation of a very compact quantiles sketch with lazy compaction scheme and nearly optimal accu...
Definition: kll_sketch.hpp:166
C get_comparator() const
Returns an instance of the comparator for this sketch.
Definition: kll_sketch_impl.hpp:271
quantiles_sorted_view< T, C, A > get_sorted_view() const
Gets the sorted view of this sketch.
Definition: kll_sketch_impl.hpp:760
kll_sketch & operator=(const kll_sketch &other)
Copy assignment.
Definition: kll_sketch_impl.hpp:102
vector_double get_CDF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analo...
Definition: kll_sketch_impl.hpp:295
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition: kll_sketch_impl.hpp:956
uint32_t get_num_retained() const
Returns the number of retained items (samples) in the sketch.
Definition: kll_sketch_impl.hpp:249
static kll_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const C &comparator=C(), const A &allocator=A())
This method deserializes a sketch from a given stream.
vector_double get_PMF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of sp...
Definition: kll_sketch_impl.hpp:288
void merge(FwdSk &&other)
Merges another sketch into this one.
Definition: kll_sketch_impl.hpp:209
T get_max_item() const
Returns the max item of the stream.
Definition: kll_sketch_impl.hpp:265
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition: kll_sketch_impl.hpp:961
void update(FwdT &&item)
Updates this sketch with the given data item.
Definition: kll_sketch_impl.hpp:180
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: kll_sketch_impl.hpp:367
string< A > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition: kll_sketch_impl.hpp:904
uint16_t get_k() const
Returns configured parameter k.
Definition: kll_sketch_impl.hpp:239
bool is_empty() const
Returns true if this sketch is empty.
Definition: kll_sketch_impl.hpp:234
double get_rank(const T &item, bool inclusive=true) const
Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
Definition: kll_sketch_impl.hpp:281
A get_allocator() const
Returns an instance of the allocator for this sketch.
Definition: kll_sketch_impl.hpp:276
T get_min_item() const
Returns the min item of the stream.
Definition: kll_sketch_impl.hpp:259
typename quantiles_sorted_view< T, C, A >::quantile_return_type quantile_return_type
Quantile return type.
Definition: kll_sketch.hpp:178
quantile_return_type get_quantile(double rank, bool inclusive=true) const
Returns an item from the sketch that is the best approximation to an item from the original stream wi...
Definition: kll_sketch_impl.hpp:302
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the sketch.
Definition: kll_sketch_impl.hpp:320
static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n)
Returns upper bound on the serialized size of a sketch given a parameter k and stream length.
Definition: kll_sketch_impl.hpp:348
double get_normalized_rank_error(bool pmf) const
Gets the approximate rank error of this sketch normalized as a fraction between zero and one.
Definition: kll_sketch_impl.hpp:313
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition: kll_sketch_impl.hpp:254
uint64_t get_n() const
Returns the length of the input stream.
Definition: kll_sketch_impl.hpp:244
const uint16_t MAX_K
max value of parameter K
Definition: kll_sketch.hpp:41
const uint16_t MIN_K
min value of parameter K
Definition: kll_sketch.hpp:39
DataSketches namespace.
Definition: binomial_bounds.hpp:38