datasketches-cpp
tuple_sketch_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #include <sstream>
21 #include <stdexcept>
22 
23 #include "binomial_bounds.hpp"
24 #include "theta_helpers.hpp"
25 
26 namespace datasketches {
27 
28 template<typename S, typename A>
30  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
31 }
32 
33 template<typename S, typename A>
35  return static_cast<double>(get_theta64()) /
36  static_cast<double>(theta_constants::MAX_THETA);
37 }
38 
39 template<typename S, typename A>
41  return get_num_retained() / get_theta();
42 }
43 
44 template<typename S, typename A>
45 double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const {
46  num_subset_entries = std::min(num_subset_entries, get_num_retained()) ;
47  if (!is_estimation_mode()) return num_subset_entries;
48  return binomial_bounds::get_lower_bound(num_subset_entries, get_theta(), num_std_devs);
49 }
50 
51 template<typename S, typename A>
52 double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs) const {
53  return get_lower_bound(num_std_devs, get_num_retained()) ;
54 }
55 
56 template<typename S, typename A>
57 double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const {
58  num_subset_entries = std::min(num_subset_entries, get_num_retained()) ;
59  if (!is_estimation_mode()) return num_subset_entries;
60  return binomial_bounds::get_upper_bound(num_subset_entries, get_theta(), num_std_devs);
61 }
62 
63 template<typename S, typename A>
64 double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
65  return get_upper_bound(num_std_devs, get_num_retained()) ;
66 }
67 
68 template<typename S, typename A>
69 string<A> tuple_sketch<S, A>::to_string(bool detail) const {
70  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
71  // The stream does not support passing an allocator instance, and alternatives are complicated.
72  std::ostringstream os;
73  os << "### Tuple sketch summary:" << std::endl;
74  os << " num retained entries : " << get_num_retained() << std::endl;
75  os << " seed hash : " << get_seed_hash() << std::endl;
76  os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
77  os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
78  os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
79  os << " theta (fraction) : " << get_theta() << std::endl;
80  os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
81  os << " estimate : " << this->get_estimate() << std::endl;
82  os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
83  os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
84  print_specifics(os);
85  os << "### End sketch summary" << std::endl;
86  if (detail) {
87  os << "### Retained entries" << std::endl;
88  for (const auto& it: *this) {
89  os << it.first << ": " << it.second << std::endl;
90  }
91  os << "### End retained entries" << std::endl;
92  }
93  return string<A>(os.str().c_str(), get_allocator());
94 }
95 
96 // update sketch
97 
98 template<typename S, typename U, typename P, typename A>
99 update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
100 policy_(policy),
101 map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
102 {}
103 
104 template<typename S, typename U, typename P, typename A>
106  return map_.allocator_;
107 }
108 
109 template<typename S, typename U, typename P, typename A>
111  return map_.is_empty_;
112 }
113 
114 template<typename S, typename U, typename P, typename A>
116  return map_.num_entries_ > 1 ? false : true;;
117 }
118 
119 template<typename S, typename U, typename P, typename A>
121  return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
122 }
123 
124 template<typename S, typename U, typename P, typename A>
126  return map_.num_entries_;
127 }
128 
129 template<typename S, typename U, typename P, typename A>
131  return compute_seed_hash(map_.seed_);
132 }
133 
134 template<typename S, typename U, typename P, typename A>
136  return map_.lg_nom_size_;
137 }
138 
139 template<typename S, typename U, typename P, typename A>
140 auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
141  return map_.rf_;
142 }
143 
144 template<typename S, typename U, typename P, typename A>
145 template<typename UU>
146 void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
147  update(&key, sizeof(key), std::forward<UU>(value));
148 }
149 
150 template<typename S, typename U, typename P, typename A>
151 template<typename UU>
152 void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
153  update(&key, sizeof(key), std::forward<UU>(value));
154 }
155 
156 template<typename S, typename U, typename P, typename A>
157 template<typename UU>
158 void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
159  update(static_cast<int32_t>(key), std::forward<UU>(value));
160 }
161 
162 template<typename S, typename U, typename P, typename A>
163 template<typename UU>
164 void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
165  update(static_cast<int64_t>(key), std::forward<UU>(value));
166 }
167 
168 template<typename S, typename U, typename P, typename A>
169 template<typename UU>
170 void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
171  update(static_cast<int16_t>(key), std::forward<UU>(value));
172 }
173 
174 template<typename S, typename U, typename P, typename A>
175 template<typename UU>
176 void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
177  update(static_cast<int64_t>(key), std::forward<UU>(value));
178 }
179 
180 template<typename S, typename U, typename P, typename A>
181 template<typename UU>
182 void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
183  update(static_cast<int8_t>(key), std::forward<UU>(value));
184 }
185 
186 template<typename S, typename U, typename P, typename A>
187 template<typename UU>
188 void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
189  update(static_cast<int64_t>(key), std::forward<UU>(value));
190 }
191 
192 template<typename S, typename U, typename P, typename A>
193 template<typename UU>
194 void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
195  if (key.empty()) return;
196  update(key.c_str(), key.length(), std::forward<UU>(value));
197 }
198 
199 template<typename S, typename U, typename P, typename A>
200 template<typename UU>
201 void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
202  update(canonical_double(key), std::forward<UU>(value));
203 }
204 
205 template<typename S, typename U, typename P, typename A>
206 template<typename UU>
207 void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
208  update(static_cast<double>(key), std::forward<UU>(value));
209 }
210 
211 template<typename S, typename U, typename P, typename A>
212 template<typename UU>
213 void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
214  const uint64_t hash = map_.hash_and_screen(key, length);
215  if (hash == 0) return;
216  auto result = map_.find(hash);
217  if (!result.second) {
218  S summary = policy_.create();
219  policy_.update(summary, std::forward<UU>(value));
220  map_.insert(result.first, Entry(hash, std::move(summary)));
221  } else {
222  policy_.update((*result.first).second, std::forward<UU>(value));
223  }
224 }
225 
226 template<typename S, typename U, typename P, typename A>
228  map_.trim();
229 }
230 
231 template<typename S, typename U, typename P, typename A>
233  map_.reset();
234 }
235 
236 template<typename S, typename U, typename P, typename A>
238  return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
239 }
240 
241 template<typename S, typename U, typename P, typename A>
243  return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
244 }
245 
246 template<typename S, typename U, typename P, typename A>
247 auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
248  return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
249 }
250 
251 template<typename S, typename U, typename P, typename A>
252 auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
253  return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
254 }
255 
256 template<typename S, typename U, typename P, typename A>
258  return compact_tuple_sketch<S, A>(*this, ordered);
259 }
260 
261 template<typename S, typename U, typename P, typename A>
262 void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
263  os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
264  os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
265  os << " resize factor : " << (1 << map_.rf_) << std::endl;
266 }
267 
268 // compact sketch
269 
270 template<typename S, typename A>
271 compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
272  std::vector<Entry, AllocEntry>&& entries):
273 is_empty_(is_empty),
274 is_ordered_(is_ordered || (entries.size() <= 1ULL)),
275 seed_hash_(seed_hash),
276 theta_(theta),
277 entries_(std::move(entries))
278 {}
279 
280 template<typename S, typename A>
282 is_empty_(other.is_empty()),
283 is_ordered_(other.is_ordered() || ordered),
284 seed_hash_(other.get_seed_hash()),
285 theta_(other.get_theta64()),
286 entries_(other.get_allocator())
287 {
288  entries_.reserve(other.get_num_retained());
289  std::copy(other.begin(), other.end(), std::back_inserter(entries_));
290  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
291 }
292 
293 template<typename S, typename A>
295 is_empty_(other.is_empty()),
296 is_ordered_(other.is_ordered()),
297 seed_hash_(other.get_seed_hash()),
298 theta_(other.get_theta64()),
299 entries_(std::move(other.entries_))
300 {}
301 
302 template<typename S, typename A>
303 compact_tuple_sketch<S, A>::compact_tuple_sketch(const theta_sketch_alloc<AllocU64>& other, const S& summary, bool ordered):
304 is_empty_(other.is_empty()),
305 is_ordered_(other.is_ordered() || ordered),
306 seed_hash_(other.get_seed_hash()),
307 theta_(other.get_theta64()),
308 entries_(other.get_allocator())
309 {
310  entries_.reserve(other.get_num_retained());
311  for (uint64_t hash: other) {
312  entries_.push_back(Entry(hash, summary));
313  }
314  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
315 }
316 
317 template<typename S, typename A>
319  return entries_.get_allocator();
320 }
321 
322 template<typename S, typename A>
324  return is_empty_;
325 }
326 
327 template<typename S, typename A>
329  return is_ordered_;
330 }
331 
332 template<typename S, typename A>
334  return theta_;
335 }
336 
337 template<typename S, typename A>
339  return static_cast<uint32_t>(entries_.size());
340 }
341 
342 template<typename S, typename A>
344  return seed_hash_;
345 }
346 
347 // implementation for fixed-size arithmetic types (integral and floating point)
348 template<typename S, typename A>
349 template<typename SD, typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
351  unused(sd);
352  return entries_.size() * sizeof(SS);
353 }
354 
355 // implementation for all other types (non-arithmetic)
356 template<typename S, typename A>
357 template<typename SD, typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
359  size_t size = 0;
360  for (const auto& it: entries_) {
361  size += sd.size_of_item(it.second);
362  }
363  return size;
364 }
365 
366 template<typename S, typename A>
367 template<typename SerDe>
368 void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
369  const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
370  write(os, preamble_longs);
371  const uint8_t serial_version = SERIAL_VERSION;
372  write(os, serial_version);
373  const uint8_t family = SKETCH_FAMILY;
374  write(os, family);
375  const uint8_t type = SKETCH_TYPE;
376  write(os, type);
377  const uint8_t unused8 = 0;
378  write(os, unused8);
379  const uint8_t flags_byte(
380  (1 << flags::IS_COMPACT) |
381  (1 << flags::IS_READ_ONLY) |
382  (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
383  (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
384  );
385  write(os, flags_byte);
386  const uint16_t seed_hash = get_seed_hash();
387  write(os, seed_hash);
388  if (preamble_longs > 1) {
389  const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
390  write(os, num_entries);
391  const uint32_t unused32 = 0;
392  write(os, unused32);
393  }
394  if (this->is_estimation_mode()) {
395  write(os, this->theta_);
396  }
397  for (const auto& it: entries_) {
398  write(os, it.first);
399  sd.serialize(os, &it.second, 1);
400  }
401 }
402 
403 template<typename S, typename A>
404 template<typename SerDe>
405 auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
406  const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
407  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
408  + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
409  vector_bytes bytes(size, 0, entries_.get_allocator());
410  uint8_t* ptr = bytes.data() + header_size_bytes;
411  const uint8_t* end_ptr = ptr + size;
412 
413  ptr += copy_to_mem(preamble_longs, ptr);
414  const uint8_t serial_version = SERIAL_VERSION;
415  ptr += copy_to_mem(serial_version, ptr);
416  const uint8_t family = SKETCH_FAMILY;
417  ptr += copy_to_mem(family, ptr);
418  const uint8_t type = SKETCH_TYPE;
419  ptr += copy_to_mem(type, ptr);
420  ptr += sizeof(uint8_t); // unused
421  const uint8_t flags_byte(
422  (1 << flags::IS_COMPACT) |
423  (1 << flags::IS_READ_ONLY) |
424  (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
425  (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
426  );
427  ptr += copy_to_mem(flags_byte, ptr);
428  const uint16_t seed_hash = get_seed_hash();
429  ptr += copy_to_mem(seed_hash, ptr);
430  if (preamble_longs > 1) {
431  const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
432  ptr += copy_to_mem(num_entries, ptr);
433  ptr += sizeof(uint32_t); // unused
434  }
435  if (this->is_estimation_mode()) {
436  ptr += copy_to_mem(theta_, ptr);
437  }
438  for (const auto& it: entries_) {
439  ptr += copy_to_mem(it.first, ptr);
440  ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
441  }
442  return bytes;
443 }
444 
445 template<typename S, typename A>
446 template<typename SerDe>
447 compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
448  const auto preamble_longs = read<uint8_t>(is);
449  const auto serial_version = read<uint8_t>(is);
450  const auto family = read<uint8_t>(is);
451  const auto type = read<uint8_t>(is);
452  read<uint8_t>(is); // unused
453  const auto flags_byte = read<uint8_t>(is);
454  const auto seed_hash = read<uint16_t>(is);
455  if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
456  throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
457  + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
458  }
459  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
460  if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
461  throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
462  + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
463  }
464  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
465  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
466 
467  uint64_t theta = theta_constants::MAX_THETA;
468  uint32_t num_entries = 0;
469  if (!is_empty) {
470  if (preamble_longs == 1) {
471  num_entries = 1;
472  } else {
473  num_entries = read<uint32_t>(is);
474  read<uint32_t>(is); // unused
475  if (preamble_longs > 2) {
476  theta = read<uint64_t>(is);
477  }
478  }
479  }
480  A alloc(allocator);
481  std::vector<Entry, AllocEntry> entries(alloc);
482  if (!is_empty) {
483  entries.reserve(num_entries);
484  std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
485  for (size_t i = 0; i < num_entries; ++i) {
486  const auto key = read<uint64_t>(is);
487  sd.deserialize(is, summary.get(), 1);
488  entries.push_back(Entry(key, std::move(*summary)));
489  (*summary).~S();
490  }
491  }
492  if (!is.good()) throw std::runtime_error("error reading from std::istream");
493  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
494  return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
495 }
496 
497 template<typename S, typename A>
498 template<typename SerDe>
499 compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
500  ensure_minimum_memory(size, 8);
501  const char* ptr = static_cast<const char*>(bytes);
502  const char* base = ptr;
503  uint8_t preamble_longs;
504  ptr += copy_from_mem(ptr, preamble_longs);
505  uint8_t serial_version;
506  ptr += copy_from_mem(ptr, serial_version);
507  uint8_t family;
508  ptr += copy_from_mem(ptr, family);
509  uint8_t type;
510  ptr += copy_from_mem(ptr, type);
511  ptr += sizeof(uint8_t); // unused
512  uint8_t flags_byte;
513  ptr += copy_from_mem(ptr, flags_byte);
514  uint16_t seed_hash;
515  ptr += copy_from_mem(ptr, seed_hash);
516  if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
517  throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
518  + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
519  }
520  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
521  if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
522  throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
523  + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
524  }
525  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
526  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
527 
528  uint64_t theta = theta_constants::MAX_THETA;
529  uint32_t num_entries = 0;
530 
531  if (!is_empty) {
532  if (preamble_longs == 1) {
533  num_entries = 1;
534  } else {
535  ensure_minimum_memory(size, 8); // read the first prelong before this method
536  ptr += copy_from_mem(ptr, num_entries);
537  ptr += sizeof(uint32_t); // unused
538  if (preamble_longs > 2) {
539  ensure_minimum_memory(size, (preamble_longs - 1) << 3);
540  ptr += copy_from_mem(ptr, theta);
541  }
542  }
543  }
544  const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
545  ensure_minimum_memory(size, ptr - base + keys_size_bytes);
546  A alloc(allocator);
547  std::vector<Entry, AllocEntry> entries(alloc);
548  if (!is_empty) {
549  entries.reserve(num_entries);
550  std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
551  for (size_t i = 0; i < num_entries; ++i) {
552  uint64_t key;
553  ptr += copy_from_mem(ptr, key);
554  ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
555  entries.push_back(Entry(key, std::move(*summary)));
556  (*summary).~S();
557  }
558  }
559  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
560  return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
561 }
562 
563 template<typename S, typename A>
565  return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
566 }
567 
568 template<typename S, typename A>
570  return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
571 }
572 
573 template<typename S, typename A>
574 auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
575  return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
576 }
577 
578 template<typename S, typename A>
579 auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
580  return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
581 }
582 
583 template<typename S, typename A>
584 void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
585 
586 // builder
587 
588 template<typename D, typename P, typename A>
589 tuple_base_builder<D, P, A>::tuple_base_builder(const P& policy, const A& allocator):
590 theta_base_builder<D, A>(allocator), policy_(policy) {}
591 
592 template<typename S, typename U, typename P, typename A>
593 update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
594 tuple_base_builder<builder, P, A>(policy, allocator) {}
595 
596 template<typename S, typename U, typename P, typename A>
598  return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
599 }
600 
601 } /* namespace datasketches */
virtual uint32_t get_num_retained() const =0
Compact Tuple sketch.
Definition: tuple_sketch.hpp:407
virtual uint64_t get_theta64() const
Definition: tuple_sketch_impl.hpp:333
virtual uint32_t get_num_retained() const
Definition: tuple_sketch_impl.hpp:338
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition: tuple_sketch_impl.hpp:368
virtual bool is_empty() const
Definition: tuple_sketch_impl.hpp:323
virtual bool is_ordered() const
Definition: tuple_sketch_impl.hpp:328
virtual uint16_t get_seed_hash() const
Definition: tuple_sketch_impl.hpp:343
virtual iterator end()
Iterator pointing past the valid range.
Definition: tuple_sketch_impl.hpp:569
compact_tuple_sketch(const Base &other, bool ordered)
Copy constructor.
Definition: tuple_sketch_impl.hpp:281
virtual Allocator get_allocator() const
Definition: tuple_sketch_impl.hpp:318
static compact_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const SerDe &sd=SerDe(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
virtual iterator begin()
Iterator over entries in this sketch.
Definition: tuple_sketch_impl.hpp:564
size_t get_serialized_size_summaries_bytes(const SerDe &sd) const
Computes size needed to serialize summaries in the sketch.
Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
Definition: theta_sketch.hpp:127
Tuple base builder.
Definition: tuple_sketch.hpp:587
Base class for Tuple sketch.
Definition: tuple_sketch.hpp:54
double get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate upper error bound given a number of standard deviations over an arbitrary num...
Definition: tuple_sketch_impl.hpp:57
double get_estimate() const
Definition: tuple_sketch_impl.hpp:40
virtual bool is_ordered() const =0
double get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate lower error bound given a number of standard deviations over an arbitrary num...
Definition: tuple_sketch_impl.hpp:45
string< Allocator > to_string(bool print_items=false) const
Provides a human-readable summary of this sketch as a string.
Definition: tuple_sketch_impl.hpp:69
virtual uint32_t get_num_retained() const =0
double get_theta() const
Definition: tuple_sketch_impl.hpp:34
virtual iterator end()=0
Iterator pointing past the valid range.
virtual iterator begin()=0
Iterator over entries in this sketch.
bool is_estimation_mode() const
Definition: tuple_sketch_impl.hpp:29
Update Tuple sketch builder.
Definition: tuple_sketch.hpp:597
builder(const P &policy=P(), const A &allocator=A())
Constructor Creates and instance of the builder with default parameters.
Definition: tuple_sketch_impl.hpp:593
update_tuple_sketch< S, U, P, A > build() const
This is to create an instance of the sketch with predefined parameters.
Definition: tuple_sketch_impl.hpp:597
Update Tuple sketch.
Definition: tuple_sketch.hpp:217
void trim()
Remove retained entries in excess of the nominal size k (if any)
Definition: tuple_sketch_impl.hpp:227
void reset()
Reset the sketch to the initial empty state.
Definition: tuple_sketch_impl.hpp:232
const uint64_t MAX_THETA
max theta - signed max for compatibility with Java
Definition: theta_constants.hpp:36
DataSketches namespace.
Definition: binomial_bounds.hpp:38