datasketches-cpp
Loading...
Searching...
No Matches
tuple_sketch_impl.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#include <sstream>
21#include <stdexcept>
22
23#include "binomial_bounds.hpp"
24#include "theta_helpers.hpp"
25
26namespace datasketches {
27
28template<typename S, typename A>
30 return get_theta64() < theta_constants::MAX_THETA && !is_empty();
31}
32
33template<typename S, typename A>
35 return static_cast<double>(get_theta64()) /
36 static_cast<double>(theta_constants::MAX_THETA);
37}
38
39template<typename S, typename A>
41 return get_num_retained() / get_theta();
42}
43
44template<typename S, typename A>
45double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const {
46 num_subset_entries = std::min(num_subset_entries, get_num_retained()) ;
47 if (!is_estimation_mode()) return num_subset_entries;
48 return binomial_bounds::get_lower_bound(num_subset_entries, get_theta(), num_std_devs);
49}
50
51template<typename S, typename A>
52double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs) const {
53 return get_lower_bound(num_std_devs, get_num_retained()) ;
54}
55
56template<typename S, typename A>
57double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const {
58 num_subset_entries = std::min(num_subset_entries, get_num_retained()) ;
59 if (!is_estimation_mode()) return num_subset_entries;
60 return binomial_bounds::get_upper_bound(num_subset_entries, get_theta(), num_std_devs);
61}
62
63template<typename S, typename A>
64double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
65 return get_upper_bound(num_std_devs, get_num_retained()) ;
66}
67
68template<typename S, typename A>
69string<A> tuple_sketch<S, A>::to_string(bool detail) const {
70 // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
71 // The stream does not support passing an allocator instance, and alternatives are complicated.
72 std::ostringstream os;
73 os << "### Tuple sketch summary:" << std::endl;
74 os << " num retained entries : " << get_num_retained() << std::endl;
75 os << " seed hash : " << get_seed_hash() << std::endl;
76 os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
77 os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
78 os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
79 os << " theta (fraction) : " << get_theta() << std::endl;
80 os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
81 os << " estimate : " << this->get_estimate() << std::endl;
82 os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
83 os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
84 print_specifics(os);
85 os << "### End sketch summary" << std::endl;
86 if (detail) {
87 os << "### Retained entries" << std::endl;
88 for (const auto& it: *this) {
89 os << it.first << ": " << it.second << std::endl;
90 }
91 os << "### End retained entries" << std::endl;
92 }
93 return string<A>(os.str().c_str(), get_allocator());
94}
95
96// update sketch
97
98template<typename S, typename U, typename P, typename A>
99update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
100policy_(policy),
101map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
102{}
103
104template<typename S, typename U, typename P, typename A>
106 return map_.allocator_;
107}
109template<typename S, typename U, typename P, typename A>
111 return map_.is_empty_;
112}
113
114template<typename S, typename U, typename P, typename A>
116 return map_.num_entries_ > 1 ? false : true;;
117}
119template<typename S, typename U, typename P, typename A>
121 return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
122}
124template<typename S, typename U, typename P, typename A>
126 return map_.num_entries_;
127}
129template<typename S, typename U, typename P, typename A>
131 return compute_seed_hash(map_.seed_);
132}
133
134template<typename S, typename U, typename P, typename A>
136 return map_.lg_nom_size_;
137}
138
139template<typename S, typename U, typename P, typename A>
140auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
141 return map_.rf_;
142}
143
144template<typename S, typename U, typename P, typename A>
145template<typename UU>
146void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
147 update(&key, sizeof(key), std::forward<UU>(value));
148}
149
150template<typename S, typename U, typename P, typename A>
151template<typename UU>
152void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
153 update(&key, sizeof(key), std::forward<UU>(value));
154}
156template<typename S, typename U, typename P, typename A>
157template<typename UU>
158void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
159 update(static_cast<int32_t>(key), std::forward<UU>(value));
160}
161
162template<typename S, typename U, typename P, typename A>
163template<typename UU>
164void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
165 update(static_cast<int64_t>(key), std::forward<UU>(value));
166}
167
168template<typename S, typename U, typename P, typename A>
169template<typename UU>
170void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
171 update(static_cast<int16_t>(key), std::forward<UU>(value));
172}
173
174template<typename S, typename U, typename P, typename A>
175template<typename UU>
176void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
177 update(static_cast<int64_t>(key), std::forward<UU>(value));
178}
179
180template<typename S, typename U, typename P, typename A>
181template<typename UU>
182void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
183 update(static_cast<int8_t>(key), std::forward<UU>(value));
184}
185
186template<typename S, typename U, typename P, typename A>
187template<typename UU>
188void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
189 update(static_cast<int64_t>(key), std::forward<UU>(value));
190}
191
192template<typename S, typename U, typename P, typename A>
193template<typename UU>
194void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
195 if (key.empty()) return;
196 update(key.c_str(), key.length(), std::forward<UU>(value));
197}
198
199template<typename S, typename U, typename P, typename A>
200template<typename UU>
201void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
202 update(canonical_double(key), std::forward<UU>(value));
203}
204
205template<typename S, typename U, typename P, typename A>
206template<typename UU>
207void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
208 update(static_cast<double>(key), std::forward<UU>(value));
209}
210
211template<typename S, typename U, typename P, typename A>
212template<typename UU>
213void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
214 const uint64_t hash = map_.hash_and_screen(key, length);
215 if (hash == 0) return;
216 auto result = map_.find(hash);
217 if (!result.second) {
218 S summary = policy_.create();
219 policy_.update(summary, std::forward<UU>(value));
220 map_.insert(result.first, Entry(hash, std::move(summary)));
221 } else {
222 policy_.update((*result.first).second, std::forward<UU>(value));
223 }
224}
225
226template<typename S, typename U, typename P, typename A>
230
231template<typename S, typename U, typename P, typename A>
235
236template<typename S, typename U, typename P, typename A>
238 return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
241template<typename S, typename U, typename P, typename A>
243 return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
244}
245
246template<typename S, typename U, typename P, typename A>
247auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
248 return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
249}
250
251template<typename S, typename U, typename P, typename A>
252auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
253 return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
254}
255
256template<typename S, typename U, typename P, typename A>
260
261template<typename S, typename U, typename P, typename A>
262template<typename Predicate>
264 return compact_tuple_sketch<S, A>::filter(*this, predicate);
265}
266
267template<typename S, typename U, typename P, typename A>
268void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
269 os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
270 os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
271 os << " resize factor : " << (1 << map_.rf_) << std::endl;
272}
273
274// compact sketch
275
276template<typename S, typename A>
277compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
278 std::vector<Entry, AllocEntry>&& entries):
279is_empty_(is_empty),
280is_ordered_(is_ordered || (entries.size() <= 1ULL)),
281seed_hash_(seed_hash),
282theta_(theta),
283entries_(std::move(entries))
284{}
285
286template<typename S, typename A>
288is_empty_(other.is_empty()),
289is_ordered_(other.is_ordered() || ordered),
290seed_hash_(other.get_seed_hash()),
291theta_(other.get_theta64()),
292entries_(other.get_allocator())
293{
294 entries_.reserve(other.get_num_retained());
295 std::copy(other.begin(), other.end(), std::back_inserter(entries_));
296 if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
297}
298
299template<typename S, typename A>
301is_empty_(other.is_empty()),
302is_ordered_(other.is_ordered()),
303seed_hash_(other.get_seed_hash()),
304theta_(other.get_theta64()),
305entries_(std::move(other.entries_))
306{}
307
308template<typename S, typename A>
309compact_tuple_sketch<S, A>::compact_tuple_sketch(const theta_sketch_alloc<AllocU64>& other, const S& summary, bool ordered):
310is_empty_(other.is_empty()),
311is_ordered_(other.is_ordered() || ordered),
312seed_hash_(other.get_seed_hash()),
313theta_(other.get_theta64()),
314entries_(other.get_allocator())
315{
316 entries_.reserve(other.get_num_retained());
317 for (uint64_t hash: other) {
318 entries_.push_back(Entry(hash, summary));
319 }
320 if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
321}
322
323template<typename S, typename A>
325 return entries_.get_allocator();
326}
327
328template<typename S, typename A>
330 return is_empty_;
331}
332
333template<typename S, typename A>
335 return is_ordered_;
336}
337
338template<typename S, typename A>
340 return theta_;
341}
342
343template<typename S, typename A>
345 return static_cast<uint32_t>(entries_.size());
346}
347
348template<typename S, typename A>
350 return seed_hash_;
351}
352
353template<typename S, typename A>
354template<typename Predicate>
355compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::filter(const Predicate& predicate) const {
356 return filter(*this, predicate);
357}
358
359template<typename S, typename A>
360template<typename Sketch, typename Predicate>
361compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::filter(const Sketch& sketch, const Predicate& predicate) {
362 std::vector<Entry, AllocEntry> entries(sketch.get_allocator());
363 entries.reserve(sketch.get_num_retained());
364 std::copy_if(
365 sketch.begin(),
366 sketch.end(),
367 std::back_inserter(entries),
368 [&predicate](const Entry& e) {return predicate(e.second);}
369 );
370 entries.shrink_to_fit();
372 !sketch.is_estimation_mode() && entries.empty(),
373 sketch.is_ordered(),
374 sketch.get_seed_hash(),
375 sketch.get_theta64(),
376 std::move(entries)
377 );
378}
379
380// implementation for fixed-size arithmetic types (integral and floating point)
381template<typename S, typename A>
382template<typename SD, typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
384 unused(sd);
385 return entries_.size() * sizeof(SS);
386}
387
388// implementation for all other types (non-arithmetic)
389template<typename S, typename A>
390template<typename SD, typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
392 size_t size = 0;
393 for (const auto& it: entries_) {
394 size += sd.size_of_item(it.second);
396 return size;
397}
398
399template<typename S, typename A>
400template<typename SerDe>
401void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
402 const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
403 write(os, preamble_longs);
404 const uint8_t serial_version = SERIAL_VERSION;
405 write(os, serial_version);
406 const uint8_t family = SKETCH_FAMILY;
407 write(os, family);
408 const uint8_t type = SKETCH_TYPE;
409 write(os, type);
410 const uint8_t unused8 = 0;
411 write(os, unused8);
412 const uint8_t flags_byte(
413 (1 << flags::IS_COMPACT) |
414 (1 << flags::IS_READ_ONLY) |
415 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
416 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
417 );
418 write(os, flags_byte);
419 const uint16_t seed_hash = get_seed_hash();
420 write(os, seed_hash);
421 if (preamble_longs > 1) {
422 const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
423 write(os, num_entries);
424 const uint32_t unused32 = 0;
425 write(os, unused32);
426 }
427 if (this->is_estimation_mode()) {
428 write(os, this->theta_);
429 }
430 for (const auto& it: entries_) {
431 write(os, it.first);
432 sd.serialize(os, &it.second, 1);
433 }
434}
435
436template<typename S, typename A>
437template<typename SerDe>
438auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
439 const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
440 const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
441 + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
442 vector_bytes bytes(size, 0, entries_.get_allocator());
443 uint8_t* ptr = bytes.data() + header_size_bytes;
444 const uint8_t* end_ptr = ptr + size;
445
446 ptr += copy_to_mem(preamble_longs, ptr);
447 const uint8_t serial_version = SERIAL_VERSION;
448 ptr += copy_to_mem(serial_version, ptr);
449 const uint8_t family = SKETCH_FAMILY;
450 ptr += copy_to_mem(family, ptr);
451 const uint8_t type = SKETCH_TYPE;
452 ptr += copy_to_mem(type, ptr);
453 ptr += sizeof(uint8_t); // unused
454 const uint8_t flags_byte(
455 (1 << flags::IS_COMPACT) |
456 (1 << flags::IS_READ_ONLY) |
457 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
458 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
459 );
460 ptr += copy_to_mem(flags_byte, ptr);
461 const uint16_t seed_hash = get_seed_hash();
462 ptr += copy_to_mem(seed_hash, ptr);
463 if (preamble_longs > 1) {
464 const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
465 ptr += copy_to_mem(num_entries, ptr);
466 ptr += sizeof(uint32_t); // unused
467 }
468 if (this->is_estimation_mode()) {
469 ptr += copy_to_mem(theta_, ptr);
470 }
471 for (const auto& it: entries_) {
472 ptr += copy_to_mem(it.first, ptr);
473 ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
474 }
475 return bytes;
476}
477
478template<typename S, typename A>
479template<typename SerDe>
480compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
481 const auto preamble_longs = read<uint8_t>(is);
482 const auto serial_version = read<uint8_t>(is);
483 const auto family = read<uint8_t>(is);
484 const auto type = read<uint8_t>(is);
485 read<uint8_t>(is); // unused
486 const auto flags_byte = read<uint8_t>(is);
487 const auto seed_hash = read<uint16_t>(is);
488 if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
489 throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
490 + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
491 }
492 checker<true>::check_sketch_family(family, SKETCH_FAMILY);
493 if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
494 throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
495 + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
496 }
497 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
498 if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
499
500 uint64_t theta = theta_constants::MAX_THETA;
501 uint32_t num_entries = 0;
502 if (!is_empty) {
503 if (preamble_longs == 1) {
504 num_entries = 1;
505 } else {
506 num_entries = read<uint32_t>(is);
507 read<uint32_t>(is); // unused
508 if (preamble_longs > 2) {
509 theta = read<uint64_t>(is);
510 }
511 }
512 }
513 A alloc(allocator);
514 std::vector<Entry, AllocEntry> entries(alloc);
515 if (!is_empty) {
516 entries.reserve(num_entries);
517 std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
518 for (size_t i = 0; i < num_entries; ++i) {
519 const auto key = read<uint64_t>(is);
520 sd.deserialize(is, summary.get(), 1);
521 entries.push_back(Entry(key, std::move(*summary)));
522 (*summary).~S();
523 }
524 }
525 if (!is.good()) throw std::runtime_error("error reading from std::istream");
526 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
527 return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
528}
529
530template<typename S, typename A>
531template<typename SerDe>
532compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
533 ensure_minimum_memory(size, 8);
534 const char* ptr = static_cast<const char*>(bytes);
535 const char* base = ptr;
536 uint8_t preamble_longs;
537 ptr += copy_from_mem(ptr, preamble_longs);
538 uint8_t serial_version;
539 ptr += copy_from_mem(ptr, serial_version);
540 uint8_t family;
541 ptr += copy_from_mem(ptr, family);
542 uint8_t type;
543 ptr += copy_from_mem(ptr, type);
544 ptr += sizeof(uint8_t); // unused
545 uint8_t flags_byte;
546 ptr += copy_from_mem(ptr, flags_byte);
547 uint16_t seed_hash;
548 ptr += copy_from_mem(ptr, seed_hash);
549 if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
550 throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
551 + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
552 }
553 checker<true>::check_sketch_family(family, SKETCH_FAMILY);
554 if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
555 throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
556 + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
557 }
558 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
559 if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
560
561 uint64_t theta = theta_constants::MAX_THETA;
562 uint32_t num_entries = 0;
563
564 if (!is_empty) {
565 if (preamble_longs == 1) {
566 num_entries = 1;
567 } else {
568 ensure_minimum_memory(size, 8); // read the first prelong before this method
569 ptr += copy_from_mem(ptr, num_entries);
570 ptr += sizeof(uint32_t); // unused
571 if (preamble_longs > 2) {
572 ensure_minimum_memory(size, (preamble_longs - 1) << 3);
573 ptr += copy_from_mem(ptr, theta);
574 }
575 }
576 }
577 const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
578 ensure_minimum_memory(size, ptr - base + keys_size_bytes);
579 A alloc(allocator);
580 std::vector<Entry, AllocEntry> entries(alloc);
581 if (!is_empty) {
582 entries.reserve(num_entries);
583 std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
584 for (size_t i = 0; i < num_entries; ++i) {
585 uint64_t key;
586 ptr += copy_from_mem(ptr, key);
587 ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
588 entries.push_back(Entry(key, std::move(*summary)));
589 (*summary).~S();
590 }
591 }
592 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
593 return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
594}
595
596template<typename S, typename A>
598 return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
599}
600
601template<typename S, typename A>
603 return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
604}
605
606template<typename S, typename A>
607auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
608 return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
609}
610
611template<typename S, typename A>
612auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
613 return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
614}
615
616template<typename S, typename A>
617void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
618
619// builder
620
621template<typename D, typename P, typename A>
622tuple_base_builder<D, P, A>::tuple_base_builder(const P& policy, const A& allocator):
623theta_base_builder<D, A>(allocator), policy_(policy) {}
624
625template<typename S, typename U, typename P, typename A>
626update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
627tuple_base_builder<builder, P, A>(policy, allocator) {}
628
629template<typename S, typename U, typename P, typename A>
631 return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
633
634} /* namespace datasketches */
virtual bool is_ordered() const =0
virtual uint32_t get_num_retained() const =0
Compact Tuple sketch.
Definition tuple_sketch.hpp:416
virtual uint64_t get_theta64() const
Definition tuple_sketch_impl.hpp:339
virtual uint32_t get_num_retained() const
Definition tuple_sketch_impl.hpp:344
compact_tuple_sketch filter(const Predicate &predicate) const
Produces a Compact Tuple sketch from this sketch by applying a given predicate to each entry.
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition tuple_sketch_impl.hpp:401
virtual bool is_empty() const
Definition tuple_sketch_impl.hpp:329
virtual bool is_ordered() const
Definition tuple_sketch_impl.hpp:334
virtual uint16_t get_seed_hash() const
Definition tuple_sketch_impl.hpp:349
virtual iterator end()
Iterator pointing past the valid range.
Definition tuple_sketch_impl.hpp:602
compact_tuple_sketch(const Base &other, bool ordered)
Copy constructor.
Definition tuple_sketch_impl.hpp:287
virtual Allocator get_allocator() const
Definition tuple_sketch_impl.hpp:324
static compact_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const SerDe &sd=SerDe(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
virtual iterator begin()
Iterator over entries in this sketch.
Definition tuple_sketch_impl.hpp:597
size_t get_serialized_size_summaries_bytes(const SerDe &sd) const
Computes size needed to serialize summaries in the sketch.
Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
Definition theta_sketch.hpp:127
Tuple base builder.
Definition tuple_sketch.hpp:614
Base class for Tuple sketch.
Definition tuple_sketch.hpp:54
double get_upper_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate upper error bound given a number of standard deviations over an arbitrary num...
Definition tuple_sketch_impl.hpp:57
double get_estimate() const
Definition tuple_sketch_impl.hpp:40
virtual bool is_ordered() const =0
double get_lower_bound(uint8_t num_std_devs, uint32_t num_subset_entries) const
Returns the approximate lower error bound given a number of standard deviations over an arbitrary num...
Definition tuple_sketch_impl.hpp:45
string< Allocator > to_string(bool print_items=false) const
Provides a human-readable summary of this sketch as a string.
Definition tuple_sketch_impl.hpp:69
virtual uint32_t get_num_retained() const =0
double get_theta() const
Definition tuple_sketch_impl.hpp:34
virtual iterator end()=0
Iterator pointing past the valid range.
virtual iterator begin()=0
Iterator over entries in this sketch.
bool is_estimation_mode() const
Definition tuple_sketch_impl.hpp:29
Update Tuple sketch builder.
Definition tuple_sketch.hpp:624
builder(const P &policy=P(), const A &allocator=A())
Constructor Creates and instance of the builder with default parameters.
Definition tuple_sketch_impl.hpp:626
update_tuple_sketch< S, U, P, A > build() const
This is to create an instance of the sketch with predefined parameters.
Definition tuple_sketch_impl.hpp:630
Update Tuple sketch.
Definition tuple_sketch.hpp:217
void trim()
Remove retained entries in excess of the nominal size k (if any)
Definition tuple_sketch_impl.hpp:227
void reset()
Reset the sketch to the initial empty state.
Definition tuple_sketch_impl.hpp:232
const uint64_t MAX_THETA
max theta - signed max for compatibility with Java
Definition theta_constants.hpp:36
DataSketches namespace.
Definition binomial_bounds.hpp:38