datasketches-cpp
Loading...
Searching...
No Matches
var_opt_union_impl.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _VAR_OPT_UNION_IMPL_HPP_
21#define _VAR_OPT_UNION_IMPL_HPP_
22
23#include "var_opt_union.hpp"
24
25#include <cmath>
26#include <sstream>
27#include <stdexcept>
28
29namespace datasketches {
30
31template<typename T, typename A>
32var_opt_union<T, A>::var_opt_union(uint32_t max_k, const A& allocator) :
33 n_(0),
34 outer_tau_numer_(0.0),
35 outer_tau_denom_(0),
36 max_k_(max_k),
37 allocator_(allocator),
38 gadget_(max_k, var_opt_sketch<T, A>::DEFAULT_RESIZE_FACTOR, true, allocator)
39{}
40
41template<typename T, typename A>
42var_opt_union<T, A>::var_opt_union(const var_opt_union& other) :
43 n_(other.n_),
44 outer_tau_numer_(other.outer_tau_numer_),
45 outer_tau_denom_(other.outer_tau_denom_),
46 max_k_(other.max_k_),
47 allocator_(other.allocator_),
48 gadget_(other.gadget_)
49{}
50
51template<typename T, typename A>
52var_opt_union<T, A>::var_opt_union(var_opt_union&& other) noexcept :
53 n_(other.n_),
54 outer_tau_numer_(other.outer_tau_numer_),
55 outer_tau_denom_(other.outer_tau_denom_),
56 max_k_(other.max_k_),
57 allocator_(other.allocator_),
58 gadget_(std::move(other.gadget_))
59{}
60
61template<typename T, typename A>
62var_opt_union<T, A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
63 uint32_t max_k, var_opt_sketch<T, A>&& gadget, const A& allocator) :
64 n_(n),
65 outer_tau_numer_(outer_tau_numer),
66 outer_tau_denom_(outer_tau_denom),
67 max_k_(max_k),
68 allocator_(allocator),
69 gadget_(gadget)
70{}
71
72template<typename T, typename A>
73var_opt_union<T, A>::~var_opt_union() {}
74
75template<typename T, typename A>
76var_opt_union<T, A>& var_opt_union<T, A>::operator=(const var_opt_union& other) {
77 var_opt_union union_copy(other);
78 std::swap(n_, union_copy.n_);
79 std::swap(outer_tau_numer_, union_copy.outer_tau_numer_);
80 std::swap(outer_tau_denom_, union_copy.outer_tau_denom_);
81 std::swap(max_k_, union_copy.max_k_);
82 std::swap(allocator_, other.allocator_);
83 std::swap(gadget_, union_copy.gadget_);
84 return *this;
85}
86
87template<typename T, typename A>
88var_opt_union<T, A>& var_opt_union<T, A>::operator=(var_opt_union&& other) {
89 std::swap(n_, other.n_);
90 std::swap(outer_tau_numer_, other.outer_tau_numer_);
91 std::swap(outer_tau_denom_, other.outer_tau_denom_);
92 std::swap(max_k_, other.max_k_);
93 std::swap(allocator_, other.allocator_);
94 std::swap(gadget_, other.gadget_);
95 return *this;
96}
97
98/*
99 * An empty union requires 8 bytes.
100 *
101 * <pre>
102 * Long || Start Byte Adr:
103 * Adr:
104 * || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
105 * 0 || Preamble_Longs | SerVer | FamID | Flags |---------Max Res. Size (K)---------|
106 * </pre>
107 *
108 * A non-empty sketch requires 24 bytes of preamble for an under-full sample; once there are
109 * at least k items the sketch uses 32 bytes of preamble.
110 *
111 * The count of items seen is limited to 48 bits (~256 trillion) even though there are adjacent
112 * unused preamble bits. The acceptance probability for an item is a double in the range [0,1),
113 * limiting us to 53 bits of randomness due to details of the IEEE floating point format. To
114 * ensure meaningful probabilities as the items seen count approaches capacity, we intentionally
115 * use slightly fewer bits.
116 *
117 * Following the header are weights for the heavy items, then marks in the event this is a gadget.
118 * The serialized items come last.
119 *
120 * <pre>
121 * Long || Start Byte Adr:
122 * Adr:
123 * || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
124 * 0 || Preamble_Longs | SerVer | FamID | Flags |---------Max Res. Size (K)---------|
125 *
126 * || 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
127 * 1 ||---------------------------Items Seen Count (N)--------------------------------|
128 *
129 * || 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
130 * 2 ||------------------------Outer Tau Numerator (double)---------------------------|
131 *
132 * || 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
133 * 3 ||----------------------Outer Tau Denominator (uint64_t)-------------------------|
134 * </pre>
135 */
136
137template<typename T, typename A>
138template<typename SerDe>
139var_opt_union<T, A> var_opt_union<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
140 const auto preamble_longs = read<uint8_t>(is);
141 const auto serial_version = read<uint8_t>(is);
142 const auto family_id = read<uint8_t>(is);
143 const auto flags = read<uint8_t>(is);
144 const auto max_k = read<uint32_t>(is);
145
146 check_preamble_longs(preamble_longs, flags);
147 check_family_and_serialization_version(family_id, serial_version);
148
149 if (max_k == 0 || max_k > var_opt_constants::MAX_K) {
150 throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
151 }
152
153 bool is_empty = flags & EMPTY_FLAG_MASK;
154
155 if (is_empty) {
156 if (!is.good())
157 throw std::runtime_error("error reading from std::istream");
158 else
159 return var_opt_union(max_k);
160 }
161
162 const auto items_seen = read<uint64_t>(is);
163 const auto outer_tau_numer = read<double>(is);
164 const auto outer_tau_denom = read<uint64_t>(is);
165
166 var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(is, sd, allocator);
167
168 if (!is.good())
169 throw std::runtime_error("error reading from std::istream");
170
171 return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget), allocator);
172}
173
174template<typename T, typename A>
175template<typename SerDe>
176var_opt_union<T, A> var_opt_union<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
177 ensure_minimum_memory(size, 8);
178 const char* ptr = static_cast<const char*>(bytes);
179 uint8_t preamble_longs;
180 ptr += copy_from_mem(ptr, preamble_longs);
181 uint8_t serial_version;
182 ptr += copy_from_mem(ptr, serial_version);
183 uint8_t family_id;
184 ptr += copy_from_mem(ptr, family_id);
185 uint8_t flags;
186 ptr += copy_from_mem(ptr, flags);
187 uint32_t max_k;
188 ptr += copy_from_mem(ptr, max_k);
189
190 check_preamble_longs(preamble_longs, flags);
191 check_family_and_serialization_version(family_id, serial_version);
192
193 if (max_k == 0 || max_k > var_opt_constants::MAX_K) {
194 throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
195 }
196
197 bool is_empty = flags & EMPTY_FLAG_MASK;
198
199 if (is_empty) {
200 return var_opt_union(max_k);
201 }
202
203 uint64_t items_seen;
204 ptr += copy_from_mem(ptr, items_seen);
205 double outer_tau_numer;
206 ptr += copy_from_mem(ptr, outer_tau_numer);
207 uint64_t outer_tau_denom;
208 ptr += copy_from_mem(ptr, outer_tau_denom);
209
210 const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
211 var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(ptr, gadget_size, sd, allocator);
212
213 return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget), allocator);
214}
215
216template<typename T, typename A>
217template<typename SerDe>
219 if (n_ == 0) {
220 return PREAMBLE_LONGS_EMPTY << 3;
221 } else {
222 return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes(sd);
223 }
224}
225
226template<typename T, typename A>
227template<typename SerDe>
228void var_opt_union<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
229 bool empty = (n_ == 0);
230
231 const uint8_t serialization_version(SER_VER);
232 const uint8_t family_id(FAMILY_ID);
233
234 uint8_t preamble_longs;
235 uint8_t flags;
236 if (empty) {
237 preamble_longs = PREAMBLE_LONGS_EMPTY;
238 flags = EMPTY_FLAG_MASK;
239 } else {
240 preamble_longs = PREAMBLE_LONGS_NON_EMPTY;
241 flags = 0;
242 }
243
244 write(os, preamble_longs);
245 write(os, serialization_version);
246 write(os, family_id);
247 write(os, flags);
248 write(os, max_k_);
249
250 if (!empty) {
251 write(os, n_);
252 write(os, outer_tau_numer_);
253 write(os, outer_tau_denom_);
254 gadget_.serialize(os, sd);
255 }
256}
257
258template<typename T, typename A>
259template<typename SerDe>
260std::vector<uint8_t, AllocU8<A>> var_opt_union<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
261 const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
262 std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
263 uint8_t* ptr = bytes.data() + header_size_bytes;
264
265 const bool empty = n_ == 0;
266
267 const uint8_t serialization_version(SER_VER);
268 const uint8_t family_id(FAMILY_ID);
269
270 uint8_t preamble_longs;
271 uint8_t flags;
272
273 if (empty) {
274 preamble_longs = PREAMBLE_LONGS_EMPTY;
275 flags = EMPTY_FLAG_MASK;
276 } else {
277 preamble_longs = PREAMBLE_LONGS_NON_EMPTY;
278 flags = 0;
279 }
280
281 // first prelong
282 ptr += copy_to_mem(preamble_longs, ptr);
283 ptr += copy_to_mem(serialization_version, ptr);
284 ptr += copy_to_mem(family_id, ptr);
285 ptr += copy_to_mem(flags, ptr);
286 ptr += copy_to_mem(max_k_, ptr);
287
288 if (!empty) {
289 ptr += copy_to_mem(n_, ptr);
290 ptr += copy_to_mem(outer_tau_numer_, ptr);
291 ptr += copy_to_mem(outer_tau_denom_, ptr);
292
293 auto gadget_bytes = gadget_.serialize(0, sd);
294 ptr += copy_to_mem(gadget_bytes.data(), ptr, gadget_bytes.size() * sizeof(uint8_t));
295 }
296
297 return bytes;
298}
299
300template<typename T, typename A>
302 n_ = 0;
303 outer_tau_numer_ = 0.0;
304 outer_tau_denom_ = 0;
305 gadget_.reset();
306}
307
308template<typename T, typename A>
310 // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
311 // The stream does not support passing an allocator instance, and alternatives are complicated.
312 std::ostringstream os;
313 os << "### VarOpt Union SUMMARY:" << std::endl;
314 os << " n : " << n_ << std::endl;
315 os << " Max k : " << max_k_ << std::endl;
316 os << " Gadget Summary:" << std::endl;
317 os << gadget_.to_string();
318 os << "### END VarOpt Union SUMMARY" << std::endl;
319 return string<A>(os.str().c_str(), gadget_.allocator_);
320}
321
322template<typename T, typename A>
324 merge_items(sk);
325 resolve_tau(sk);
326}
327
328template<typename T, typename A>
330 merge_items(std::move(sk));
331 resolve_tau(sk); // don't need items, so ok even if they've been moved out
332}
333
334template<typename T, typename A>
336 if (outer_tau_denom_ == 0) {
337 return 0.0;
338 } else {
339 return outer_tau_numer_ / outer_tau_denom_;
340 }
341}
342
343template<typename T, typename A>
344void var_opt_union<T, A>::merge_items(const var_opt_sketch<T, A>& sketch) {
345 if (sketch.n_ == 0) {
346 return;
347 }
348
349 n_ += sketch.n_;
350
351 // H region const_iterator
352 typename var_opt_sketch<T, A>::const_iterator h_itr(sketch, false, false);
353 typename var_opt_sketch<T, A>::const_iterator h_end(sketch, true, false);
354 while (h_itr != h_end) {
355 std::pair<const T&, const double> sample = *h_itr;
356 gadget_.update(sample.first, sample.second, false);
357 ++h_itr;
358 }
359
360 // Weight-correcting R region iterator (const_iterator doesn't do the correction)
361 typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
362 typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
363 while (r_itr != r_end) {
364 std::pair<const T&, const double> sample = *r_itr;
365 gadget_.update(sample.first, sample.second, true);
366 ++r_itr;
367 }
368}
369
370template<typename T, typename A>
371void var_opt_union<T, A>::merge_items(var_opt_sketch<T, A>&& sketch) {
372 if (sketch.n_ == 0) {
373 return;
374 }
375
376 n_ += sketch.n_;
377
378 // H region iterator
379 typename var_opt_sketch<T, A>::iterator h_itr(sketch, false, false);
380 typename var_opt_sketch<T, A>::iterator h_end(sketch, true, false);
381 while (h_itr != h_end) {
382 std::pair<T&, double> sample = *h_itr;
383 gadget_.update(std::move(sample.first), sample.second, false);
384 ++h_itr;
385 }
386
387 // Weight-correcting R region iterator
388 typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
389 typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
390 while (r_itr != r_end) {
391 std::pair<T&, double> sample = *r_itr;
392 gadget_.update(std::move(sample.first), sample.second, true);
393 ++r_itr;
394 }
395}
396
397template<typename T, typename A>
398void var_opt_union<T, A>::resolve_tau(const var_opt_sketch<T, A>& sketch) {
399 if (sketch.r_ > 0) {
400 const double sketch_tau = sketch.get_tau();
401 const double outer_tau = get_outer_tau();
402
403 if (outer_tau_denom_ == 0) {
404 // detect first estimation mode sketch and grab its tau
405 outer_tau_numer_ = sketch.total_wt_r_;
406 outer_tau_denom_ = sketch.r_;
407 } else if (sketch_tau > outer_tau) {
408 // switch to a bigger value of outer_tau
409 outer_tau_numer_ = sketch.total_wt_r_;
410 outer_tau_denom_ = sketch.r_;
411 } else if (sketch_tau == outer_tau) {
412 // Ok if previous equality test isn't quite perfect. Mistakes in either direction should
413 // be fairly benign.
414 // Without conceptually changing outer_tau, update number and denominator. In particular,
415 // add the total weight of the incoming reservoir to the running total.
416 outer_tau_numer_ += sketch.total_wt_r_;
417 outer_tau_denom_ += sketch.r_;
418 }
419
420 // do nothing if sketch's tau is smaller than outer_tau
421 }
422}
423
424template<typename T, typename A>
426 // If no marked items in H, gadget is already valid mathematically. We can return what is
427 // basically just a copy of the gadget.
428 if (gadget_.num_marks_in_h_ == 0) {
429 return simple_gadget_coercer();
430 } else {
431 // Copy of gadget. This may produce needless copying in the
432 // pseudo-exact case below, but should simplify the code without
433 // needing to make the gadget a pointer
434 var_opt_sketch<T, A> gcopy(gadget_, false, n_);
435
436 // At this point, we know that marked items are present in H. So:
437 // 1. Result will necessarily be in estimation mode
438 // 2. Marked items currently in H need to be absorbed into reservoir (R)
439 const bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
440 if (!is_pseudo_exact) {
441 // continue with main logic
442 migrate_marked_items_by_decreasing_k(gcopy);
443 }
444 // sub-case was already detected and handled, so return the result
445 return gcopy;
446 }
447}
448
455template<typename T, typename A>
457 if (gadget_.num_marks_in_h_ != 0) throw std::logic_error("simple gadget coercer only applies if no marks");
458 return var_opt_sketch<T, A>(gadget_, true, n_);
459}
460
461// this is a condition checked in detect_and_handle_subcase_of_pseudo_exact()
462template<typename T, typename A>
463bool var_opt_union<T, A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
464 for (uint32_t i = 0; i < gadget_.h_; ++i) {
465 if ((gadget_.weights_[i] < threshold) && !gadget_.marks_[i]) {
466 return true;
467 }
468 }
469 return false;
470}
471
472template<typename T, typename A>
473bool var_opt_union<T, A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const {
474 // gadget is seemingly exact
475 const bool condition1 = gadget_.r_ == 0;
476
477 // but there are marked items in H, so only _pseudo_ exact
478 const bool condition2 = gadget_.num_marks_in_h_ > 0;
479
480 // if gadget is pseudo-exact and the number of marks equals outer_tau_denom, then we can deduce
481 // from the bookkeeping logic of resolve_tau() that all estimation mode input sketches must
482 // have had the same tau, so we can throw all of the marked items into a common reservoir.
483 const bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
484
485 if (!(condition1 && condition2 && condition3)) {
486 return false;
487 } else {
488
489 // explicitly enforce rule that items in H should not be lighter than the sketch's tau
490 const bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
491 if (anti_condition4) {
492 return false;
493 } else {
494 // conditions 1 through 4 hold
495 mark_moving_gadget_coercer(sk);
496 return true;
497 }
498 }
499}
500
509template<typename T, typename A>
510void var_opt_union<T, A>::mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const {
511 const uint32_t result_k = gadget_.h_ + gadget_.r_;
512
513 uint32_t result_h = 0;
514 uint32_t result_r = 0;
515 size_t next_r_pos = result_k; // = (result_k+1)-1, to fill R region from back to front
516
517 double* wts = AllocDouble(allocator_).allocate(result_k + 1);
518 T* data = A(allocator_).allocate(result_k + 1);
519
520 // insert R region items, ignoring weights
521 // Currently (May 2017) this next block is unreachable; this coercer is used only in the
522 // pseudo-exact case in which case there are no items natively in R, only marked items in H
523 // that will be moved into R as part of the coercion process.
524 // Addedndum (Jan 2020): Cleanup at end of method assumes R count is 0
525 const size_t final_idx = gadget_.get_num_samples();
526 for (size_t idx = gadget_.h_ + 1; idx <= final_idx; ++idx) {
527 new (&data[next_r_pos]) T(gadget_.data_[idx]);
528 wts[next_r_pos] = gadget_.weights_[idx];
529 ++result_r;
530 --next_r_pos;
531 }
532
533 double transferred_weight = 0;
534
535 // insert H region items
536 for (size_t idx = 0; idx < gadget_.h_; ++idx) {
537 if (gadget_.marks_[idx]) {
538 new (&data[next_r_pos]) T(gadget_.data_[idx]);
539 wts[next_r_pos] = -1.0;
540 transferred_weight += gadget_.weights_[idx];
541 ++result_r;
542 --next_r_pos;
543 } else {
544 new (&data[result_h]) T(gadget_.data_[idx]);
545 wts[result_h] = gadget_.weights_[idx];
546 ++result_h;
547 }
548 }
549
550 if (result_h + result_r != result_k) throw std::logic_error("H + R counts must equal k");
551 if (std::abs(transferred_weight - outer_tau_numer_) > 1e-10) {
552 throw std::logic_error("unexpected mismatch in transferred weight");
553 }
554
555 const double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
556 const uint64_t result_n = n_;
557
558 // explicitly set weight value for the gap
559 wts[result_h] = -1.0;
560
561 // clean up arrays in input sketch, replace with new values
562 AllocBool(allocator_).deallocate(sk.marks_, sk.curr_items_alloc_);
563 AllocDouble(allocator_).deallocate(sk.weights_, sk.curr_items_alloc_);
564 for (size_t i = 0; i < result_k; ++i) { sk.data_[i].~T(); } // assumes everything in H region, no gap
565 A(allocator_).deallocate(sk.data_, sk.curr_items_alloc_);
566
567 sk.data_ = data;
568 sk.weights_ = wts;
569 sk.marks_ = nullptr;
570 sk.num_marks_in_h_ = 0;
571 sk.curr_items_alloc_ = result_k + 1;
572 sk.k_ = result_k;
573 sk.n_ = result_n;
574 sk.h_ = result_h;
575 sk.r_ = result_r;
576 sk.total_wt_r_ = result_r_weight;
577}
578
579// this is basically a continuation of get_result(), but modifying the input gadget copy
580template<typename T, typename A>
581void var_opt_union<T, A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& gcopy) const {
582 const uint32_t r_count = gcopy.r_;
583 const uint32_t h_count = gcopy.h_;
584 const uint32_t k = gcopy.k_;
585
586 // should be ensured by caller
587 if (gcopy.num_marks_in_h_ == 0) throw std::logic_error("unexpectedly found no marked items to migrate");
588 // either full (of samples), in pseudo-exact mode, or both
589 if ((r_count != 0) && ((h_count + r_count) != k)) throw std::logic_error("invalid gadget state");
590
591 // if non-full and pseudo-exact, change k so that gcopy is full
592 if ((r_count == 0) && (h_count < k)) {
593 gcopy.k_ = h_count; // may leve extra space allocated but that's ok
594 }
595
596 // Now k equals the number of samples, so reducing k will increase tau.
597 // Also, we know that there are at least 2 samples because 0 or 1 would have been handled
598 // by the earlier logic in get_result()
599 gcopy.decrease_k_by_1();
600
601 // gcopy is now in estimation mode, just like the final result must be (due to marked items)
602 if (gcopy.get_tau() == 0.0) throw std::logic_error("gadget must be in sampling mode");
603
604 // keep reducing k until all marked items have been absorbed into the reservoir
605 while (gcopy.num_marks_in_h_ > 0) {
606 // gcopy.k_ >= 2 because h_ and r_ are both at least 1, but checked in next method anyway
607 gcopy.decrease_k_by_1();
608 }
609
610 gcopy.strip_marks();
611}
612
613template<typename T, typename A>
614void var_opt_union<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
615 bool is_empty(flags & EMPTY_FLAG_MASK);
616
617 if (is_empty) {
618 if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
619 throw std::invalid_argument("Possible corruption: Preamble longs must be "
620 + std::to_string(PREAMBLE_LONGS_EMPTY) + " for an empty sketch. Found: "
621 + std::to_string(preamble_longs));
622 }
623 } else {
624 if (preamble_longs != PREAMBLE_LONGS_NON_EMPTY) {
625 throw std::invalid_argument("Possible corruption: Preamble longs must be "
626 + std::to_string(PREAMBLE_LONGS_NON_EMPTY)
627 + " for a non-empty sketch. Found: " + std::to_string(preamble_longs));
628 }
629 }
630}
631
632template<typename T, typename A>
633void var_opt_union<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
634 if (family_id == FAMILY_ID) {
635 if (ser_ver != SER_VER) {
636 throw std::invalid_argument("Possible corruption: VarOpt Union serialization version must be "
637 + std::to_string(SER_VER) + ". Found: " + std::to_string(ser_ver));
638 }
639 return;
640 }
641 // TODO: extend to handle reservoir sampling
642
643 throw std::invalid_argument("Possible corruption: VarOpt Union family id must be "
644 + std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
645}
646
647} // namespace datasketches
648
649#endif // _VAR_OPT_UNION_IMPL_HPP_
This sketch samples data from a stream of items.
Definition var_opt_sketch.hpp:67
Provides a unioning operation over var_opt_sketch objects.
Definition var_opt_union.hpp:52
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the union.
Definition var_opt_union_impl.hpp:218
void reset()
Resets the union to its default, empty state.
Definition var_opt_union_impl.hpp:301
string< A > to_string() const
Prints a summary of the union as a string.
Definition var_opt_union_impl.hpp:309
const resize_factor DEFAULT_RESIZE_FACTOR
default resize factor
Definition theta_constants.hpp:33
DataSketches namespace.
Definition binomial_bounds.hpp:38