datasketches-cpp
var_opt_union_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _VAR_OPT_UNION_IMPL_HPP_
21 #define _VAR_OPT_UNION_IMPL_HPP_
22 
23 #include "var_opt_union.hpp"
24 
25 #include <cmath>
26 #include <sstream>
27 #include <stdexcept>
28 
29 namespace datasketches {
30 
31 template<typename T, typename A>
32 var_opt_union<T, A>::var_opt_union(uint32_t max_k, const A& allocator) :
33  n_(0),
34  outer_tau_numer_(0.0),
35  outer_tau_denom_(0),
36  max_k_(max_k),
37  allocator_(allocator),
38  gadget_(max_k, var_opt_sketch<T, A>::DEFAULT_RESIZE_FACTOR, true, allocator)
39 {}
40 
41 template<typename T, typename A>
42 var_opt_union<T, A>::var_opt_union(const var_opt_union& other) :
43  n_(other.n_),
44  outer_tau_numer_(other.outer_tau_numer_),
45  outer_tau_denom_(other.outer_tau_denom_),
46  max_k_(other.max_k_),
47  allocator_(other.allocator_),
48  gadget_(other.gadget_)
49 {}
50 
51 template<typename T, typename A>
52 var_opt_union<T, A>::var_opt_union(var_opt_union&& other) noexcept :
53  n_(other.n_),
54  outer_tau_numer_(other.outer_tau_numer_),
55  outer_tau_denom_(other.outer_tau_denom_),
56  max_k_(other.max_k_),
57  allocator_(other.allocator_),
58  gadget_(std::move(other.gadget_))
59 {}
60 
61 template<typename T, typename A>
62 var_opt_union<T, A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
63  uint32_t max_k, var_opt_sketch<T, A>&& gadget, const A& allocator) :
64  n_(n),
65  outer_tau_numer_(outer_tau_numer),
66  outer_tau_denom_(outer_tau_denom),
67  max_k_(max_k),
68  allocator_(allocator),
69  gadget_(gadget)
70 {}
71 
72 template<typename T, typename A>
73 var_opt_union<T, A>::~var_opt_union() {}
74 
75 template<typename T, typename A>
76 var_opt_union<T, A>& var_opt_union<T, A>::operator=(const var_opt_union& other) {
77  var_opt_union union_copy(other);
78  std::swap(n_, union_copy.n_);
79  std::swap(outer_tau_numer_, union_copy.outer_tau_numer_);
80  std::swap(outer_tau_denom_, union_copy.outer_tau_denom_);
81  std::swap(max_k_, union_copy.max_k_);
82  std::swap(allocator_, other.allocator_);
83  std::swap(gadget_, union_copy.gadget_);
84  return *this;
85 }
86 
87 template<typename T, typename A>
88 var_opt_union<T, A>& var_opt_union<T, A>::operator=(var_opt_union&& other) {
89  std::swap(n_, other.n_);
90  std::swap(outer_tau_numer_, other.outer_tau_numer_);
91  std::swap(outer_tau_denom_, other.outer_tau_denom_);
92  std::swap(max_k_, other.max_k_);
93  std::swap(allocator_, other.allocator_);
94  std::swap(gadget_, other.gadget_);
95  return *this;
96 }
97 
98 /*
99  * An empty union requires 8 bytes.
100  *
101  * <pre>
102  * Long || Start Byte Adr:
103  * Adr:
104  * || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
105  * 0 || Preamble_Longs | SerVer | FamID | Flags |---------Max Res. Size (K)---------|
106  * </pre>
107  *
108  * A non-empty sketch requires 24 bytes of preamble for an under-full sample; once there are
109  * at least k items the sketch uses 32 bytes of preamble.
110  *
111  * The count of items seen is limited to 48 bits (~256 trillion) even though there are adjacent
112  * unused preamble bits. The acceptance probability for an item is a double in the range [0,1),
113  * limiting us to 53 bits of randomness due to details of the IEEE floating point format. To
114  * ensure meaningful probabilities as the items seen count approaches capacity, we intentionally
115  * use slightly fewer bits.
116  *
117  * Following the header are weights for the heavy items, then marks in the event this is a gadget.
118  * The serialized items come last.
119  *
120  * <pre>
121  * Long || Start Byte Adr:
122  * Adr:
123  * || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
124  * 0 || Preamble_Longs | SerVer | FamID | Flags |---------Max Res. Size (K)---------|
125  *
126  * || 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
127  * 1 ||---------------------------Items Seen Count (N)--------------------------------|
128  *
129  * || 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
130  * 2 ||------------------------Outer Tau Numerator (double)---------------------------|
131  *
132  * || 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
133  * 3 ||----------------------Outer Tau Denominator (uint64_t)-------------------------|
134  * </pre>
135  */
136 
137 template<typename T, typename A>
138 template<typename SerDe>
139 var_opt_union<T, A> var_opt_union<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
140  const auto preamble_longs = read<uint8_t>(is);
141  const auto serial_version = read<uint8_t>(is);
142  const auto family_id = read<uint8_t>(is);
143  const auto flags = read<uint8_t>(is);
144  const auto max_k = read<uint32_t>(is);
145 
146  check_preamble_longs(preamble_longs, flags);
147  check_family_and_serialization_version(family_id, serial_version);
148 
149  if (max_k == 0 || max_k > var_opt_constants::MAX_K) {
150  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
151  }
152 
153  bool is_empty = flags & EMPTY_FLAG_MASK;
154 
155  if (is_empty) {
156  if (!is.good())
157  throw std::runtime_error("error reading from std::istream");
158  else
159  return var_opt_union(max_k);
160  }
161 
162  const auto items_seen = read<uint64_t>(is);
163  const auto outer_tau_numer = read<double>(is);
164  const auto outer_tau_denom = read<uint64_t>(is);
165 
166  var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(is, sd, allocator);
167 
168  if (!is.good())
169  throw std::runtime_error("error reading from std::istream");
170 
171  return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget), allocator);
172 }
173 
174 template<typename T, typename A>
175 template<typename SerDe>
176 var_opt_union<T, A> var_opt_union<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
177  ensure_minimum_memory(size, 8);
178  const char* ptr = static_cast<const char*>(bytes);
179  uint8_t preamble_longs;
180  ptr += copy_from_mem(ptr, preamble_longs);
181  uint8_t serial_version;
182  ptr += copy_from_mem(ptr, serial_version);
183  uint8_t family_id;
184  ptr += copy_from_mem(ptr, family_id);
185  uint8_t flags;
186  ptr += copy_from_mem(ptr, flags);
187  uint32_t max_k;
188  ptr += copy_from_mem(ptr, max_k);
189 
190  check_preamble_longs(preamble_longs, flags);
191  check_family_and_serialization_version(family_id, serial_version);
192 
193  if (max_k == 0 || max_k > var_opt_constants::MAX_K) {
194  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
195  }
196 
197  bool is_empty = flags & EMPTY_FLAG_MASK;
198 
199  if (is_empty) {
200  return var_opt_union(max_k);
201  }
202 
203  uint64_t items_seen;
204  ptr += copy_from_mem(ptr, items_seen);
205  double outer_tau_numer;
206  ptr += copy_from_mem(ptr, outer_tau_numer);
207  uint64_t outer_tau_denom;
208  ptr += copy_from_mem(ptr, outer_tau_denom);
209 
210  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
211  var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(ptr, gadget_size, sd, allocator);
212 
213  return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget), allocator);
214 }
215 
216 template<typename T, typename A>
217 template<typename SerDe>
218 size_t var_opt_union<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
219  if (n_ == 0) {
220  return PREAMBLE_LONGS_EMPTY << 3;
221  } else {
222  return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes(sd);
223  }
224 }
225 
226 template<typename T, typename A>
227 template<typename SerDe>
228 void var_opt_union<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
229  bool empty = (n_ == 0);
230 
231  const uint8_t serialization_version(SER_VER);
232  const uint8_t family_id(FAMILY_ID);
233 
234  uint8_t preamble_longs;
235  uint8_t flags;
236  if (empty) {
237  preamble_longs = PREAMBLE_LONGS_EMPTY;
238  flags = EMPTY_FLAG_MASK;
239  } else {
240  preamble_longs = PREAMBLE_LONGS_NON_EMPTY;
241  flags = 0;
242  }
243 
244  write(os, preamble_longs);
245  write(os, serialization_version);
246  write(os, family_id);
247  write(os, flags);
248  write(os, max_k_);
249 
250  if (!empty) {
251  write(os, n_);
252  write(os, outer_tau_numer_);
253  write(os, outer_tau_denom_);
254  gadget_.serialize(os, sd);
255  }
256 }
257 
258 template<typename T, typename A>
259 template<typename SerDe>
260 std::vector<uint8_t, AllocU8<A>> var_opt_union<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
261  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
262  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
263  uint8_t* ptr = bytes.data() + header_size_bytes;
264 
265  const bool empty = n_ == 0;
266 
267  const uint8_t serialization_version(SER_VER);
268  const uint8_t family_id(FAMILY_ID);
269 
270  uint8_t preamble_longs;
271  uint8_t flags;
272 
273  if (empty) {
274  preamble_longs = PREAMBLE_LONGS_EMPTY;
275  flags = EMPTY_FLAG_MASK;
276  } else {
277  preamble_longs = PREAMBLE_LONGS_NON_EMPTY;
278  flags = 0;
279  }
280 
281  // first prelong
282  ptr += copy_to_mem(preamble_longs, ptr);
283  ptr += copy_to_mem(serialization_version, ptr);
284  ptr += copy_to_mem(family_id, ptr);
285  ptr += copy_to_mem(flags, ptr);
286  ptr += copy_to_mem(max_k_, ptr);
287 
288  if (!empty) {
289  ptr += copy_to_mem(n_, ptr);
290  ptr += copy_to_mem(outer_tau_numer_, ptr);
291  ptr += copy_to_mem(outer_tau_denom_, ptr);
292 
293  auto gadget_bytes = gadget_.serialize(0, sd);
294  ptr += copy_to_mem(gadget_bytes.data(), ptr, gadget_bytes.size() * sizeof(uint8_t));
295  }
296 
297  return bytes;
298 }
299 
300 template<typename T, typename A>
302  n_ = 0;
303  outer_tau_numer_ = 0.0;
304  outer_tau_denom_ = 0;
305  gadget_.reset();
306 }
307 
308 template<typename T, typename A>
310  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
311  // The stream does not support passing an allocator instance, and alternatives are complicated.
312  std::ostringstream os;
313  os << "### VarOpt Union SUMMARY:" << std::endl;
314  os << " n : " << n_ << std::endl;
315  os << " Max k : " << max_k_ << std::endl;
316  os << " Gadget Summary:" << std::endl;
317  os << gadget_.to_string();
318  os << "### END VarOpt Union SUMMARY" << std::endl;
319  return string<A>(os.str().c_str(), gadget_.allocator_);
320 }
321 
322 template<typename T, typename A>
324  merge_items(sk);
325  resolve_tau(sk);
326 }
327 
328 template<typename T, typename A>
330  merge_items(std::move(sk));
331  resolve_tau(sk); // don't need items, so ok even if they've been moved out
332 }
333 
334 template<typename T, typename A>
335 double var_opt_union<T, A>::get_outer_tau() const {
336  if (outer_tau_denom_ == 0) {
337  return 0.0;
338  } else {
339  return outer_tau_numer_ / outer_tau_denom_;
340  }
341 }
342 
343 template<typename T, typename A>
344 void var_opt_union<T, A>::merge_items(const var_opt_sketch<T, A>& sketch) {
345  if (sketch.n_ == 0) {
346  return;
347  }
348 
349  n_ += sketch.n_;
350 
351  // H region const_iterator
352  typename var_opt_sketch<T, A>::const_iterator h_itr(sketch, false, false);
353  typename var_opt_sketch<T, A>::const_iterator h_end(sketch, true, false);
354  while (h_itr != h_end) {
355  std::pair<const T&, const double> sample = *h_itr;
356  gadget_.update(sample.first, sample.second, false);
357  ++h_itr;
358  }
359 
360  // Weight-correcting R region iterator (const_iterator doesn't do the correction)
361  typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
362  typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
363  while (r_itr != r_end) {
364  std::pair<const T&, const double> sample = *r_itr;
365  gadget_.update(sample.first, sample.second, true);
366  ++r_itr;
367  }
368 }
369 
370 template<typename T, typename A>
371 void var_opt_union<T, A>::merge_items(var_opt_sketch<T, A>&& sketch) {
372  if (sketch.n_ == 0) {
373  return;
374  }
375 
376  n_ += sketch.n_;
377 
378  // H region iterator
379  typename var_opt_sketch<T, A>::iterator h_itr(sketch, false, false);
380  typename var_opt_sketch<T, A>::iterator h_end(sketch, true, false);
381  while (h_itr != h_end) {
382  std::pair<T&, double> sample = *h_itr;
383  gadget_.update(std::move(sample.first), sample.second, false);
384  ++h_itr;
385  }
386 
387  // Weight-correcting R region iterator
388  typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
389  typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
390  while (r_itr != r_end) {
391  std::pair<T&, double> sample = *r_itr;
392  gadget_.update(std::move(sample.first), sample.second, true);
393  ++r_itr;
394  }
395 }
396 
397 template<typename T, typename A>
398 void var_opt_union<T, A>::resolve_tau(const var_opt_sketch<T, A>& sketch) {
399  if (sketch.r_ > 0) {
400  const double sketch_tau = sketch.get_tau();
401  const double outer_tau = get_outer_tau();
402 
403  if (outer_tau_denom_ == 0) {
404  // detect first estimation mode sketch and grab its tau
405  outer_tau_numer_ = sketch.total_wt_r_;
406  outer_tau_denom_ = sketch.r_;
407  } else if (sketch_tau > outer_tau) {
408  // switch to a bigger value of outer_tau
409  outer_tau_numer_ = sketch.total_wt_r_;
410  outer_tau_denom_ = sketch.r_;
411  } else if (sketch_tau == outer_tau) {
412  // Ok if previous equality test isn't quite perfect. Mistakes in either direction should
413  // be fairly benign.
414  // Without conceptually changing outer_tau, update number and denominator. In particular,
415  // add the total weight of the incoming reservoir to the running total.
416  outer_tau_numer_ += sketch.total_wt_r_;
417  outer_tau_denom_ += sketch.r_;
418  }
419 
420  // do nothing if sketch's tau is smaller than outer_tau
421  }
422 }
423 
424 template<typename T, typename A>
426  // If no marked items in H, gadget is already valid mathematically. We can return what is
427  // basically just a copy of the gadget.
428  if (gadget_.num_marks_in_h_ == 0) {
429  return simple_gadget_coercer();
430  } else {
431  // Copy of gadget. This may produce needless copying in the
432  // pseudo-exact case below, but should simplify the code without
433  // needing to make the gadget a pointer
434  var_opt_sketch<T, A> gcopy(gadget_, false, n_);
435 
436  // At this point, we know that marked items are present in H. So:
437  // 1. Result will necessarily be in estimation mode
438  // 2. Marked items currently in H need to be absorbed into reservoir (R)
439  const bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
440  if (!is_pseudo_exact) {
441  // continue with main logic
442  migrate_marked_items_by_decreasing_k(gcopy);
443  }
444  // sub-case was already detected and handled, so return the result
445  return gcopy;
446  }
447 }
448 
455 template<typename T, typename A>
457  if (gadget_.num_marks_in_h_ != 0) throw std::logic_error("simple gadget coercer only applies if no marks");
458  return var_opt_sketch<T, A>(gadget_, true, n_);
459 }
460 
461 // this is a condition checked in detect_and_handle_subcase_of_pseudo_exact()
462 template<typename T, typename A>
463 bool var_opt_union<T, A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
464  for (uint32_t i = 0; i < gadget_.h_; ++i) {
465  if ((gadget_.weights_[i] < threshold) && !gadget_.marks_[i]) {
466  return true;
467  }
468  }
469  return false;
470 }
471 
472 template<typename T, typename A>
473 bool var_opt_union<T, A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const {
474  // gadget is seemingly exact
475  const bool condition1 = gadget_.r_ == 0;
476 
477  // but there are marked items in H, so only _pseudo_ exact
478  const bool condition2 = gadget_.num_marks_in_h_ > 0;
479 
480  // if gadget is pseudo-exact and the number of marks equals outer_tau_denom, then we can deduce
481  // from the bookkeeping logic of resolve_tau() that all estimation mode input sketches must
482  // have had the same tau, so we can throw all of the marked items into a common reservoir.
483  const bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
484 
485  if (!(condition1 && condition2 && condition3)) {
486  return false;
487  } else {
488 
489  // explicitly enforce rule that items in H should not be lighter than the sketch's tau
490  const bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
491  if (anti_condition4) {
492  return false;
493  } else {
494  // conditions 1 through 4 hold
495  mark_moving_gadget_coercer(sk);
496  return true;
497  }
498  }
499 }
500 
509 template<typename T, typename A>
510 void var_opt_union<T, A>::mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const {
511  const uint32_t result_k = gadget_.h_ + gadget_.r_;
512 
513  uint32_t result_h = 0;
514  uint32_t result_r = 0;
515  size_t next_r_pos = result_k; // = (result_k+1)-1, to fill R region from back to front
516 
517  double* wts = AllocDouble(allocator_).allocate(result_k + 1);
518  T* data = A(allocator_).allocate(result_k + 1);
519 
520  // insert R region items, ignoring weights
521  // Currently (May 2017) this next block is unreachable; this coercer is used only in the
522  // pseudo-exact case in which case there are no items natively in R, only marked items in H
523  // that will be moved into R as part of the coercion process.
524  // Addedndum (Jan 2020): Cleanup at end of method assumes R count is 0
525  const size_t final_idx = gadget_.get_num_samples();
526  for (size_t idx = gadget_.h_ + 1; idx <= final_idx; ++idx) {
527  new (&data[next_r_pos]) T(gadget_.data_[idx]);
528  wts[next_r_pos] = gadget_.weights_[idx];
529  ++result_r;
530  --next_r_pos;
531  }
532 
533  double transferred_weight = 0;
534 
535  // insert H region items
536  for (size_t idx = 0; idx < gadget_.h_; ++idx) {
537  if (gadget_.marks_[idx]) {
538  new (&data[next_r_pos]) T(gadget_.data_[idx]);
539  wts[next_r_pos] = -1.0;
540  transferred_weight += gadget_.weights_[idx];
541  ++result_r;
542  --next_r_pos;
543  } else {
544  new (&data[result_h]) T(gadget_.data_[idx]);
545  wts[result_h] = gadget_.weights_[idx];
546  ++result_h;
547  }
548  }
549 
550  if (result_h + result_r != result_k) throw std::logic_error("H + R counts must equal k");
551  if (std::abs(transferred_weight - outer_tau_numer_) > 1e-10) {
552  throw std::logic_error("unexpected mismatch in transferred weight");
553  }
554 
555  const double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
556  const uint64_t result_n = n_;
557 
558  // explicitly set weight value for the gap
559  wts[result_h] = -1.0;
560 
561  // clean up arrays in input sketch, replace with new values
562  AllocBool(allocator_).deallocate(sk.marks_, sk.curr_items_alloc_);
563  AllocDouble(allocator_).deallocate(sk.weights_, sk.curr_items_alloc_);
564  for (size_t i = 0; i < result_k; ++i) { sk.data_[i].~T(); } // assumes everything in H region, no gap
565  A(allocator_).deallocate(sk.data_, sk.curr_items_alloc_);
566 
567  sk.data_ = data;
568  sk.weights_ = wts;
569  sk.marks_ = nullptr;
570  sk.num_marks_in_h_ = 0;
571  sk.curr_items_alloc_ = result_k + 1;
572  sk.k_ = result_k;
573  sk.n_ = result_n;
574  sk.h_ = result_h;
575  sk.r_ = result_r;
576  sk.total_wt_r_ = result_r_weight;
577 }
578 
579 // this is basically a continuation of get_result(), but modifying the input gadget copy
580 template<typename T, typename A>
581 void var_opt_union<T, A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& gcopy) const {
582  const uint32_t r_count = gcopy.r_;
583  const uint32_t h_count = gcopy.h_;
584  const uint32_t k = gcopy.k_;
585 
586  // should be ensured by caller
587  if (gcopy.num_marks_in_h_ == 0) throw std::logic_error("unexpectedly found no marked items to migrate");
588  // either full (of samples), in pseudo-exact mode, or both
589  if ((r_count != 0) && ((h_count + r_count) != k)) throw std::logic_error("invalid gadget state");
590 
591  // if non-full and pseudo-exact, change k so that gcopy is full
592  if ((r_count == 0) && (h_count < k)) {
593  gcopy.k_ = h_count; // may leve extra space allocated but that's ok
594  }
595 
596  // Now k equals the number of samples, so reducing k will increase tau.
597  // Also, we know that there are at least 2 samples because 0 or 1 would have been handled
598  // by the earlier logic in get_result()
599  gcopy.decrease_k_by_1();
600 
601  // gcopy is now in estimation mode, just like the final result must be (due to marked items)
602  if (gcopy.get_tau() == 0.0) throw std::logic_error("gadget must be in sampling mode");
603 
604  // keep reducing k until all marked items have been absorbed into the reservoir
605  while (gcopy.num_marks_in_h_ > 0) {
606  // gcopy.k_ >= 2 because h_ and r_ are both at least 1, but checked in next method anyway
607  gcopy.decrease_k_by_1();
608  }
609 
610  gcopy.strip_marks();
611 }
612 
613 template<typename T, typename A>
614 void var_opt_union<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
615  bool is_empty(flags & EMPTY_FLAG_MASK);
616 
617  if (is_empty) {
618  if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
619  throw std::invalid_argument("Possible corruption: Preamble longs must be "
620  + std::to_string(PREAMBLE_LONGS_EMPTY) + " for an empty sketch. Found: "
621  + std::to_string(preamble_longs));
622  }
623  } else {
624  if (preamble_longs != PREAMBLE_LONGS_NON_EMPTY) {
625  throw std::invalid_argument("Possible corruption: Preamble longs must be "
626  + std::to_string(PREAMBLE_LONGS_NON_EMPTY)
627  + " for a non-empty sketch. Found: " + std::to_string(preamble_longs));
628  }
629  }
630 }
631 
632 template<typename T, typename A>
633 void var_opt_union<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
634  if (family_id == FAMILY_ID) {
635  if (ser_ver != SER_VER) {
636  throw std::invalid_argument("Possible corruption: VarOpt Union serialization version must be "
637  + std::to_string(SER_VER) + ". Found: " + std::to_string(ser_ver));
638  }
639  return;
640  }
641  // TODO: extend to handle reservoir sampling
642 
643  throw std::invalid_argument("Possible corruption: VarOpt Union family id must be "
644  + std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
645 }
646 
647 } // namespace datasketches
648 
649 #endif // _VAR_OPT_UNION_IMPL_HPP_
static var_opt_sketch deserialize(std::istream &is, const SerDe &sd=SerDe(), const A &allocator=A())
This method deserializes a sketch from a given stream.
Provides a unioning operation over var_opt_sketch objects.
Definition: var_opt_union.hpp:52
size_t get_serialized_size_bytes(const SerDe &sd=SerDe()) const
Computes size needed to serialize the current state of the union.
Definition: var_opt_union_impl.hpp:218
void reset()
Resets the union to its default, empty state.
Definition: var_opt_union_impl.hpp:301
string< A > to_string() const
Prints a summary of the union as a string.
Definition: var_opt_union_impl.hpp:309
const resize_factor DEFAULT_RESIZE_FACTOR
default resize factor
Definition: theta_constants.hpp:33
DataSketches namespace.
Definition: binomial_bounds.hpp:38