datasketches-cpp
HllSketch-internal.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLLSKETCH_INTERNAL_HPP_
21 #define _HLLSKETCH_INTERNAL_HPP_
22 
23 #include "hll.hpp"
24 #include "HllUtil.hpp"
25 #include "HllSketchImplFactory.hpp"
26 #include "CouponList.hpp"
27 #include "HllArray.hpp"
28 #include "common_defs.hpp"
29 
30 #include <cstdio>
31 #include <cstdlib>
32 #include <string>
33 #include <iostream>
34 #include <sstream>
35 #include <iomanip>
36 
37 namespace datasketches {
38 
39 typedef union {
40  int64_t longBytes;
41  double doubleBytes;
42 } longDoubleUnion;
43 
44 template<typename A>
45 hll_sketch_alloc<A>::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
46  HllUtil<A>::checkLgK(lg_config_k);
47  if (start_full_size) {
48  sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
49  } else {
50  typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
51  sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
52  }
53 }
54 
55 template<typename A>
56 hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
57  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
58  return hll_sketch_alloc<A>(impl);
59 }
60 
61 template<typename A>
62 hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
63  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
64  return hll_sketch_alloc<A>(impl);
65 }
66 
67 template<typename A>
68 hll_sketch_alloc<A>::~hll_sketch_alloc() {
69  if (sketch_impl != nullptr) {
70  sketch_impl->get_deleter()(sketch_impl);
71  }
72 }
73 
74 template<typename A>
75 hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that) :
76  sketch_impl(that.sketch_impl->copy())
77 {}
78 
79 template<typename A>
80 hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type) :
81  sketch_impl(that.sketch_impl->copyAs(tgt_type))
82 {}
83 
84 template<typename A>
85 hll_sketch_alloc<A>::hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept :
86  sketch_impl(nullptr)
87 {
88  std::swap(sketch_impl, that.sketch_impl);
89 }
90 
91 template<typename A>
92 hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
93  sketch_impl(that)
94 {}
95 
96 template<typename A>
97 hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
98  sketch_impl->get_deleter()(sketch_impl);
99  sketch_impl = other.sketch_impl->copy();
100  return *this;
101 }
102 
103 template<typename A>
104 hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
105  std::swap(sketch_impl, other.sketch_impl);
106  return *this;
107 }
108 
109 template<typename A>
110 void hll_sketch_alloc<A>::reset() {
111  // TODO: need to allow starting from a full-sized sketch
112  // (either here or in other implementation)
113  sketch_impl = sketch_impl->reset();
114 }
115 
116 template<typename A>
117 void hll_sketch_alloc<A>::update(const std::string& datum) {
118  if (datum.empty()) { return; }
119  HashState hashResult;
120  HllUtil<A>::hash(datum.c_str(), datum.length(), DEFAULT_SEED, hashResult);
121  coupon_update(HllUtil<A>::coupon(hashResult));
122 }
123 
124 template<typename A>
125 void hll_sketch_alloc<A>::update(uint64_t datum) {
126  // no sign extension with 64 bits so no need to cast to signed value
127  HashState hashResult;
128  HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
129  coupon_update(HllUtil<A>::coupon(hashResult));
130 }
131 
132 template<typename A>
133 void hll_sketch_alloc<A>::update(uint32_t datum) {
134  update(static_cast<int32_t>(datum));
135 }
136 
137 template<typename A>
138 void hll_sketch_alloc<A>::update(uint16_t datum) {
139  update(static_cast<int16_t>(datum));
140 }
141 
142 template<typename A>
143 void hll_sketch_alloc<A>::update(uint8_t datum) {
144  update(static_cast<int8_t>(datum));
145 }
146 
147 template<typename A>
148 void hll_sketch_alloc<A>::update(int64_t datum) {
149  HashState hashResult;
150  HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
151  coupon_update(HllUtil<A>::coupon(hashResult));
152 }
153 
154 template<typename A>
155 void hll_sketch_alloc<A>::update(int32_t datum) {
156  const int64_t val = static_cast<int64_t>(datum);
157  HashState hashResult;
158  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
159  coupon_update(HllUtil<A>::coupon(hashResult));
160 }
161 
162 template<typename A>
163 void hll_sketch_alloc<A>::update(int16_t datum) {
164  const int64_t val = static_cast<int64_t>(datum);
165  HashState hashResult;
166  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
167  coupon_update(HllUtil<A>::coupon(hashResult));
168 }
169 
170 template<typename A>
171 void hll_sketch_alloc<A>::update(int8_t datum) {
172  const int64_t val = static_cast<int64_t>(datum);
173  HashState hashResult;
174  HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
175  coupon_update(HllUtil<A>::coupon(hashResult));
176 }
177 
178 template<typename A>
179 void hll_sketch_alloc<A>::update(double datum) {
180  longDoubleUnion d;
181  d.doubleBytes = static_cast<double>(datum);
182  if (datum == 0.0) {
183  d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
184  } else if (std::isnan(d.doubleBytes)) {
185  d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
186  }
187  HashState hashResult;
188  HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
189  coupon_update(HllUtil<A>::coupon(hashResult));
190 }
191 
192 template<typename A>
193 void hll_sketch_alloc<A>::update(float datum) {
194  longDoubleUnion d;
195  d.doubleBytes = static_cast<double>(datum);
196  if (datum == 0.0) {
197  d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
198  } else if (std::isnan(d.doubleBytes)) {
199  d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
200  }
201  HashState hashResult;
202  HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
203  coupon_update(HllUtil<A>::coupon(hashResult));
204 }
205 
206 template<typename A>
207 void hll_sketch_alloc<A>::update(const void* data, size_t lengthBytes) {
208  if (data == nullptr) { return; }
209  HashState hashResult;
210  HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
211  coupon_update(HllUtil<A>::coupon(hashResult));
212 }
213 
214 template<typename A>
215 void hll_sketch_alloc<A>::coupon_update(uint32_t coupon) {
216  if (coupon == hll_constants::EMPTY) { return; }
217  HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
218  if (result != this->sketch_impl) {
219  this->sketch_impl->get_deleter()(this->sketch_impl);
220  this->sketch_impl = result;
221  }
222 }
223 
224 template<typename A>
225 void hll_sketch_alloc<A>::serialize_compact(std::ostream& os) const {
226  return sketch_impl->serialize(os, true);
227 }
228 
229 template<typename A>
230 void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
231  return sketch_impl->serialize(os, false);
232 }
233 
234 template<typename A>
235 auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
236  return sketch_impl->serialize(true, header_size_bytes);
237 }
238 
239 template<typename A>
240 auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
241  return sketch_impl->serialize(false, 0);
242 }
243 
244 template<typename A>
245 string<A> hll_sketch_alloc<A>::to_string(const bool summary,
246  const bool detail,
247  const bool aux_detail,
248  const bool all) const {
249  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
250  // The stream does not support passing an allocator instance, and alternatives are complicated.
251  std::stringstream os;
252  if (summary) {
253  os << "### HLL sketch summary:" << std::endl
254  << " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
255  << " Hll Target : " << type_as_string() << std::endl
256  << " Current Mode : " << mode_as_string() << std::endl
257  << " LB : " << get_lower_bound(1) << std::endl
258  << " Estimate : " << get_estimate() << std::endl
259  << " UB : " << get_upper_bound(1) << std::endl
260  << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
261  if (get_current_mode() == HLL) {
262  HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
263  os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
264  << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
265  << " HipAccum : " << hllArray->getHipAccum() << std::endl
266  << " KxQ0 : " << hllArray->getKxQ0() << std::endl
267  << " KxQ1 : " << hllArray->getKxQ1() << std::endl;
268  if (get_target_type() == HLL_4) {
269  const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
270  os << " Aux table? : " << (hll4_ptr->getAuxHashMap() != nullptr ? "true" : "false") << std::endl;
271  }
272  } else {
273  os << " Coupon count : "
274  << std::to_string(((CouponList<A>*) sketch_impl)->getCouponCount()) << std::endl;
275  }
276  os << "### End HLL sketch summary" << std::endl;
277  }
278 
279  if (detail) {
280  os << "### HLL sketch data detail:" << std::endl;
281  if (get_current_mode() == HLL) {
282  const HllArray<A>* hll_ptr = static_cast<const HllArray<A>*>(sketch_impl);
283  os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl;
284  auto it = hll_ptr->begin(all);
285  while (it != hll_ptr->end()) {
286  os << std::setw(10) << HllUtil<A>::getLow26(*it);
287  os << std::setw(6) << HllUtil<A>::getValue(*it);
288  os << std::endl;
289  ++it;
290  }
291  } else {
292  const CouponList<A>* list_ptr = static_cast<const CouponList<A>*>(sketch_impl);
293  os << std::left;
294  os << std::setw(10) << "Index";
295  os << std::setw(10) << "Key";
296  os << std::setw(10) << "Slot";
297  os << std::setw(6) << "Value";
298  os << std::endl;
299  auto it = list_ptr->begin(all);
300  int i = 0;
301  int mask = (1 << get_lg_config_k()) - 1;
302  while (it != list_ptr->end()) {
303  os << std::setw(10) << i;
304  os << std::setw(10) << HllUtil<A>::getLow26(*it);
305  os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
306  os << std::setw(6) << HllUtil<A>::getValue(*it);
307  os << std::endl;
308  ++it;
309  ++i;
310  }
311  }
312  os << "### End HLL sketch data detail" << std::endl;
313  }
314  if (aux_detail) {
315  if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) {
316  const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
317  const AuxHashMap<A>* aux_ptr = hll4_ptr->getAuxHashMap();
318  if (aux_ptr != nullptr) {
319  os << "### HLL sketch aux detail:" << std::endl;
320  os << std::left;
321  os << std::setw(10) << "Index";
322  os << std::setw(10) << "Key";
323  os << std::setw(10) << "Slot";
324  os << std::setw(6) << "Value";
325  os << std::endl;
326  auto it = aux_ptr->begin(all);
327  int i = 0;
328  int mask = (1 << get_lg_config_k()) - 1;
329  while (it != aux_ptr->end()) {
330  os << std::setw(10) << i;
331  os << std::setw(10) << HllUtil<A>::getLow26(*it);
332  os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
333  os << std::setw(6) << HllUtil<A>::getValue(*it);
334  os << std::endl;
335  ++it;
336  ++i;
337  }
338  os << "### End HLL sketch aux detail" << std::endl;
339  }
340  }
341  }
342 
343  return string<A>(os.str().c_str(), sketch_impl->getAllocator());
344 }
345 
346 template<typename A>
347 double hll_sketch_alloc<A>::get_estimate() const {
348  return sketch_impl->getEstimate();
349 }
350 
351 template<typename A>
352 double hll_sketch_alloc<A>::get_composite_estimate() const {
353  return sketch_impl->getCompositeEstimate();
354 }
355 
356 template<typename A>
357 double hll_sketch_alloc<A>::get_lower_bound(uint8_t numStdDev) const {
358  return sketch_impl->getLowerBound(numStdDev);
359 }
360 
361 template<typename A>
362 double hll_sketch_alloc<A>::get_upper_bound(uint8_t numStdDev) const {
363  return sketch_impl->getUpperBound(numStdDev);
364 }
365 
366 template<typename A>
367 hll_mode hll_sketch_alloc<A>::get_current_mode() const {
368  return sketch_impl->getCurMode();
369 }
370 
371 template<typename A>
372 uint8_t hll_sketch_alloc<A>::get_lg_config_k() const {
373  return sketch_impl->getLgConfigK();
374 }
375 
376 template<typename A>
377 target_hll_type hll_sketch_alloc<A>::get_target_type() const {
378  return sketch_impl->getTgtHllType();
379 }
380 
381 template<typename A>
382 bool hll_sketch_alloc<A>::is_out_of_order_flag() const {
383  return sketch_impl->isOutOfOrderFlag();
384 }
385 
386 template<typename A>
387 bool hll_sketch_alloc<A>::is_estimation_mode() const {
388  return true;
389 }
390 
391 template<typename A>
392 uint32_t hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
393  return sketch_impl->getUpdatableSerializationBytes();
394 }
395 
396 template<typename A>
397 uint32_t hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
398  return sketch_impl->getCompactSerializationBytes();
399 }
400 
401 template<typename A>
402 bool hll_sketch_alloc<A>::is_compact() const {
403  return sketch_impl->isCompact();
404 }
405 
406 template<typename A>
407 bool hll_sketch_alloc<A>::is_empty() const {
408  return sketch_impl->isEmpty();
409 }
410 
411 template<typename A>
412 std::string hll_sketch_alloc<A>::type_as_string() const {
413  switch (sketch_impl->getTgtHllType()) {
414  case target_hll_type::HLL_4:
415  return std::string("HLL_4");
416  case target_hll_type::HLL_6:
417  return std::string("HLL_6");
418  case target_hll_type::HLL_8:
419  return std::string("HLL_8");
420  default:
421  throw std::runtime_error("Sketch state error: Invalid target_hll_type");
422  }
423 }
424 
425 template<typename A>
426 std::string hll_sketch_alloc<A>::mode_as_string() const {
427  switch (sketch_impl->getCurMode()) {
428  case LIST:
429  return std::string("LIST");
430  case SET:
431  return std::string("SET");
432  case HLL:
433  return std::string("HLL");
434  default:
435  throw std::runtime_error("Sketch state error: Invalid hll_mode");
436  }
437 }
438 
439 template<typename A>
440 uint32_t hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(uint8_t lg_config_k,
441  const target_hll_type tgtHllType) {
442  uint32_t arrBytes;
443  if (tgtHllType == target_hll_type::HLL_4) {
444  const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k];
445  arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
446  } else if (tgtHllType == target_hll_type::HLL_6) {
447  arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
448  } else { //HLL_8
449  arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
450  }
451  return hll_constants::HLL_BYTE_ARR_START + arrBytes;
452 }
453 
454 template<typename A>
455 double hll_sketch_alloc<A>::get_rel_err(bool upperBound, bool unioned,
456  uint8_t lg_config_k, uint8_t numStdDev) {
457  return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
458 }
459 
460 }
461 
462 #endif // _HLLSKETCH_INTERNAL_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72