datasketches-cpp
Loading...
Searching...
No Matches
HllSketch-internal.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _HLLSKETCH_INTERNAL_HPP_
21#define _HLLSKETCH_INTERNAL_HPP_
22
23#include "hll.hpp"
24#include "HllUtil.hpp"
25#include "HllSketchImplFactory.hpp"
26#include "CouponList.hpp"
27#include "HllArray.hpp"
28#include "common_defs.hpp"
29
30#include <cstdio>
31#include <cstdlib>
32#include <string>
33#include <iostream>
34#include <sstream>
35#include <iomanip>
36
37namespace datasketches {
38
39typedef union {
40 int64_t longBytes;
41 double doubleBytes;
42} longDoubleUnion;
43
44template<typename A>
45hll_sketch_alloc<A>::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
46 HllUtil<A>::checkLgK(lg_config_k);
47 if (start_full_size) {
48 sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
49 } else {
50 typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
51 sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
52 }
53}
54
55template<typename A>
56hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
57 HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
58 return hll_sketch_alloc<A>(impl);
59}
60
61template<typename A>
62hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
63 HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
64 return hll_sketch_alloc<A>(impl);
65}
66
67template<typename A>
68hll_sketch_alloc<A>::~hll_sketch_alloc() {
69 if (sketch_impl != nullptr) {
70 sketch_impl->get_deleter()(sketch_impl);
71 }
72}
73
74template<typename A>
75hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that) :
76 sketch_impl(that.sketch_impl->copy())
77{}
78
79template<typename A>
80hll_sketch_alloc<A>::hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type) :
81 sketch_impl(that.sketch_impl->copyAs(tgt_type))
82{}
83
84template<typename A>
85hll_sketch_alloc<A>::hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept :
86 sketch_impl(nullptr)
87{
88 std::swap(sketch_impl, that.sketch_impl);
89}
90
91template<typename A>
92hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
93 sketch_impl(that)
94{}
95
96template<typename A>
97hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
98 sketch_impl->get_deleter()(sketch_impl);
99 sketch_impl = other.sketch_impl->copy();
100 return *this;
101}
102
103template<typename A>
104hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
105 std::swap(sketch_impl, other.sketch_impl);
106 return *this;
107}
108
109template<typename A>
110void hll_sketch_alloc<A>::reset() {
111 // TODO: need to allow starting from a full-sized sketch
112 // (either here or in other implementation)
113 sketch_impl = sketch_impl->reset();
114}
115
116template<typename A>
117void hll_sketch_alloc<A>::update(const std::string& datum) {
118 if (datum.empty()) { return; }
119 HashState hashResult;
120 HllUtil<A>::hash(datum.c_str(), datum.length(), DEFAULT_SEED, hashResult);
121 coupon_update(HllUtil<A>::coupon(hashResult));
122}
123
124template<typename A>
125void hll_sketch_alloc<A>::update(uint64_t datum) {
126 // no sign extension with 64 bits so no need to cast to signed value
127 HashState hashResult;
128 HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
129 coupon_update(HllUtil<A>::coupon(hashResult));
130}
131
132template<typename A>
133void hll_sketch_alloc<A>::update(uint32_t datum) {
134 update(static_cast<int32_t>(datum));
135}
136
137template<typename A>
138void hll_sketch_alloc<A>::update(uint16_t datum) {
139 update(static_cast<int16_t>(datum));
140}
141
142template<typename A>
143void hll_sketch_alloc<A>::update(uint8_t datum) {
144 update(static_cast<int8_t>(datum));
145}
146
147template<typename A>
148void hll_sketch_alloc<A>::update(int64_t datum) {
149 HashState hashResult;
150 HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
151 coupon_update(HllUtil<A>::coupon(hashResult));
152}
153
154template<typename A>
155void hll_sketch_alloc<A>::update(int32_t datum) {
156 const int64_t val = static_cast<int64_t>(datum);
157 HashState hashResult;
158 HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
159 coupon_update(HllUtil<A>::coupon(hashResult));
160}
161
162template<typename A>
163void hll_sketch_alloc<A>::update(int16_t datum) {
164 const int64_t val = static_cast<int64_t>(datum);
165 HashState hashResult;
166 HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
167 coupon_update(HllUtil<A>::coupon(hashResult));
168}
169
170template<typename A>
171void hll_sketch_alloc<A>::update(int8_t datum) {
172 const int64_t val = static_cast<int64_t>(datum);
173 HashState hashResult;
174 HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
175 coupon_update(HllUtil<A>::coupon(hashResult));
176}
177
178template<typename A>
179void hll_sketch_alloc<A>::update(double datum) {
180 longDoubleUnion d;
181 d.doubleBytes = static_cast<double>(datum);
182 if (datum == 0.0) {
183 d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
184 } else if (std::isnan(d.doubleBytes)) {
185 d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
186 }
187 HashState hashResult;
188 HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
189 coupon_update(HllUtil<A>::coupon(hashResult));
190}
191
192template<typename A>
193void hll_sketch_alloc<A>::update(float datum) {
194 longDoubleUnion d;
195 d.doubleBytes = static_cast<double>(datum);
196 if (datum == 0.0) {
197 d.doubleBytes = 0.0; // canonicalize -0.0 to 0.0
198 } else if (std::isnan(d.doubleBytes)) {
199 d.longBytes = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
200 }
201 HashState hashResult;
202 HllUtil<A>::hash(&d, sizeof(double), DEFAULT_SEED, hashResult);
203 coupon_update(HllUtil<A>::coupon(hashResult));
204}
205
206template<typename A>
207void hll_sketch_alloc<A>::update(const void* data, size_t lengthBytes) {
208 if (data == nullptr) { return; }
209 HashState hashResult;
210 HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
211 coupon_update(HllUtil<A>::coupon(hashResult));
212}
213
214template<typename A>
215void hll_sketch_alloc<A>::coupon_update(uint32_t coupon) {
216 if (coupon == hll_constants::EMPTY) { return; }
217 HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
218 if (result != this->sketch_impl) {
219 this->sketch_impl->get_deleter()(this->sketch_impl);
220 this->sketch_impl = result;
221 }
222}
223
224template<typename A>
225void hll_sketch_alloc<A>::serialize_compact(std::ostream& os) const {
226 return sketch_impl->serialize(os, true);
227}
228
229template<typename A>
230void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
231 return sketch_impl->serialize(os, false);
232}
233
234template<typename A>
235auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
236 return sketch_impl->serialize(true, header_size_bytes);
237}
238
239template<typename A>
240auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
241 return sketch_impl->serialize(false, 0);
242}
243
244template<typename A>
245string<A> hll_sketch_alloc<A>::to_string(const bool summary,
246 const bool detail,
247 const bool aux_detail,
248 const bool all) const {
249 // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
250 // The stream does not support passing an allocator instance, and alternatives are complicated.
251 std::stringstream os;
252 if (summary) {
253 os << "### HLL sketch summary:" << std::endl
254 << " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
255 << " Hll Target : " << type_as_string() << std::endl
256 << " Current Mode : " << mode_as_string() << std::endl
257 << " LB : " << get_lower_bound(1) << std::endl
258 << " Estimate : " << get_estimate() << std::endl
259 << " UB : " << get_upper_bound(1) << std::endl
260 << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
261 if (get_current_mode() == HLL) {
262 HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
263 os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
264 << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
265 << " HipAccum : " << hllArray->getHipAccum() << std::endl
266 << " KxQ0 : " << hllArray->getKxQ0() << std::endl
267 << " KxQ1 : " << hllArray->getKxQ1() << std::endl;
268 if (get_target_type() == HLL_4) {
269 const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
270 os << " Aux table? : " << (hll4_ptr->getAuxHashMap() != nullptr ? "true" : "false") << std::endl;
271 }
272 } else {
273 os << " Coupon count : "
274 << std::to_string(((CouponList<A>*) sketch_impl)->getCouponCount()) << std::endl;
275 }
276 os << "### End HLL sketch summary" << std::endl;
277 }
278
279 if (detail) {
280 os << "### HLL sketch data detail:" << std::endl;
281 if (get_current_mode() == HLL) {
282 const HllArray<A>* hll_ptr = static_cast<const HllArray<A>*>(sketch_impl);
283 os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl;
284 auto it = hll_ptr->begin(all);
285 while (it != hll_ptr->end()) {
286 os << std::setw(10) << HllUtil<A>::getLow26(*it);
287 os << std::setw(6) << HllUtil<A>::getValue(*it);
288 os << std::endl;
289 ++it;
290 }
291 } else {
292 const CouponList<A>* list_ptr = static_cast<const CouponList<A>*>(sketch_impl);
293 os << std::left;
294 os << std::setw(10) << "Index";
295 os << std::setw(10) << "Key";
296 os << std::setw(10) << "Slot";
297 os << std::setw(6) << "Value";
298 os << std::endl;
299 auto it = list_ptr->begin(all);
300 int i = 0;
301 int mask = (1 << get_lg_config_k()) - 1;
302 while (it != list_ptr->end()) {
303 os << std::setw(10) << i;
304 os << std::setw(10) << HllUtil<A>::getLow26(*it);
305 os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
306 os << std::setw(6) << HllUtil<A>::getValue(*it);
307 os << std::endl;
308 ++it;
309 ++i;
310 }
311 }
312 os << "### End HLL sketch data detail" << std::endl;
313 }
314 if (aux_detail) {
315 if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) {
316 const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
317 const AuxHashMap<A>* aux_ptr = hll4_ptr->getAuxHashMap();
318 if (aux_ptr != nullptr) {
319 os << "### HLL sketch aux detail:" << std::endl;
320 os << std::left;
321 os << std::setw(10) << "Index";
322 os << std::setw(10) << "Key";
323 os << std::setw(10) << "Slot";
324 os << std::setw(6) << "Value";
325 os << std::endl;
326 auto it = aux_ptr->begin(all);
327 int i = 0;
328 int mask = (1 << get_lg_config_k()) - 1;
329 while (it != aux_ptr->end()) {
330 os << std::setw(10) << i;
331 os << std::setw(10) << HllUtil<A>::getLow26(*it);
332 os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
333 os << std::setw(6) << HllUtil<A>::getValue(*it);
334 os << std::endl;
335 ++it;
336 ++i;
337 }
338 os << "### End HLL sketch aux detail" << std::endl;
339 }
340 }
341 }
342
343 return string<A>(os.str().c_str(), sketch_impl->getAllocator());
344}
345
346template<typename A>
347double hll_sketch_alloc<A>::get_estimate() const {
348 return sketch_impl->getEstimate();
349}
350
351template<typename A>
352double hll_sketch_alloc<A>::get_composite_estimate() const {
353 return sketch_impl->getCompositeEstimate();
354}
355
356template<typename A>
357double hll_sketch_alloc<A>::get_lower_bound(uint8_t numStdDev) const {
358 return sketch_impl->getLowerBound(numStdDev);
359}
360
361template<typename A>
362double hll_sketch_alloc<A>::get_upper_bound(uint8_t numStdDev) const {
363 return sketch_impl->getUpperBound(numStdDev);
364}
365
366template<typename A>
367hll_mode hll_sketch_alloc<A>::get_current_mode() const {
368 return sketch_impl->getCurMode();
369}
370
371template<typename A>
372uint8_t hll_sketch_alloc<A>::get_lg_config_k() const {
373 return sketch_impl->getLgConfigK();
374}
375
376template<typename A>
377target_hll_type hll_sketch_alloc<A>::get_target_type() const {
378 return sketch_impl->getTgtHllType();
379}
380
381template<typename A>
382bool hll_sketch_alloc<A>::is_out_of_order_flag() const {
383 return sketch_impl->isOutOfOrderFlag();
384}
385
386template<typename A>
387bool hll_sketch_alloc<A>::is_estimation_mode() const {
388 return true;
389}
390
391template<typename A>
392uint32_t hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
393 return sketch_impl->getUpdatableSerializationBytes();
394}
395
396template<typename A>
397uint32_t hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
398 return sketch_impl->getCompactSerializationBytes();
399}
400
401template<typename A>
402bool hll_sketch_alloc<A>::is_compact() const {
403 return sketch_impl->isCompact();
404}
405
406template<typename A>
407bool hll_sketch_alloc<A>::is_empty() const {
408 return sketch_impl->isEmpty();
409}
410
411template<typename A>
412std::string hll_sketch_alloc<A>::type_as_string() const {
413 switch (sketch_impl->getTgtHllType()) {
414 case target_hll_type::HLL_4:
415 return std::string("HLL_4");
416 case target_hll_type::HLL_6:
417 return std::string("HLL_6");
418 case target_hll_type::HLL_8:
419 return std::string("HLL_8");
420 default:
421 throw std::runtime_error("Sketch state error: Invalid target_hll_type");
422 }
423}
424
425template<typename A>
426std::string hll_sketch_alloc<A>::mode_as_string() const {
427 switch (sketch_impl->getCurMode()) {
428 case LIST:
429 return std::string("LIST");
430 case SET:
431 return std::string("SET");
432 case HLL:
433 return std::string("HLL");
434 default:
435 throw std::runtime_error("Sketch state error: Invalid hll_mode");
436 }
437}
438
439template<typename A>
440uint32_t hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(uint8_t lg_config_k,
441 const target_hll_type tgtHllType) {
442 uint32_t arrBytes;
443 if (tgtHllType == target_hll_type::HLL_4) {
444 const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k];
445 arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
446 } else if (tgtHllType == target_hll_type::HLL_6) {
447 arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
448 } else { //HLL_8
449 arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
450 }
451 return hll_constants::HLL_BYTE_ARR_START + arrBytes;
452}
453
454template<typename A>
455double hll_sketch_alloc<A>::get_rel_err(bool upperBound, bool unioned,
456 uint8_t lg_config_k, uint8_t numStdDev) {
457 return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
458}
459
460}
461
462#endif // _HLLSKETCH_INTERNAL_HPP_
DataSketches namespace.
Definition binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition hll.hpp:72