datasketches-cpp
Loading...
Searching...
No Matches
hll.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _HLL_HPP_
21#define _HLL_HPP_
22
23#include "common_defs.hpp"
24#include "HllUtil.hpp"
25
26#include <iostream>
27#include <memory>
28#include <string>
29#include <vector>
30
31namespace datasketches {
32
33// forward declarations
34template<typename A> class hll_sketch_alloc;
35template<typename A> class hll_union_alloc;
36
38using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
41
77
111// forward declaration
112template<typename A> class HllSketchImpl;
113
114template<typename A = std::allocator<uint8_t> >
115class hll_sketch_alloc final {
116 public:
126 explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
127
132 hll_sketch_alloc(const hll_sketch_alloc<A>& that);
133
139 hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
140
145 hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
146
152 static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
153
160 static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
161
163 virtual ~hll_sketch_alloc();
164
170 hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
171
177 hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
178
183 void reset();
184
185 // This is a convenience alias for users
186 // The type returned by the following serialize method
187 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
188
195 vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
196
202 vector_bytes serialize_updatable() const;
203
209 void serialize_compact(std::ostream& os) const;
210
216 void serialize_updatable(std::ostream& os) const;
217
226 string<A> to_string(bool summary = true,
227 bool detail = false,
228 bool aux_detail = false,
229 bool all = false) const;
230
237 void update(const std::string& datum);
238
243 void update(uint64_t datum);
244
249 void update(uint32_t datum);
250
255 void update(uint16_t datum);
256
261 void update(uint8_t datum);
262
267 void update(int64_t datum);
268
273 void update(int32_t datum);
274
279 void update(int16_t datum);
280
285 void update(int8_t datum);
286
291 void update(double datum);
292
297 void update(float datum);
298
304 void update(const void* data, size_t length_bytes);
305
310 double get_estimate() const;
311
323 double get_composite_estimate() const;
324
331 double get_lower_bound(uint8_t num_std_dev) const;
332
339 double get_upper_bound(uint8_t num_std_dev) const;
340
345 uint8_t get_lg_config_k() const;
346
351 target_hll_type get_target_type() const;
352
357 bool is_compact() const;
358
363 bool is_empty() const;
364
369 uint32_t get_compact_serialization_bytes() const;
370
375 uint32_t get_updatable_serialization_bytes() const;
376
388 static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
389
400 static double get_rel_err(bool upper_bound, bool unioned,
401 uint8_t lg_config_k, uint8_t num_std_dev);
402
403 private:
404 explicit hll_sketch_alloc(HllSketchImpl<A>* that);
405
406 void coupon_update(uint32_t coupon);
407
408 std::string type_as_string() const;
409 std::string mode_as_string() const;
410
411 hll_mode get_current_mode() const;
412 uint8_t get_serialization_version() const;
413 bool is_out_of_order_flag() const;
414 bool is_estimation_mode() const;
415
416 HllSketchImpl<A>* sketch_impl;
417 friend hll_union_alloc<A>;
418};
419
445template<typename A = std::allocator<uint8_t> >
447 public:
454 explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
455
460 double get_estimate() const;
461
473 double get_composite_estimate() const;
474
481 double get_lower_bound(uint8_t num_std_dev) const;
482
489 double get_upper_bound(uint8_t num_std_dev) const;
490
495 uint8_t get_lg_config_k() const;
496
502
507 bool is_empty() const;
508
513 void reset();
514
521 hll_sketch_alloc<A> get_result(target_hll_type tgt_type = HLL_4) const;
522
527 void update(const hll_sketch_alloc<A>& sketch);
528
533 void update(hll_sketch_alloc<A>&& sketch);
534
541 void update(const std::string& datum);
542
547 void update(uint64_t datum);
548
553 void update(uint32_t datum);
554
559 void update(uint16_t datum);
560
565 void update(uint8_t datum);
566
571 void update(int64_t datum);
572
577 void update(int32_t datum);
578
583 void update(int16_t datum);
584
589 void update(int8_t datum);
590
595 void update(double datum);
596
601 void update(float datum);
602
608 void update(const void* data, size_t length_bytes);
609
620 static double get_rel_err(bool upper_bound, bool unioned,
621 uint8_t lg_config_k, uint8_t num_std_dev);
622
623 private:
624
634 inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
635
636 static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
637
638 void coupon_update(uint32_t coupon);
639
640 hll_mode get_current_mode() const;
641 bool is_out_of_order_flag() const;
642 bool is_estimation_mode() const;
643
644 // calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
645 static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
646
647 uint8_t lg_max_k_;
648 hll_sketch_alloc<A> gadget_;
649};
650
651} // namespace datasketches
652
653#include "hll.private.hpp"
654
655#endif // _HLL_HPP_
This is a high performance implementation of Phillipe Flajolet's HLL sketch but with significantly im...
Definition HllSketchImpl.hpp:31
This performs union operations for HLL sketches.
Definition hll.hpp:446
target_hll_type get_target_type() const
Returns the union's target HLL mode (from target_hll_type).
Definition HllUnion-internal.hpp:191
double get_composite_estimate() const
This is less accurate than the get_estimate() method and is automatically used when the union has gon...
Definition HllUnion-internal.hpp:140
double get_estimate() const
Returns the current cardinality estimate.
Definition HllUnion-internal.hpp:133
void update(const hll_sketch_alloc< A > &sketch)
Update this union operator with the given sketch.
Definition HllUnion-internal.hpp:46
hll_sketch_alloc< A > get_result(target_hll_type tgt_type=HLL_4) const
Returns the result of this union operator with the specified target_hll_type.
Definition HllUnion-internal.hpp:41
bool is_empty() const
Indicates if the union is currently empty.
Definition HllUnion-internal.hpp:171
uint8_t get_lg_config_k() const
Returns union's configured lg_k value.
Definition HllUnion-internal.hpp:161
double get_lower_bound(uint8_t num_std_dev) const
Returns the approximate lower error bound given the specified number of standard deviations.
Definition HllUnion-internal.hpp:147
void reset()
Resets the union to an empty state in coupon collection mode.
Definition HllUnion-internal.hpp:166
static double get_rel_err(bool upper_bound, bool unioned, uint8_t lg_config_k, uint8_t num_std_dev)
Gets the current (approximate) Relative Error (RE) asymptotic values given several parameters.
Definition HllUnion-internal.hpp:196
double get_upper_bound(uint8_t num_std_dev) const
Returns the approximate upper error bound given the specified number of standard deviations.
Definition HllUnion-internal.hpp:154
DataSketches namespace.
Definition binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition hll.hpp:72
@ HLL_6
6 bits per entry (fixed size)
Definition hll.hpp:74
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition hll.hpp:75
@ HLL_4
4 bits per entry (most compact, size may vary)
Definition hll.hpp:73
hll_sketch_alloc< std::allocator< uint8_t > > hll_sketch
HLL sketch alias with default allocator.
Definition hll.hpp:38