datasketches-cpp
hll.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLL_HPP_
21 #define _HLL_HPP_
22 
23 #include "common_defs.hpp"
24 #include "HllUtil.hpp"
25 
26 #include <iostream>
27 #include <memory>
28 #include <string>
29 #include <vector>
30 
31 namespace datasketches {
32 
33 // forward declarations
34 template<typename A> class hll_sketch_alloc;
35 template<typename A> class hll_union_alloc;
36 
38 using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
41 
75  HLL_8
76 };
77 
111 // forward declaration
112 template<typename A> class HllSketchImpl;
113 
114 template<typename A = std::allocator<uint8_t> >
115 class hll_sketch_alloc final {
116  public:
126  explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
127 
132  hll_sketch_alloc(const hll_sketch_alloc<A>& that);
133 
139  hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
140 
145  hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
146 
152  static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
153 
160  static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
161 
163  virtual ~hll_sketch_alloc();
164 
170  hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
171 
177  hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
178 
183  void reset();
184 
185  // This is a convenience alias for users
186  // The type returned by the following serialize method
187  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
188 
195  vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
196 
202  vector_bytes serialize_updatable() const;
203 
209  void serialize_compact(std::ostream& os) const;
210 
216  void serialize_updatable(std::ostream& os) const;
217 
226  string<A> to_string(bool summary = true,
227  bool detail = false,
228  bool aux_detail = false,
229  bool all = false) const;
230 
237  void update(const std::string& datum);
238 
243  void update(uint64_t datum);
244 
249  void update(uint32_t datum);
250 
255  void update(uint16_t datum);
256 
261  void update(uint8_t datum);
262 
267  void update(int64_t datum);
268 
273  void update(int32_t datum);
274 
279  void update(int16_t datum);
280 
285  void update(int8_t datum);
286 
291  void update(double datum);
292 
297  void update(float datum);
298 
304  void update(const void* data, size_t length_bytes);
305 
310  double get_estimate() const;
311 
323  double get_composite_estimate() const;
324 
331  double get_lower_bound(uint8_t num_std_dev) const;
332 
339  double get_upper_bound(uint8_t num_std_dev) const;
340 
345  uint8_t get_lg_config_k() const;
346 
351  target_hll_type get_target_type() const;
352 
357  bool is_compact() const;
358 
363  bool is_empty() const;
364 
369  uint32_t get_compact_serialization_bytes() const;
370 
375  uint32_t get_updatable_serialization_bytes() const;
376 
388  static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
389 
400  static double get_rel_err(bool upper_bound, bool unioned,
401  uint8_t lg_config_k, uint8_t num_std_dev);
402 
403  private:
404  explicit hll_sketch_alloc(HllSketchImpl<A>* that);
405 
406  void coupon_update(uint32_t coupon);
407 
408  std::string type_as_string() const;
409  std::string mode_as_string() const;
410 
411  hll_mode get_current_mode() const;
412  uint8_t get_serialization_version() const;
413  bool is_out_of_order_flag() const;
414  bool is_estimation_mode() const;
415 
416  HllSketchImpl<A>* sketch_impl;
417  friend hll_union_alloc<A>;
418 };
419 
445 template<typename A = std::allocator<uint8_t> >
447  public:
454  explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
455 
460  double get_estimate() const;
461 
473  double get_composite_estimate() const;
474 
481  double get_lower_bound(uint8_t num_std_dev) const;
482 
489  double get_upper_bound(uint8_t num_std_dev) const;
490 
495  uint8_t get_lg_config_k() const;
496 
502 
507  bool is_empty() const;
508 
513  void reset();
514 
521  hll_sketch_alloc<A> get_result(target_hll_type tgt_type = HLL_4) const;
522 
527  void update(const hll_sketch_alloc<A>& sketch);
528 
533  void update(hll_sketch_alloc<A>&& sketch);
534 
541  void update(const std::string& datum);
542 
547  void update(uint64_t datum);
548 
553  void update(uint32_t datum);
554 
559  void update(uint16_t datum);
560 
565  void update(uint8_t datum);
566 
571  void update(int64_t datum);
572 
577  void update(int32_t datum);
578 
583  void update(int16_t datum);
584 
589  void update(int8_t datum);
590 
595  void update(double datum);
596 
601  void update(float datum);
602 
608  void update(const void* data, size_t length_bytes);
609 
620  static double get_rel_err(bool upper_bound, bool unioned,
621  uint8_t lg_config_k, uint8_t num_std_dev);
622 
623  private:
624 
634  inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
635 
636  static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
637 
638  void coupon_update(uint32_t coupon);
639 
640  hll_mode get_current_mode() const;
641  bool is_out_of_order_flag() const;
642  bool is_estimation_mode() const;
643 
644  // calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
645  static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
646 
647  uint8_t lg_max_k_;
648  hll_sketch_alloc<A> gadget_;
649 };
650 
651 } // namespace datasketches
652 
653 #include "hll.private.hpp"
654 
655 #endif // _HLL_HPP_
This is a high performance implementation of Phillipe Flajolet's HLL sketch but with significantly im...
Definition: HllSketchImpl.hpp:31
This performs union operations for HLL sketches.
Definition: hll.hpp:446
target_hll_type get_target_type() const
Returns the union's target HLL mode (from target_hll_type).
Definition: HllUnion-internal.hpp:191
double get_composite_estimate() const
This is less accurate than the get_estimate() method and is automatically used when the union has gon...
Definition: HllUnion-internal.hpp:140
double get_estimate() const
Returns the current cardinality estimate.
Definition: HllUnion-internal.hpp:133
void update(const hll_sketch_alloc< A > &sketch)
Update this union operator with the given sketch.
Definition: HllUnion-internal.hpp:46
hll_sketch_alloc< A > get_result(target_hll_type tgt_type=HLL_4) const
Returns the result of this union operator with the specified target_hll_type.
Definition: HllUnion-internal.hpp:41
bool is_empty() const
Indicates if the union is currently empty.
Definition: HllUnion-internal.hpp:171
uint8_t get_lg_config_k() const
Returns union's configured lg_k value.
Definition: HllUnion-internal.hpp:161
double get_lower_bound(uint8_t num_std_dev) const
Returns the approximate lower error bound given the specified number of standard deviations.
Definition: HllUnion-internal.hpp:147
hll_union_alloc(uint8_t lg_max_k, const A &allocator=A())
Construct an hll_union operator with the given maximum log2 of k.
Definition: HllUnion-internal.hpp:35
void reset()
Resets the union to an empty state in coupon collection mode.
Definition: HllUnion-internal.hpp:166
static double get_rel_err(bool upper_bound, bool unioned, uint8_t lg_config_k, uint8_t num_std_dev)
Gets the current (approximate) Relative Error (RE) asymptotic values given several parameters.
Definition: HllUnion-internal.hpp:196
double get_upper_bound(uint8_t num_std_dev) const
Returns the approximate upper error bound given the specified number of standard deviations.
Definition: HllUnion-internal.hpp:154
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72
@ HLL_6
6 bits per entry (fixed size)
Definition: hll.hpp:74
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition: hll.hpp:75
@ HLL_4
4 bits per entry (most compact, size may vary)
Definition: hll.hpp:73
hll_sketch_alloc< std::allocator< uint8_t > > hll_sketch
HLL sketch alias with default allocator.
Definition: hll.hpp:38