datasketches-cpp
HllArray.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLLARRAY_HPP_
21 #define _HLLARRAY_HPP_
22 
23 #include "HllSketchImpl.hpp"
24 #include "HllUtil.hpp"
25 
26 namespace datasketches {
27 
28 template<typename A>
29 class AuxHashMap;
30 
31 template<typename A>
32 class HllArray : public HllSketchImpl<A> {
33  public:
34  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
35 
36  HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
37  explicit HllArray(const HllArray& other, target_hll_type tgtHllType);
38 
39  static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
40  static HllArray* newHll(std::istream& is, const A& allocator);
41 
42  virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const;
43  virtual void serialize(std::ostream& os, bool compact) const;
44 
45  virtual ~HllArray() = default;
46  virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
47 
48  virtual HllArray* copy() const = 0;
49  virtual HllArray* copyAs(target_hll_type tgtHllType) const;
50 
51  virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) = 0;
52 
53  virtual double getEstimate() const;
54  virtual double getCompositeEstimate() const;
55  virtual double getLowerBound(uint8_t numStdDev) const;
56  virtual double getUpperBound(uint8_t numStdDev) const;
57 
58  inline uint8_t getCurMin() const;
59  inline uint32_t getNumAtCurMin() const;
60  inline double getHipAccum() const;
61 
62  virtual uint32_t getHllByteArrBytes() const = 0;
63 
64  virtual uint32_t getUpdatableSerializationBytes() const;
65  virtual uint32_t getCompactSerializationBytes() const;
66 
67  virtual bool isOutOfOrderFlag() const;
68  virtual bool isEmpty() const;
69  virtual bool isCompact() const;
70 
71  virtual void putOutOfOrderFlag(bool flag);
72 
73  inline double getKxQ0() const;
74  inline double getKxQ1() const;
75 
76  virtual uint32_t getMemDataStart() const;
77  virtual uint8_t getPreInts() const;
78 
79  void putCurMin(uint8_t curMin);
80  void putHipAccum(double hipAccum);
81  inline void putKxQ0(double kxq0);
82  inline void putKxQ1(double kxq1);
83  void putNumAtCurMin(uint32_t numAtCurMin);
84 
85  static uint32_t hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK);
86  static uint32_t hll4ArrBytes(uint8_t lgConfigK);
87  static uint32_t hll6ArrBytes(uint8_t lgConfigK);
88  static uint32_t hll8ArrBytes(uint8_t lgConfigK);
89 
90  virtual AuxHashMap<A>* getAuxHashMap() const;
91 
92  void setRebuildKxqCurminFlag(bool rebuild);
93  bool isRebuildKxqCurminFlag() const;
94  void check_rebuild_kxq_cur_min();
95 
96  class const_iterator;
97  virtual const_iterator begin(bool all = false) const;
98  virtual const_iterator end() const;
99 
100  virtual A getAllocator() const;
101 
102  const vector_bytes& getHllArray() const;
103 
104  protected:
105  void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
106  double getHllBitMapEstimate() const;
107  double getHllRawEstimate() const;
108 
109  double hipAccum_;
110  double kxq0_;
111  double kxq1_;
112  vector_bytes hllByteArr_; //init by sub-classes
113  uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
114  uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
115  bool oooFlag_; //Out-Of-Order Flag
116  bool rebuild_kxq_curmin_; // flag to recompute
117 
118  friend class HllSketchImplFactory<A>;
119 };
120 
121 template<typename A>
122 class HllArray<A>::const_iterator {
123 public:
124  using iterator_category = std::input_iterator_tag;
125  using value_type = uint32_t;
126  using difference_type = void;
127  using pointer = uint32_t*;
128  using reference = uint32_t;
129 
130  const_iterator(const uint8_t* array, uint32_t array_slze, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
131  const_iterator& operator++();
132  bool operator!=(const const_iterator& other) const;
133  reference operator*() const;
134 private:
135  const uint8_t* array_;
136  uint32_t array_size_;
137  uint32_t index_;
138  target_hll_type hll_type_;
139  const AuxHashMap<A>* exceptions_;
140  uint8_t offset_;
141  bool all_;
142  uint8_t value_; // cached value to avoid computing in operator++ and in operator*()
143  static inline uint8_t get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
144 };
145 
146 }
147 
148 #endif /* _HLLARRAY_HPP_ */
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72