datasketches-cpp
HllUnion-internal.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLLUNION_INTERNAL_HPP_
21 #define _HLLUNION_INTERNAL_HPP_
22 
23 #include "hll.hpp"
24 
25 #include "HllSketchImpl.hpp"
26 #include "HllArray.hpp"
27 #include "HllUtil.hpp"
28 
29 #include <stdexcept>
30 #include <string>
31 
32 namespace datasketches {
33 
34 template<typename A>
35 hll_union_alloc<A>::hll_union_alloc(uint8_t lg_max_k, const A& allocator):
36  lg_max_k_(HllUtil<A>::checkLgK(lg_max_k)),
37  gadget_(lg_max_k, target_hll_type::HLL_8, false, allocator)
38 {}
39 
40 template<typename A>
41 hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type) const {
42  return hll_sketch_alloc<A>(gadget_, target_type);
43 }
44 
45 template<typename A>
46 void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
47  if (sketch.is_empty()) return;
48  union_impl(sketch, lg_max_k_);
49 }
50 
51 template<typename A>
52 void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
53  if (sketch.is_empty()) return;
54  if (gadget_.is_empty() && sketch.get_target_type() == HLL_8 && sketch.get_lg_config_k() <= lg_max_k_) {
55  if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() == lg_max_k_) {
56  gadget_ = std::move(sketch);
57  }
58  }
59  union_impl(sketch, lg_max_k_);
60 }
61 
62 template<typename A>
63 void hll_union_alloc<A>::update(const std::string& datum) {
64  gadget_.update(datum);
65 }
66 
67 template<typename A>
68 void hll_union_alloc<A>::update(uint64_t datum) {
69  gadget_.update(datum);
70 }
71 
72 template<typename A>
73 void hll_union_alloc<A>::update(uint32_t datum) {
74  gadget_.update(datum);
75 }
76 
77 template<typename A>
78 void hll_union_alloc<A>::update(uint16_t datum) {
79  gadget_.update(datum);
80 }
81 
82 template<typename A>
83 void hll_union_alloc<A>::update(uint8_t datum) {
84  gadget_.update(datum);
85 }
86 
87 template<typename A>
88 void hll_union_alloc<A>::update(int64_t datum) {
89  gadget_.update(datum);
90 }
91 
92 template<typename A>
93 void hll_union_alloc<A>::update(int32_t datum) {
94  gadget_.update(datum);
95 }
96 
97 template<typename A>
98 void hll_union_alloc<A>::update(int16_t datum) {
99  gadget_.update(datum);
100 }
101 
102 template<typename A>
103 void hll_union_alloc<A>::update(int8_t datum) {
104  gadget_.update(datum);
105 }
106 
107 template<typename A>
108 void hll_union_alloc<A>::update(double datum) {
109  gadget_.update(datum);
110 }
111 
112 template<typename A>
113 void hll_union_alloc<A>::update(float datum) {
114  gadget_.update(datum);
115 }
116 
117 template<typename A>
118 void hll_union_alloc<A>::update(const void* data, size_t length_bytes) {
119  gadget_.update(data, length_bytes);
120 }
121 
122 template<typename A>
123 void hll_union_alloc<A>::coupon_update(uint32_t coupon) {
124  if (coupon == HllUtil<A>::EMPTY) { return; }
125  HllSketchImpl<A>* result = gadget_.sketch_impl->coupon_update(coupon);
126  if (result != gadget_.sketch_impl) {
127  if (gadget_.sketch_impl != nullptr) { gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); }
128  gadget_.sketch_impl = result;
129  }
130 }
131 
132 template<typename A>
134  if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
135  static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
136  return gadget_.get_estimate();
137 }
138 
139 template<typename A>
141  if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
142  static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
143  return gadget_.get_composite_estimate();
144 }
145 
146 template<typename A>
147 double hll_union_alloc<A>::get_lower_bound(uint8_t num_std_dev) const {
148  if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
149  static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
150  return gadget_.get_lower_bound(num_std_dev);
151 }
152 
153 template<typename A>
154 double hll_union_alloc<A>::get_upper_bound(uint8_t num_std_dev) const {
155  if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
156  static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
157  return gadget_.get_upper_bound(num_std_dev);
158 }
159 
160 template<typename A>
162  return gadget_.get_lg_config_k();
163 }
164 
165 template<typename A>
167  gadget_.reset();
168 }
169 
170 template<typename A>
172  return gadget_.is_empty();
173 }
174 
175 template<typename A>
177  return gadget_.is_out_of_order_flag();
178 }
179 
180 template<typename A>
181 hll_mode hll_union_alloc<A>::get_current_mode() const {
182  return gadget_.get_current_mode();
183 }
184 
185 template<typename A>
186 bool hll_union_alloc<A>::is_estimation_mode() const {
187  return gadget_.is_estimation_mode();
188 }
189 
190 template<typename A>
192  return target_hll_type::HLL_8;
193 }
194 
195 template<typename A>
196 double hll_union_alloc<A>::get_rel_err(bool upper_bound, bool unioned,
197  uint8_t lg_config_k, uint8_t num_std_dev) {
198  return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
199 }
200 
201 template<typename A>
203  if (src_impl->getCurMode() != HLL) {
204  throw std::logic_error("Attempt to downsample non-HLL sketch");
205  }
206  const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
207  const int src_lg_k = src->getLgConfigK();
208  if (src_lg_k <= tgt_lg_k) {
209  return src->copyAs(HLL_8);
210  }
211  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
212  Hll8Array<A>* tgtHllArr = new (hll8Alloc(src->getAllocator()).allocate(1)) Hll8Array<A>(tgt_lg_k, false, src->getAllocator());
213  tgtHllArr->mergeHll(*src);
214  //both of these are required for isomorphism
215  tgtHllArr->putHipAccum(src->getHipAccum());
216  tgtHllArr->putOutOfOrderFlag(src->isOutOfOrderFlag());
217  return tgtHllArr;
218 }
219 
220 template<typename A>
221 inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon) {
222  HllSketchImpl<A>* result = impl->couponUpdate(coupon);
223  if (result != impl) {
224  impl->get_deleter()(impl);
225  }
226  return result;
227 }
228 
229 template<typename A>
230 void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k) {
231  const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
232  HllSketchImpl<A>* dst_impl = gadget_.sketch_impl; //default
233  if (src_impl->getCurMode() == LIST || src_impl->getCurMode() == SET) {
234  if (dst_impl->isEmpty() && src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
235  dst_impl = src_impl->copyAs(HLL_8);
236  gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
237  } else {
238  const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
239  for (auto coupon: *src) {
240  dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
241  }
242  }
243  } else if (!dst_impl->isEmpty()) { // src is HLL
244  if (dst_impl->getCurMode() == LIST || dst_impl->getCurMode() == SET) {
245  // swap so that src is LIST or SET, tgt is HLL
246  // use lg_max_k because LIST has effective K of 2^26
247  const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
248  dst_impl = copy_or_downsample(src_impl, lg_max_k);
249  static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
250  gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
251  } else { // gadget is HLL
252  if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
253  dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
254  gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
255  }
256  const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
257  static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
258  dst_impl->putOutOfOrderFlag(true);
259  static_cast<Hll8Array<A>*>(dst_impl)->putHipAccum(0);
260  }
261  } else { // src is HLL, gadget is empty
262  dst_impl = copy_or_downsample(src_impl, lg_max_k);
263  gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
264  }
265  gadget_.sketch_impl = dst_impl; // gadget replaced
266 }
267 
268 }
269 
270 #endif // _HLLUNION_INTERNAL_HPP_
This is a high performance implementation of Phillipe Flajolet's HLL sketch but with significantly im...
Definition: HllSketchImpl.hpp:31
This performs union operations for HLL sketches.
Definition: hll.hpp:446
target_hll_type get_target_type() const
Returns the union's target HLL mode (from target_hll_type).
Definition: HllUnion-internal.hpp:191
double get_composite_estimate() const
This is less accurate than the get_estimate() method and is automatically used when the union has gon...
Definition: HllUnion-internal.hpp:140
double get_estimate() const
Returns the current cardinality estimate.
Definition: HllUnion-internal.hpp:133
void update(const hll_sketch_alloc< A > &sketch)
Update this union operator with the given sketch.
Definition: HllUnion-internal.hpp:46
hll_sketch_alloc< A > get_result(target_hll_type tgt_type=HLL_4) const
Returns the result of this union operator with the specified target_hll_type.
Definition: HllUnion-internal.hpp:41
bool is_empty() const
Indicates if the union is currently empty.
Definition: HllUnion-internal.hpp:171
uint8_t get_lg_config_k() const
Returns union's configured lg_k value.
Definition: HllUnion-internal.hpp:161
double get_lower_bound(uint8_t num_std_dev) const
Returns the approximate lower error bound given the specified number of standard deviations.
Definition: HllUnion-internal.hpp:147
hll_union_alloc(uint8_t lg_max_k, const A &allocator=A())
Construct an hll_union operator with the given maximum log2 of k.
Definition: HllUnion-internal.hpp:35
void reset()
Resets the union to an empty state in coupon collection mode.
Definition: HllUnion-internal.hpp:166
static double get_rel_err(bool upper_bound, bool unioned, uint8_t lg_config_k, uint8_t num_std_dev)
Gets the current (approximate) Relative Error (RE) asymptotic values given several parameters.
Definition: HllUnion-internal.hpp:196
double get_upper_bound(uint8_t num_std_dev) const
Returns the approximate upper error bound given the specified number of standard deviations.
Definition: HllUnion-internal.hpp:154
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition: hll.hpp:75