datasketches-cpp
Loading...
Searching...
No Matches
HllUnion-internal.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _HLLUNION_INTERNAL_HPP_
21#define _HLLUNION_INTERNAL_HPP_
22
23#include "hll.hpp"
24
25#include "HllSketchImpl.hpp"
26#include "HllArray.hpp"
27#include "HllUtil.hpp"
28
29#include <stdexcept>
30#include <string>
31
32namespace datasketches {
33
34template<typename A>
35hll_union_alloc<A>::hll_union_alloc(uint8_t lg_max_k, const A& allocator):
36 lg_max_k_(HllUtil<A>::checkLgK(lg_max_k)),
37 gadget_(lg_max_k, target_hll_type::HLL_8, false, allocator)
38{}
39
40template<typename A>
41hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type) const {
42 return hll_sketch_alloc<A>(gadget_, target_type);
43}
44
45template<typename A>
46void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
47 if (sketch.is_empty()) { return; }
48 union_impl(sketch, lg_max_k_);
49}
50
51template<typename A>
52void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
53 if (sketch.is_empty()) { return; }
54 if (gadget_.is_empty() && sketch.get_target_type() == HLL_8 && sketch.get_lg_config_k() <= lg_max_k_) {
55 if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() == lg_max_k_) {
56 gadget_ = std::move(sketch);
57 }
58 }
59 union_impl(sketch, lg_max_k_);
60}
61
62template<typename A>
63void hll_union_alloc<A>::update(const std::string& datum) {
64 gadget_.update(datum);
65}
66
67template<typename A>
68void hll_union_alloc<A>::update(uint64_t datum) {
69 gadget_.update(datum);
70}
71
72template<typename A>
73void hll_union_alloc<A>::update(uint32_t datum) {
74 gadget_.update(datum);
75}
76
77template<typename A>
78void hll_union_alloc<A>::update(uint16_t datum) {
79 gadget_.update(datum);
80}
81
82template<typename A>
83void hll_union_alloc<A>::update(uint8_t datum) {
84 gadget_.update(datum);
85}
86
87template<typename A>
88void hll_union_alloc<A>::update(int64_t datum) {
89 gadget_.update(datum);
90}
91
92template<typename A>
93void hll_union_alloc<A>::update(int32_t datum) {
94 gadget_.update(datum);
95}
96
97template<typename A>
98void hll_union_alloc<A>::update(int16_t datum) {
99 gadget_.update(datum);
100}
101
102template<typename A>
103void hll_union_alloc<A>::update(int8_t datum) {
104 gadget_.update(datum);
105}
106
107template<typename A>
108void hll_union_alloc<A>::update(double datum) {
109 gadget_.update(datum);
110}
111
112template<typename A>
113void hll_union_alloc<A>::update(float datum) {
114 gadget_.update(datum);
115}
116
117template<typename A>
118void hll_union_alloc<A>::update(const void* data, size_t length_bytes) {
119 gadget_.update(data, length_bytes);
120}
121
122template<typename A>
123void hll_union_alloc<A>::coupon_update(uint32_t coupon) {
124 if (coupon == HllUtil<A>::EMPTY) { return; }
125 HllSketchImpl<A>* result = gadget_.sketch_impl->coupon_update(coupon);
126 if (result != gadget_.sketch_impl) {
127 if (gadget_.sketch_impl != nullptr) { gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); }
128 gadget_.sketch_impl = result;
129 }
130}
131
132template<typename A>
134 if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL) {
135 static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
136 }
137 return gadget_.get_estimate();
138}
139
140template<typename A>
142 if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL) {
143 static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
144 }
145 return gadget_.get_composite_estimate();
146}
147
148template<typename A>
149double hll_union_alloc<A>::get_lower_bound(uint8_t num_std_dev) const {
150 if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL) {
151 static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
152 }
153 return gadget_.get_lower_bound(num_std_dev);
154}
155
156template<typename A>
157double hll_union_alloc<A>::get_upper_bound(uint8_t num_std_dev) const {
158 if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL) {
159 static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
160 }
161 return gadget_.get_upper_bound(num_std_dev);
162}
163
164template<typename A>
166 return gadget_.get_lg_config_k();
167}
168
169template<typename A>
171 gadget_.reset();
172}
173
174template<typename A>
176 return gadget_.is_empty();
177}
178
179template<typename A>
181 return gadget_.is_out_of_order_flag();
182}
183
184template<typename A>
185hll_mode hll_union_alloc<A>::get_current_mode() const {
186 return gadget_.get_current_mode();
187}
188
189template<typename A>
190bool hll_union_alloc<A>::is_estimation_mode() const {
191 return gadget_.is_estimation_mode();
192}
193
194template<typename A>
198
199template<typename A>
200double hll_union_alloc<A>::get_rel_err(bool upper_bound, bool unioned,
201 uint8_t lg_config_k, uint8_t num_std_dev) {
202 return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
203}
204
205template<typename A>
207 if (src_impl->getCurMode() != HLL) {
208 throw std::logic_error("Attempt to downsample non-HLL sketch");
209 }
210 const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
211 const int src_lg_k = src->getLgConfigK();
212 if (src_lg_k <= tgt_lg_k) {
213 return src->copyAs(HLL_8);
214 }
215 typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
216 Hll8Array<A>* tgtHllArr = new (hll8Alloc(src->getAllocator()).allocate(1)) Hll8Array<A>(tgt_lg_k, false, src->getAllocator());
217 tgtHllArr->mergeHll(*src);
218 //both of these are required for isomorphism
219 tgtHllArr->putHipAccum(src->getHipAccum());
220 tgtHllArr->putOutOfOrderFlag(src->isOutOfOrderFlag());
221 return tgtHllArr;
222}
223
224template<typename A>
225inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon) {
226 HllSketchImpl<A>* result = impl->couponUpdate(coupon);
227 if (result != impl) {
228 impl->get_deleter()(impl);
229 }
230 return result;
231}
232
233template<typename A>
234void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k) {
235 const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
236 HllSketchImpl<A>* dst_impl = gadget_.sketch_impl; //default
237 if (src_impl->getCurMode() == LIST || src_impl->getCurMode() == SET) {
238 if (dst_impl->isEmpty() && src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
239 dst_impl = src_impl->copyAs(HLL_8);
240 gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
241 } else {
242 const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
243 for (auto coupon: *src) {
244 dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
245 }
246 }
247 } else if (!dst_impl->isEmpty()) { // src is HLL
248 if (dst_impl->getCurMode() == LIST || dst_impl->getCurMode() == SET) {
249 // swap so that src is LIST or SET, tgt is HLL
250 // use lg_max_k because LIST has effective K of 2^26
251 const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
252 dst_impl = copy_or_downsample(src_impl, lg_max_k);
253 static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
254 gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
255 } else { // gadget is HLL
256 if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
257 dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
258 gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
259 }
260 const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
261 static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
262 dst_impl->putOutOfOrderFlag(true);
263 static_cast<Hll8Array<A>*>(dst_impl)->putHipAccum(0);
264 }
265 } else { // src is HLL, gadget is empty
266 dst_impl = copy_or_downsample(src_impl, lg_max_k);
267 gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
268 }
269 gadget_.sketch_impl = dst_impl; // gadget replaced
270}
271
272}
273
274#endif // _HLLUNION_INTERNAL_HPP_
This is a high performance implementation of Phillipe Flajolet's HLL sketch but with significantly im...
Definition HllSketchImpl.hpp:31
This performs union operations for HLL sketches.
Definition hll.hpp:446
target_hll_type get_target_type() const
Returns the union's target HLL mode (from target_hll_type).
Definition HllUnion-internal.hpp:195
double get_composite_estimate() const
This is less accurate than the get_estimate() method and is automatically used when the union has gon...
Definition HllUnion-internal.hpp:141
double get_estimate() const
Returns the current cardinality estimate.
Definition HllUnion-internal.hpp:133
void update(const hll_sketch_alloc< A > &sketch)
Update this union operator with the given sketch.
Definition HllUnion-internal.hpp:46
hll_sketch_alloc< A > get_result(target_hll_type tgt_type=HLL_4) const
Returns the result of this union operator with the specified target_hll_type.
Definition HllUnion-internal.hpp:41
bool is_empty() const
Indicates if the union is currently empty.
Definition HllUnion-internal.hpp:175
uint8_t get_lg_config_k() const
Returns union's configured lg_k value.
Definition HllUnion-internal.hpp:165
double get_lower_bound(uint8_t num_std_dev) const
Returns the approximate lower error bound given the specified number of standard deviations.
Definition HllUnion-internal.hpp:149
hll_union_alloc(uint8_t lg_max_k, const A &allocator=A())
Construct an hll_union operator with the given maximum log2 of k.
Definition HllUnion-internal.hpp:35
void reset()
Resets the union to an empty state in coupon collection mode.
Definition HllUnion-internal.hpp:170
static double get_rel_err(bool upper_bound, bool unioned, uint8_t lg_config_k, uint8_t num_std_dev)
Gets the current (approximate) Relative Error (RE) asymptotic values given several parameters.
Definition HllUnion-internal.hpp:200
double get_upper_bound(uint8_t num_std_dev) const
Returns the approximate upper error bound given the specified number of standard deviations.
Definition HllUnion-internal.hpp:157
DataSketches namespace.
Definition binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition hll.hpp:72
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition hll.hpp:75