datasketches-cpp
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
Hll4Array-internal.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _HLL4ARRAY_INTERNAL_HPP_
21#define _HLL4ARRAY_INTERNAL_HPP_
22
23#include "Hll4Array.hpp"
24
25#include <cstring>
26#include <memory>
27#include <stdexcept>
28#include <string>
29
30namespace datasketches {
31
32template<typename A>
33Hll4Array<A>::Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator):
34HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
35auxHashMap_(nullptr)
36{
37 const uint32_t numBytes = this->hll4ArrBytes(lgConfigK);
38 this->hllByteArr_.resize(numBytes, 0);
39}
40
41template<typename A>
42Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) :
43 HllArray<A>(that)
44{
45 // can determine hllByteArr size in parent class, no need to allocate here
46 // but parent class doesn't handle the auxHashMap
47 if (that.auxHashMap_ != nullptr) {
48 auxHashMap_ = that.auxHashMap_->copy();
49 } else {
50 auxHashMap_ = nullptr;
51 }
52}
53
54template<typename A>
55Hll4Array<A>::Hll4Array(const HllArray<A>& other) :
56 HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_4, other.isStartFullSize(), other.getAllocator()),
57 auxHashMap_(nullptr)
58{
59 const int numBytes = this->hll4ArrBytes(this->lgConfigK_);
60 this->hllByteArr_.resize(numBytes, 0);
61 this->oooFlag_ = other.isOutOfOrderFlag();
62
63 for (const auto coupon : other) { // all = false, so skip empty values
64 internalCouponUpdate(coupon); // updates KxQ registers
65 }
66 this->hipAccum_ = other.getHipAccum();
67 this->rebuild_kxq_curmin_ = false;
68}
69
70template<typename A>
71Hll4Array<A>::~Hll4Array() {
72 // hllByteArr deleted in parent
73 if (auxHashMap_ != nullptr) {
74 AuxHashMap<A>::make_deleter()(auxHashMap_);
75 }
76}
77
78template<typename A>
79std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
80 return [](HllSketchImpl<A>* ptr) {
81 Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
82 using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
83 Hll4Alloc hll4Alloc(hll->getAllocator());
84 hll->~Hll4Array();
85 hll4Alloc.deallocate(hll, 1);
86 };
87}
88
89template<typename A>
90Hll4Array<A>* Hll4Array<A>::copy() const {
91 using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
92 Hll4Alloc hll4Alloc(this->getAllocator());
93 return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
94}
95
96template<typename A>
97uint32_t Hll4Array<A>::getUpdatableSerializationBytes() const {
98 AuxHashMap<A>* auxHashMap = getAuxHashMap();
99 uint32_t auxBytes;
100 if (auxHashMap == nullptr) {
101 auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
102 } else {
103 auxBytes = 4 << auxHashMap->getLgAuxArrInts();
104 }
105 return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxBytes;
106}
107
108template<typename A>
109uint32_t Hll4Array<A>::getHllByteArrBytes() const {
110 return this->hll4ArrBytes(this->lgConfigK_);
111}
112
113template<typename A>
114AuxHashMap<A>* Hll4Array<A>::getAuxHashMap() const {
115 return auxHashMap_;
116}
117
118template<typename A>
119void Hll4Array<A>::putAuxHashMap(AuxHashMap<A>* auxHashMap) {
120 this->auxHashMap_ = auxHashMap;
121}
122
123template<typename A>
124uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
125 const uint8_t byte = this->hllByteArr_[slotNo >> 1];
126 if ((slotNo & 1) > 0) { // odd?
127 return byte >> 4;
128 }
129 return byte & hll_constants::loNibbleMask;
130}
131
132template<typename A>
133uint8_t Hll4Array<A>::adjustRawValue(uint32_t slot, uint8_t value) const {
134 if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
135 return auxHashMap_->mustFindValueFor(slot);
136}
137
138template<typename A>
139HllSketchImpl<A>* Hll4Array<A>::couponUpdate(uint32_t coupon) {
140 internalCouponUpdate(coupon);
141 return this;
142}
143
144template<typename A>
145void Hll4Array<A>::internalCouponUpdate(uint32_t coupon) {
146 const uint8_t newValue = HllUtil<A>::getValue(coupon);
147 if (newValue <= this->curMin_) {
148 return; // quick rejection, but only works for large N
149 }
150 const uint32_t configKmask = (1 << this->lgConfigK_) - 1;
151 const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
152 internalHll4Update(slotNo, newValue);
153}
154
155template<typename A>
156void Hll4Array<A>::putSlot(uint32_t slotNo, uint8_t newValue) {
157 const uint32_t byteno = slotNo >> 1;
158 const uint8_t oldValue = this->hllByteArr_[byteno];
159 if ((slotNo & 1) == 0) { // set low nibble
160 this->hllByteArr_[byteno]
161 = ((oldValue & hll_constants::hiNibbleMask) | (newValue & hll_constants::loNibbleMask));
162 } else { // set high nibble
163 this->hllByteArr_[byteno]
164 = ((oldValue & hll_constants::loNibbleMask) | ((newValue << 4) & hll_constants::hiNibbleMask));
165 }
166}
167
168//In C: two-registers.c Line 836 in "hhb_abstract_set_slot_if_new_value_bigger" non-sparse
169template<typename A>
170void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) {
171
172 const uint8_t rawStoredOldValue = getSlot(slotNo); // could be a 0
173 // this is provably a LB:
174 const uint8_t lbOnOldValue = rawStoredOldValue + this->curMin_; // lower bound, could be 0
175
176 if (newVal > lbOnOldValue) { // 842
177 // Note: if an AUX_TOKEN exists, then auxHashMap must already exist
178 // 846: rawStoredOldValue == AUX_TOKEN
179 const uint8_t actualOldValue = (rawStoredOldValue < hll_constants::AUX_TOKEN)
180 ? (lbOnOldValue) : (auxHashMap_->mustFindValueFor(slotNo));
181
182 if (newVal > actualOldValue) { // 848: actualOldValue could still be 0; newValue > 0
183 // we know that the array will change, but we haven't actually updated yet
184 this->hipAndKxQIncrementalUpdate(actualOldValue, newVal);
185
186 // newVal >= curMin
187
188 const uint8_t shiftedNewValue = newVal - this->curMin_; // 874
189 // redundant since we know newVal >= curMin,
190 // and lgConfigK bounds do not allow overflowing an int
191 //assert(shiftedNewValue >= 0);
192
193 if (rawStoredOldValue == hll_constants::AUX_TOKEN) { // 879
194 // Given that we have an AUX_TOKEN, there are 4 cases for how to
195 // actually modify the data structure
196
197 if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 1: 881
198 // the byte array already contains aux token
199 // This is the case where old and new values are both exceptions.
200 // The 4-bit array already is AUX_TOKEN, only need to update auxHashMap
201 auxHashMap_->mustReplace(slotNo, newVal);
202 }
203 else { // case 2: 885
204 // This is the hypothetical case where the old value is an exception and the new one is not,
205 // which is impossible given that curMin has not changed here and newVal > oldValue
206 }
207 } else { // rawStoredOldValue != AUX_TOKEN
208 if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 3: 892
209 // This is the case where the old value is not an exception and the new value is.
210 // The AUX_TOKEN must be stored in the 4-bit array and the new value
211 // added to the exception table
212 putSlot(slotNo, hll_constants::AUX_TOKEN);
213 if (auxHashMap_ == nullptr) {
214 auxHashMap_ = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
215 this->lgConfigK_, this->getAllocator());
216 }
217 auxHashMap_->mustAdd(slotNo, newVal);
218 }
219 else { // case 4: 897
220 // This is the case where neither the old value nor the new value is an exception.
221 // We just overwrite the 4-bit array with the shifted new value.
222 putSlot(slotNo, shiftedNewValue);
223 }
224 }
225
226 // we just increased a pair value, so it might be time to change curMin
227 if (actualOldValue == this->curMin_) { // 908
228 --(this->numAtCurMin_);
229 while (this->numAtCurMin_ == 0) {
230 shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
231 // shifts values in 4-bit table and recounts curMin
232 }
233 }
234 } // end newVal <= actualOldValue
235 } // end newValue <= lbOnOldValue -> return, no need to update array
236}
237
238// This scheme only works with two double registers (2 kxq values).
239// HipAccum, kxq0 and kxq1 remain untouched.
240// This changes curMin, numAtCurMin, hllByteArr and auxMap.
241// Entering this routine assumes that all slots have valid values > 0 and <= 15.
242// An AuxHashMap must exist if any values in the current hllByteArray are already 15.
243// In C: again-two-registers.c Lines 710 "hhb_shift_to_bigger_curmin"
244template<typename A>
245void Hll4Array<A>::shiftToBiggerCurMin() {
246 const uint8_t newCurMin = this->curMin_ + 1;
247 const uint32_t configK = 1 << this->lgConfigK_;
248 const uint32_t configKmask = configK - 1;
249
250 uint32_t numAtNewCurMin = 0;
251 uint32_t numAuxTokens = 0;
252
253 // Walk through the slots of 4-bit array decrementing stored values by one unless it
254 // equals AUX_TOKEN, where it is left alone but counted to be checked later.
255 // If oldStoredValue is 0 it is an error.
256 // If the decremented value is 0, we increment numAtNewCurMin.
257 // Because getNibble is masked to 4 bits oldStoredValue can never be > 15 or negative
258 for (uint32_t i = 0; i < configK; i++) { //724
259 uint8_t oldStoredValue = getSlot(i);
260 if (oldStoredValue == 0) {
261 throw std::runtime_error("Array slots cannot be 0 at this point.");
262 }
263 if (oldStoredValue < hll_constants::AUX_TOKEN) {
264 putSlot(i, --oldStoredValue);
265 if (oldStoredValue == 0) { numAtNewCurMin++; }
266 } else { //oldStoredValue == AUX_TOKEN
267 numAuxTokens++;
268 if (auxHashMap_ == nullptr) {
269 throw std::logic_error("auxHashMap cannot be null at this point");
270 }
271 }
272 }
273
274 // If old AuxHashMap exists, walk through it updating some slots and build a new AuxHashMap
275 // if needed.
276 AuxHashMap<A>* newAuxMap = nullptr;
277 if (auxHashMap_ != nullptr) {
278 uint32_t slotNum;
279 uint8_t oldActualVal;
280 uint8_t newShiftedVal;
281
282 for (const auto coupon: *auxHashMap_) {
283 slotNum = HllUtil<A>::getLow26(coupon) & configKmask;
284 oldActualVal = HllUtil<A>::getValue(coupon);
285 if (oldActualVal < newCurMin) {
286 throw std::logic_error("oldActualVal < newCurMin when incrementing curMin");
287 }
288 newShiftedVal = oldActualVal - newCurMin;
289
290 if (getSlot(slotNum) != hll_constants::AUX_TOKEN) {
291 throw std::logic_error("getSlot(slotNum) != AUX_TOKEN for item in auxiliary hash map");
292 }
293 // Array slot != AUX_TOKEN at getSlot(slotNum);
294 if (newShiftedVal < hll_constants::AUX_TOKEN) { // 756
295 if (newShiftedVal != 14) {
296 throw std::logic_error("newShiftedVal != 14 for item in old auxHashMap despite curMin increment");
297 }
298 // The former exception value isn't one anymore, so it stays out of new AuxHashMap.
299 // Correct the AUX_TOKEN value in the HLL array to the newShiftedVal (14).
300 putSlot(slotNum, newShiftedVal);
301 numAuxTokens--;
302 } else { //newShiftedVal >= AUX_TOKEN
303 // the former exception remains an exception, so must be added to the newAuxMap
304 if (newAuxMap == nullptr) {
305 newAuxMap = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
306 this->lgConfigK_, this->getAllocator());
307 }
308 newAuxMap->mustAdd(slotNum, oldActualVal);
309 }
310 } //end scan of oldAuxMap
311 } //end if (auxHashMap != null)
312 else { // oldAuxMap == null
313 if (numAuxTokens != 0) {
314 throw std::logic_error("No auxiliary hash map, but numAuxTokens != 0");
315 }
316 }
317
318 if (newAuxMap != nullptr) {
319 if (newAuxMap->getAuxCount() != numAuxTokens) {
320 throw std::runtime_error("Inconsistent counts: auxCount: " + std::to_string(newAuxMap->getAuxCount())
321 + ", HLL tokesn: " + std::to_string(numAuxTokens));
322 }
323 }
324
325 if (auxHashMap_ != nullptr) {
326 AuxHashMap<A>::make_deleter()(auxHashMap_);
327 }
328 auxHashMap_ = newAuxMap;
329
330 this->curMin_ = newCurMin;
331 this->numAtCurMin_ = numAtNewCurMin;
332}
333
334template<typename A>
335typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
336 return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_,
337 auxHashMap_, this->curMin_, all);
338}
339
340template<typename A>
341typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
342 return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_,
343 this->tgtHllType_, auxHashMap_, this->curMin_, false);
344}
345
346}
347
348#endif // _HLL4ARRAY_INTERNAL_HPP_
DataSketches namespace.
Definition binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition hll.hpp:72
@ HLL_4
4 bits per entry (most compact, size may vary)
Definition hll.hpp:73