datasketches-cpp
Hll4Array-internal.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLL4ARRAY_INTERNAL_HPP_
21 #define _HLL4ARRAY_INTERNAL_HPP_
22 
23 #include "Hll4Array.hpp"
24 
25 #include <cstring>
26 #include <memory>
27 #include <stdexcept>
28 #include <string>
29 
30 namespace datasketches {
31 
32 template<typename A>
33 Hll4Array<A>::Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator):
34 HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
35 auxHashMap_(nullptr)
36 {
37  const uint32_t numBytes = this->hll4ArrBytes(lgConfigK);
38  this->hllByteArr_.resize(numBytes, 0);
39 }
40 
41 template<typename A>
42 Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) :
43  HllArray<A>(that)
44 {
45  // can determine hllByteArr size in parent class, no need to allocate here
46  // but parent class doesn't handle the auxHashMap
47  if (that.auxHashMap_ != nullptr) {
48  auxHashMap_ = that.auxHashMap_->copy();
49  } else {
50  auxHashMap_ = nullptr;
51  }
52 }
53 
54 template<typename A>
55 Hll4Array<A>::Hll4Array(const HllArray<A>& other) :
56  HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_4, other.isStartFullSize(), other.getAllocator()),
57  auxHashMap_(nullptr)
58 {
59  const int numBytes = this->hll4ArrBytes(this->lgConfigK_);
60  this->hllByteArr_.resize(numBytes, 0);
61  this->oooFlag_ = other.isOutOfOrderFlag();
62 
63  for (const auto coupon : other) { // all = false, so skip empty values
64  internalCouponUpdate(coupon); // updates KxQ registers
65  }
66  this->hipAccum_ = other.getHipAccum();
67  this->rebuild_kxq_curmin_ = false;
68 }
69 
70 template<typename A>
71 Hll4Array<A>::~Hll4Array() {
72  // hllByteArr deleted in parent
73  if (auxHashMap_ != nullptr) {
74  AuxHashMap<A>::make_deleter()(auxHashMap_);
75  }
76 }
77 
78 template<typename A>
79 std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
80  return [](HllSketchImpl<A>* ptr) {
81  Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
82  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
83  Hll4Alloc hll4Alloc(hll->getAllocator());
84  hll->~Hll4Array();
85  hll4Alloc.deallocate(hll, 1);
86  };
87 }
88 
89 template<typename A>
90 Hll4Array<A>* Hll4Array<A>::copy() const {
91  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
92  Hll4Alloc hll4Alloc(this->getAllocator());
93  return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
94 }
95 
96 template<typename A>
97 uint32_t Hll4Array<A>::getUpdatableSerializationBytes() const {
98  AuxHashMap<A>* auxHashMap = getAuxHashMap();
99  uint32_t auxBytes;
100  if (auxHashMap == nullptr) {
101  auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
102  } else {
103  auxBytes = 4 << auxHashMap->getLgAuxArrInts();
104  }
105  return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxBytes;
106 }
107 
108 template<typename A>
109 uint32_t Hll4Array<A>::getHllByteArrBytes() const {
110  return this->hll4ArrBytes(this->lgConfigK_);
111 }
112 
113 template<typename A>
114 AuxHashMap<A>* Hll4Array<A>::getAuxHashMap() const {
115  return auxHashMap_;
116 }
117 
118 template<typename A>
119 void Hll4Array<A>::putAuxHashMap(AuxHashMap<A>* auxHashMap) {
120  this->auxHashMap_ = auxHashMap;
121 }
122 
123 template<typename A>
124 uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
125  const uint8_t byte = this->hllByteArr_[slotNo >> 1];
126  if ((slotNo & 1) > 0) { // odd?
127  return byte >> 4;
128  }
129  return byte & hll_constants::loNibbleMask;
130 }
131 
132 template<typename A>
133 uint8_t Hll4Array<A>::adjustRawValue(uint32_t slot, uint8_t value) const {
134  if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
135  return auxHashMap_->mustFindValueFor(slot);
136 }
137 
138 template<typename A>
139 HllSketchImpl<A>* Hll4Array<A>::couponUpdate(uint32_t coupon) {
140  internalCouponUpdate(coupon);
141  return this;
142 }
143 
144 template<typename A>
145 void Hll4Array<A>::internalCouponUpdate(uint32_t coupon) {
146  const uint8_t newValue = HllUtil<A>::getValue(coupon);
147  if (newValue <= this->curMin_) {
148  return; // quick rejection, but only works for large N
149  }
150  const uint32_t configKmask = (1 << this->lgConfigK_) - 1;
151  const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
152  internalHll4Update(slotNo, newValue);
153 }
154 
155 template<typename A>
156 void Hll4Array<A>::putSlot(uint32_t slotNo, uint8_t newValue) {
157  const uint32_t byteno = slotNo >> 1;
158  const uint8_t oldValue = this->hllByteArr_[byteno];
159  if ((slotNo & 1) == 0) { // set low nibble
160  this->hllByteArr_[byteno]
161  = ((oldValue & hll_constants::hiNibbleMask) | (newValue & hll_constants::loNibbleMask));
162  } else { // set high nibble
163  this->hllByteArr_[byteno]
164  = ((oldValue & hll_constants::loNibbleMask) | ((newValue << 4) & hll_constants::hiNibbleMask));
165  }
166 }
167 
168 //In C: two-registers.c Line 836 in "hhb_abstract_set_slot_if_new_value_bigger" non-sparse
169 template<typename A>
170 void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) {
171 
172  const uint8_t rawStoredOldValue = getSlot(slotNo); // could be a 0
173  // this is provably a LB:
174  const uint8_t lbOnOldValue = rawStoredOldValue + this->curMin_; // lower bound, could be 0
175 
176  if (newVal > lbOnOldValue) { // 842
177  // Note: if an AUX_TOKEN exists, then auxHashMap must already exist
178  // 846: rawStoredOldValue == AUX_TOKEN
179  const uint8_t actualOldValue = (rawStoredOldValue < hll_constants::AUX_TOKEN)
180  ? (lbOnOldValue) : (auxHashMap_->mustFindValueFor(slotNo));
181 
182  if (newVal > actualOldValue) { // 848: actualOldValue could still be 0; newValue > 0
183  // we know that the array will change, but we haven't actually updated yet
184  this->hipAndKxQIncrementalUpdate(actualOldValue, newVal);
185 
186  // newVal >= curMin
187 
188  const uint8_t shiftedNewValue = newVal - this->curMin_; // 874
189  // redundant since we know newVal >= curMin,
190  // and lgConfigK bounds do not allow overflowing an int
191  //assert(shiftedNewValue >= 0);
192 
193  if (rawStoredOldValue == hll_constants::AUX_TOKEN) { // 879
194  // Given that we have an AUX_TOKEN, there are 4 cases for how to
195  // actually modify the data structure
196 
197  if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 1: 881
198  // the byte array already contains aux token
199  // This is the case where old and new values are both exceptions.
200  // The 4-bit array already is AUX_TOKEN, only need to update auxHashMap
201  auxHashMap_->mustReplace(slotNo, newVal);
202  }
203  else { // case 2: 885
204  // This is the hypothetical case where the old value is an exception and the new one is not,
205  // which is impossible given that curMin has not changed here and newVal > oldValue
206  }
207  } else { // rawStoredOldValue != AUX_TOKEN
208  if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 3: 892
209  // This is the case where the old value is not an exception and the new value is.
210  // The AUX_TOKEN must be stored in the 4-bit array and the new value
211  // added to the exception table
212  putSlot(slotNo, hll_constants::AUX_TOKEN);
213  if (auxHashMap_ == nullptr) {
214  auxHashMap_ = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
215  this->lgConfigK_, this->getAllocator());
216  }
217  auxHashMap_->mustAdd(slotNo, newVal);
218  }
219  else { // case 4: 897
220  // This is the case where neither the old value nor the new value is an exception.
221  // We just overwrite the 4-bit array with the shifted new value.
222  putSlot(slotNo, shiftedNewValue);
223  }
224  }
225 
226  // we just increased a pair value, so it might be time to change curMin
227  if (actualOldValue == this->curMin_) { // 908
228  --(this->numAtCurMin_);
229  while (this->numAtCurMin_ == 0) {
230  shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
231  // shifts values in 4-bit table and recounts curMin
232  }
233  }
234  } // end newVal <= actualOldValue
235  } // end newValue <= lbOnOldValue -> return, no need to update array
236 }
237 
238 // This scheme only works with two double registers (2 kxq values).
239 // HipAccum, kxq0 and kxq1 remain untouched.
240 // This changes curMin, numAtCurMin, hllByteArr and auxMap.
241 // Entering this routine assumes that all slots have valid values > 0 and <= 15.
242 // An AuxHashMap must exist if any values in the current hllByteArray are already 15.
243 // In C: again-two-registers.c Lines 710 "hhb_shift_to_bigger_curmin"
244 template<typename A>
245 void Hll4Array<A>::shiftToBiggerCurMin() {
246  const uint8_t newCurMin = this->curMin_ + 1;
247  const uint32_t configK = 1 << this->lgConfigK_;
248  const uint32_t configKmask = configK - 1;
249 
250  uint32_t numAtNewCurMin = 0;
251  uint32_t numAuxTokens = 0;
252 
253  // Walk through the slots of 4-bit array decrementing stored values by one unless it
254  // equals AUX_TOKEN, where it is left alone but counted to be checked later.
255  // If oldStoredValue is 0 it is an error.
256  // If the decremented value is 0, we increment numAtNewCurMin.
257  // Because getNibble is masked to 4 bits oldStoredValue can never be > 15 or negative
258  for (uint32_t i = 0; i < configK; i++) { //724
259  uint8_t oldStoredValue = getSlot(i);
260  if (oldStoredValue == 0) {
261  throw std::runtime_error("Array slots cannot be 0 at this point.");
262  }
263  if (oldStoredValue < hll_constants::AUX_TOKEN) {
264  putSlot(i, --oldStoredValue);
265  if (oldStoredValue == 0) { numAtNewCurMin++; }
266  } else { //oldStoredValue == AUX_TOKEN
267  numAuxTokens++;
268  if (auxHashMap_ == nullptr) {
269  throw std::logic_error("auxHashMap cannot be null at this point");
270  }
271  }
272  }
273 
274  // If old AuxHashMap exists, walk through it updating some slots and build a new AuxHashMap
275  // if needed.
276  AuxHashMap<A>* newAuxMap = nullptr;
277  if (auxHashMap_ != nullptr) {
278  uint32_t slotNum;
279  uint8_t oldActualVal;
280  uint8_t newShiftedVal;
281 
282  for (const auto coupon: *auxHashMap_) {
283  slotNum = HllUtil<A>::getLow26(coupon) & configKmask;
284  oldActualVal = HllUtil<A>::getValue(coupon);
285  if (oldActualVal < newCurMin) {
286  throw std::logic_error("oldActualVal < newCurMin when incrementing curMin");
287  }
288  newShiftedVal = oldActualVal - newCurMin;
289 
290  if (getSlot(slotNum) != hll_constants::AUX_TOKEN) {
291  throw std::logic_error("getSlot(slotNum) != AUX_TOKEN for item in auxiliary hash map");
292  }
293  // Array slot != AUX_TOKEN at getSlot(slotNum);
294  if (newShiftedVal < hll_constants::AUX_TOKEN) { // 756
295  if (newShiftedVal != 14) {
296  throw std::logic_error("newShiftedVal != 14 for item in old auxHashMap despite curMin increment");
297  }
298  // The former exception value isn't one anymore, so it stays out of new AuxHashMap.
299  // Correct the AUX_TOKEN value in the HLL array to the newShiftedVal (14).
300  putSlot(slotNum, newShiftedVal);
301  numAuxTokens--;
302  } else { //newShiftedVal >= AUX_TOKEN
303  // the former exception remains an exception, so must be added to the newAuxMap
304  if (newAuxMap == nullptr) {
305  newAuxMap = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
306  this->lgConfigK_, this->getAllocator());
307  }
308  newAuxMap->mustAdd(slotNum, oldActualVal);
309  }
310  } //end scan of oldAuxMap
311  } //end if (auxHashMap != null)
312  else { // oldAuxMap == null
313  if (numAuxTokens != 0) {
314  throw std::logic_error("No auxiliary hash map, but numAuxTokens != 0");
315  }
316  }
317 
318  if (newAuxMap != nullptr) {
319  if (newAuxMap->getAuxCount() != numAuxTokens) {
320  throw std::runtime_error("Inconsistent counts: auxCount: " + std::to_string(newAuxMap->getAuxCount())
321  + ", HLL tokesn: " + std::to_string(numAuxTokens));
322  }
323  }
324 
325  if (auxHashMap_ != nullptr) {
326  AuxHashMap<A>::make_deleter()(auxHashMap_);
327  }
328  auxHashMap_ = newAuxMap;
329 
330  this->curMin_ = newCurMin;
331  this->numAtCurMin_ = numAtNewCurMin;
332 }
333 
334 template<typename A>
335 typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
336  return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_,
337  auxHashMap_, this->curMin_, all);
338 }
339 
340 template<typename A>
341 typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
342  return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_,
343  this->tgtHllType_, auxHashMap_, this->curMin_, false);
344 }
345 
346 }
347 
348 #endif // _HLL4ARRAY_INTERNAL_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72
@ HLL_4
4 bits per entry (most compact, size may vary)
Definition: hll.hpp:73