datasketches-cpp
Hll8Array-internal.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLL8ARRAY_INTERNAL_HPP_
21 #define _HLL8ARRAY_INTERNAL_HPP_
22 
23 #include "Hll8Array.hpp"
24 
25 namespace datasketches {
26 
27 template<typename A>
28 Hll8Array<A>::Hll8Array(uint8_t lgConfigK, bool startFullSize, const A& allocator):
29 HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator)
30 {
31  const int numBytes = this->hll8ArrBytes(lgConfigK);
32  this->hllByteArr_.resize(numBytes, 0);
33 }
34 
35 template<typename A>
36 Hll8Array<A>::Hll8Array(const HllArray<A>& other):
37  HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_8, other.isStartFullSize(), other.getAllocator())
38 {
39  const int numBytes = this->hll8ArrBytes(this->lgConfigK_);
40  this->hllByteArr_.resize(numBytes, 0);
41  this->oooFlag_ = other.isOutOfOrderFlag();
42  uint32_t num_zeros = 1 << this->lgConfigK_;
43 
44  for (const auto coupon : other) { // all = false, so skip empty values
45  num_zeros--;
46  internalCouponUpdate(coupon); // updates KxQ registers
47  }
48 
49  this->numAtCurMin_ = num_zeros;
50  this->hipAccum_ = other.getHipAccum();
51  this->rebuild_kxq_curmin_ = false;
52 }
53 
54 template<typename A>
55 std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
56  return [](HllSketchImpl<A>* ptr) {
57  Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr);
58  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
59  Hll8Alloc hll8Alloc(hll->getAllocator());
60  hll->~Hll8Array();
61  hll8Alloc.deallocate(hll, 1);
62  };
63 }
64 
65 template<typename A>
66 Hll8Array<A>* Hll8Array<A>::copy() const {
67  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
68  Hll8Alloc hll8Alloc(this->getAllocator());
69  return new (hll8Alloc.allocate(1)) Hll8Array<A>(*this);
70 }
71 
72 template<typename A>
73 uint8_t Hll8Array<A>::getSlot(uint32_t slotNo) const {
74  return this->hllByteArr_[slotNo];
75 }
76 
77 template<typename A>
78 void Hll8Array<A>::putSlot(uint32_t slotNo, uint8_t value) {
79  this->hllByteArr_[slotNo] = value;
80 }
81 
82 template<typename A>
83 uint32_t Hll8Array<A>::getHllByteArrBytes() const {
84  return this->hll8ArrBytes(this->lgConfigK_);
85 }
86 
87 template<typename A>
88 HllSketchImpl<A>* Hll8Array<A>::couponUpdate(uint32_t coupon) {
89  internalCouponUpdate(coupon);
90  return this;
91 }
92 
93 template<typename A>
94 void Hll8Array<A>::internalCouponUpdate(uint32_t coupon) {
95  const uint32_t configKmask = (1 << this->lgConfigK_) - 1;
96  const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
97  const uint8_t newVal = HllUtil<A>::getValue(coupon);
98 
99  const uint8_t curVal = this->hllByteArr_[slotNo];
100  if (newVal > curVal) {
101  this->hllByteArr_[slotNo] = newVal;
102  this->hipAndKxQIncrementalUpdate(curVal, newVal);
103  this->numAtCurMin_ -= curVal == 0; // interpret numAtCurMin as num zeros
104  }
105 }
106 
107 template<typename A>
108 void Hll8Array<A>::mergeList(const CouponList<A>& src) {
109  for (const auto coupon: src) {
110  internalCouponUpdate(coupon);
111  }
112 }
113 
114 template<typename A>
115 void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
116  // at this point src_k >= dst_k
117  // we can optimize further when the k values are equal
118  if (this->getLgConfigK() == src.getLgConfigK()) {
119  if (src.getTgtHllType() == target_hll_type::HLL_8) {
120  uint32_t i = 0;
121  for (const auto value: src.getHllArray()) {
122  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
123  ++i;
124  }
125  } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
126  const uint32_t src_k = 1 << src.getLgConfigK();
127  uint32_t i = 0;
128  const uint8_t* ptr = src.getHllArray().data();
129  while (i < src_k) {
130  uint8_t value = *ptr & 0x3f;
131  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
132  ++i;
133  value = *ptr++ >> 6;
134  value |= (*ptr & 0x0f) << 2;
135  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
136  ++i;
137  value = *ptr++ >> 4;
138  value |= (*ptr & 3) << 4;
139  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
140  ++i;
141  value = *ptr++ >> 2;
142  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
143  ++i;
144  }
145  } else { // HLL_4
146  const auto& src4 = static_cast<const Hll4Array<A>&>(src);
147  uint32_t i = 0;
148  for (const auto byte: src.getHllArray()) {
149  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
150  ++i;
151  this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte >> 4));
152  ++i;
153  }
154  }
155  } else {
156  // src_k > dst_k
157  const uint32_t dst_mask = (1 << this->getLgConfigK()) - 1;
158  // special treatment below to optimize performance
159  if (src.getTgtHllType() == target_hll_type::HLL_8) {
160  uint32_t i = 0;
161  for (const auto value: src.getHllArray()) {
162  processValue(i++, dst_mask, value);
163  }
164  } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
165  const uint32_t src_k = 1 << src.getLgConfigK();
166  uint32_t i = 0;
167  const uint8_t* ptr = src.getHllArray().data();
168  while (i < src_k) {
169  uint8_t value = *ptr & 0x3f;
170  processValue(i++, dst_mask, value);
171  value = *ptr++ >> 6;
172  value |= (*ptr & 0x0f) << 2;
173  processValue(i++, dst_mask, value);
174  value = *ptr++ >> 4;
175  value |= (*ptr & 3) << 4;
176  processValue(i++, dst_mask, value);
177  value = *ptr++ >> 2;
178  processValue(i++, dst_mask, value);
179  }
180  } else { // HLL_4
181  const auto& src4 = static_cast<const Hll4Array<A>&>(src);
182  uint32_t i = 0;
183  for (const auto byte: src.getHllArray()) {
184  processValue(i, dst_mask, src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
185  ++i;
186  processValue(i, dst_mask, src4.adjustRawValue(i, byte >> 4));
187  ++i;
188  }
189  }
190  }
191  this->setRebuildKxqCurminFlag(true);
192 }
193 
194 
195 template<typename A>
196 void Hll8Array<A>::processValue(uint32_t slot, uint32_t mask, uint8_t new_val) {
197  const size_t index = slot & mask;
198  this->hllByteArr_[index] = std::max(this->hllByteArr_[index], new_val);
199 }
200 
201 }
202 
203 #endif // _HLL8ARRAY_INTERNAL_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition: hll.hpp:75