20 #ifndef _COUPONHASHSET_INTERNAL_HPP_
21 #define _COUPONHASHSET_INTERNAL_HPP_
23 #include "CouponHashSet.hpp"
32 static int32_t find(
const uint32_t* array, uint8_t lgArrInts, uint32_t coupon);
35 CouponHashSet<A>::CouponHashSet(uint8_t lgConfigK,
target_hll_type tgtHllType,
const A& allocator)
36 : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
39 throw std::invalid_argument(
"CouponHashSet must be initialized with lgConfigK > 7. Found: "
40 + std::to_string(lgConfigK));
45 CouponHashSet<A>::CouponHashSet(
const CouponHashSet<A>& that,
const target_hll_type tgtHllType)
46 : CouponList<A>(that, tgtHllType) {}
49 std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter()
const {
50 return [](HllSketchImpl<A>* ptr) {
51 CouponHashSet<A>* chs =
static_cast<CouponHashSet<A>*
>(ptr);
52 ChsAlloc chsa(chs->getAllocator());
53 chs->~CouponHashSet();
54 chsa.deallocate(chs, 1);
59 CouponHashSet<A>* CouponHashSet<A>::newSet(
const void* bytes,
size_t len,
const A& allocator) {
60 if (len < hll_constants::HASH_SET_INT_ARR_START) {
61 throw std::out_of_range(
"Input data length insufficient to hold CouponHashSet");
64 const uint8_t* data =
static_cast<const uint8_t*
>(bytes);
65 if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HASH_SET_PREINTS) {
66 throw std::invalid_argument(
"Incorrect number of preInts in input stream");
68 if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
69 throw std::invalid_argument(
"Wrong ser ver in input stream");
71 if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
72 throw std::invalid_argument(
"Input stream is not an HLL sketch");
75 const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
77 throw std::invalid_argument(
"Calling set constructor with non-set mode data");
80 const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
82 const uint8_t lgK = data[hll_constants::LG_K_BYTE];
84 throw std::invalid_argument(
"Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
85 + std::to_string(lgK));
87 uint8_t lgArrInts = data[hll_constants::LG_ARR_BYTE];
88 const bool compactFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ?
true :
false);
91 std::memcpy(&couponCount, data + hll_constants::HASH_SET_COUNT_INT,
sizeof(couponCount));
92 if (lgArrInts < hll_constants::LG_INIT_SET_SIZE) {
93 lgArrInts = HllUtil<>::computeLgArrInts(SET, couponCount, lgK);
97 const uint32_t couponsInArray = (compactFlag ? couponCount : (1 << lgArrInts));
98 const size_t expectedLength = hll_constants::HASH_SET_INT_ARR_START + (couponsInArray *
sizeof(uint32_t));
99 if (len < expectedLength) {
100 throw std::out_of_range(
"Byte array too short for sketch. Expected " + std::to_string(expectedLength)
101 +
", found: " + std::to_string(len));
104 ChsAlloc chsa(allocator);
105 CouponHashSet<A>* sketch =
new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
108 const uint8_t* curPos = data + hll_constants::HASH_SET_INT_ARR_START;
110 for (uint32_t i = 0; i < couponCount; ++i, curPos +=
sizeof(coupon)) {
111 std::memcpy(&coupon, curPos,
sizeof(coupon));
112 sketch->couponUpdate(coupon);
115 sketch->coupons_.resize(1ULL << lgArrInts);
116 sketch->couponCount_ = couponCount;
117 std::memcpy(sketch->coupons_.data(),
118 data + hll_constants::HASH_SET_INT_ARR_START,
119 couponsInArray *
sizeof(uint32_t));
126 CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is,
const A& allocator) {
127 uint8_t listHeader[8];
128 read(is, listHeader, 8 *
sizeof(uint8_t));
130 if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HASH_SET_PREINTS) {
131 throw std::invalid_argument(
"Incorrect number of preInts in input stream");
133 if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
134 throw std::invalid_argument(
"Wrong ser ver in input stream");
136 if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
137 throw std::invalid_argument(
"Input stream is not an HLL sketch");
140 hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
142 throw std::invalid_argument(
"Calling set constructor with non-set mode data");
145 const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
147 const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
149 throw std::invalid_argument(
"Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
150 + std::to_string(lgK));
152 uint8_t lgArrInts = listHeader[hll_constants::LG_ARR_BYTE];
153 const bool compactFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ?
true :
false);
155 const auto couponCount = read<uint32_t>(is);
156 if (lgArrInts < hll_constants::LG_INIT_SET_SIZE) {
157 lgArrInts = HllUtil<>::computeLgArrInts(SET, couponCount, lgK);
160 ChsAlloc chsa(allocator);
161 CouponHashSet<A>* sketch =
new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
162 typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
163 coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
168 for (uint32_t i = 0; i < couponCount; ++i) {
169 const auto coupon = read<uint32_t>(is);
170 sketch->couponUpdate(coupon);
173 sketch->coupons_.resize(1ULL << lgArrInts);
174 sketch->couponCount_ = couponCount;
176 read(is, sketch->coupons_.data(), sketch->coupons_.size() *
sizeof(uint32_t));
180 throw std::runtime_error(
"error reading from std::istream");
182 return ptr.release();
186 CouponHashSet<A>* CouponHashSet<A>::copy()
const {
187 ChsAlloc chsa(this->coupons_.get_allocator());
188 return new (chsa.allocate(1)) CouponHashSet<A>(*
this);
192 CouponHashSet<A>* CouponHashSet<A>::copyAs(target_hll_type tgtHllType)
const {
193 ChsAlloc chsa(this->coupons_.get_allocator());
194 return new (chsa.allocate(1)) CouponHashSet<A>(*
this, tgtHllType);
198 HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(uint32_t coupon) {
199 const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(
static_cast<uint32_t
>(this->coupons_.size()));
200 const int32_t index = find<A>(this->coupons_.data(), lgCouponArrInts, coupon);
204 this->coupons_[~index] = coupon;
205 ++this->couponCount_;
206 if (checkGrowOrPromote()) {
207 return this->promoteHeapListOrSetToHll(*
this);
213 uint32_t CouponHashSet<A>::getMemDataStart()
const {
214 return hll_constants::HASH_SET_INT_ARR_START;
218 uint8_t CouponHashSet<A>::getPreInts()
const {
219 return hll_constants::HASH_SET_PREINTS;
223 bool CouponHashSet<A>::checkGrowOrPromote() {
224 if (
static_cast<size_t>(hll_constants::RESIZE_DENOM * this->couponCount_) > (hll_constants::RESIZE_NUMER * this->coupons_.size())) {
225 const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(
static_cast<uint32_t
>(this->coupons_.size()));
226 if (lgCouponArrInts == (this->lgConfigK_ - 3)) {
229 growHashSet(lgCouponArrInts + 1);
235 void CouponHashSet<A>::growHashSet(uint8_t tgtLgCoupArrSize) {
236 const uint32_t tgtLen = 1 << tgtLgCoupArrSize;
237 vector_int coupons_new(tgtLen, 0, this->coupons_.get_allocator());
239 const uint32_t srcLen =
static_cast<uint32_t
>(this->coupons_.size());
240 for (uint32_t i = 0; i < srcLen; ++i) {
241 const uint32_t fetched = this->coupons_[i];
242 if (fetched != hll_constants::EMPTY) {
243 const int32_t idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched);
245 coupons_new[~idx] = fetched;
248 throw std::runtime_error(
"Error: Found duplicate coupon");
251 this->coupons_ = std::move(coupons_new);
255 static int32_t find(
const uint32_t* array, uint8_t lgArrInts, uint32_t coupon) {
256 const uint32_t arrMask = (1 << lgArrInts) - 1;
257 uint32_t probe = coupon & arrMask;
258 const uint32_t loopIndex = probe;
260 const uint32_t couponAtIdx = array[probe];
261 if (couponAtIdx == hll_constants::EMPTY) {
264 else if (coupon == couponAtIdx) {
267 const uint32_t stride = ((coupon & hll_constants::KEY_MASK_26) >> lgArrInts) | 1;
268 probe = (probe + stride) & arrMask;
269 }
while (probe != loopIndex);
270 throw std::invalid_argument(
"Key not found and no empty slots!");
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72