20 #ifndef _COUPONLIST_INTERNAL_HPP_
21 #define _COUPONLIST_INTERNAL_HPP_
23 #include "CouponList.hpp"
24 #include "CubicInterpolation.hpp"
25 #include "HllUtil.hpp"
26 #include "count_zeros.hpp"
35 CouponList<A>::CouponList(uint8_t lgConfigK,
target_hll_type tgtHllType, hll_mode mode,
const A& allocator):
36 HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
39 coupons_(1ULL << (mode == hll_mode::LIST ? hll_constants::LG_INIT_LIST_SIZE : hll_constants::LG_INIT_SET_SIZE), 0, allocator)
43 CouponList<A>::CouponList(
const CouponList& that,
const target_hll_type tgtHllType):
44 HllSketchImpl<A>(that.lgConfigK_, tgtHllType, that.mode_, false),
45 couponCount_(that.couponCount_),
46 oooFlag_(that.oooFlag_),
47 coupons_(that.coupons_)
51 std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter()
const {
52 return [](HllSketchImpl<A>* ptr) {
53 CouponList<A>* cl =
static_cast<CouponList<A>*
>(ptr);
54 ClAlloc cla(cl->getAllocator());
56 cla.deallocate(cl, 1);
61 CouponList<A>* CouponList<A>::copy()
const {
62 ClAlloc cla(coupons_.get_allocator());
63 return new (cla.allocate(1)) CouponList<A>(*
this);
67 CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType)
const {
68 ClAlloc cla(coupons_.get_allocator());
69 return new (cla.allocate(1)) CouponList<A>(*
this, tgtHllType);
73 CouponList<A>* CouponList<A>::newList(
const void* bytes,
size_t len,
const A& allocator) {
74 if (len < hll_constants::LIST_INT_ARR_START) {
75 throw std::out_of_range(
"Input data length insufficient to hold CouponHashSet");
78 const uint8_t* data =
static_cast<const uint8_t*
>(bytes);
79 if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
80 throw std::invalid_argument(
"Incorrect number of preInts in input stream");
82 if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
83 throw std::invalid_argument(
"Wrong ser ver in input stream");
85 if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
86 throw std::invalid_argument(
"Input stream is not an HLL sketch");
89 hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
91 throw std::invalid_argument(
"Calling list constructor with non-list mode data");
94 target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
96 const uint8_t lgK = data[hll_constants::LG_K_BYTE];
97 const bool compact = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ?
true :
false);
98 const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ?
true :
false);
99 const bool emptyFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ?
true :
false);
101 const uint32_t couponCount = data[hll_constants::LIST_COUNT_BYTE];
102 const uint32_t couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
103 const size_t expectedLength = hll_constants::LIST_INT_ARR_START + (couponsInArray *
sizeof(uint32_t));
104 if (len < expectedLength) {
105 throw std::out_of_range(
"Byte array too short for sketch. Expected " + std::to_string(expectedLength)
106 +
", found: " + std::to_string(len));
109 ClAlloc cla(allocator);
110 CouponList<A>* sketch =
new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
111 sketch->couponCount_ = couponCount;
112 sketch->putOutOfOrderFlag(oooFlag);
116 std::memcpy(sketch->coupons_.data(), data + hll_constants::LIST_INT_ARR_START, couponCount *
sizeof(uint32_t));
123 CouponList<A>* CouponList<A>::newList(std::istream& is,
const A& allocator) {
124 uint8_t listHeader[8];
125 read(is, listHeader, 8 *
sizeof(uint8_t));
127 if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
128 throw std::invalid_argument(
"Incorrect number of preInts in input stream");
130 if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
131 throw std::invalid_argument(
"Wrong ser ver in input stream");
133 if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
134 throw std::invalid_argument(
"Input stream is not an HLL sketch");
137 hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
139 throw std::invalid_argument(
"Calling list constructor with non-list mode data");
142 const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
144 const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
145 const bool compact = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ?
true :
false);
146 const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ?
true :
false);
147 const bool emptyFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ?
true :
false);
149 ClAlloc cla(allocator);
150 CouponList<A>* sketch =
new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
151 using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
152 coupon_list_ptr ptr(sketch, sketch->get_deleter());
153 const uint32_t couponCount = listHeader[hll_constants::LIST_COUNT_BYTE];
154 sketch->couponCount_ = couponCount;
155 sketch->putOutOfOrderFlag(oooFlag);
161 const uint32_t numToRead = (compact ? couponCount :
static_cast<uint32_t
>(sketch->coupons_.size()));
162 read(is, sketch->coupons_.data(), numToRead *
sizeof(uint32_t));
166 throw std::runtime_error(
"error reading from std::istream");
168 return ptr.release();
172 auto CouponList<A>::serialize(
bool compact,
unsigned header_size_bytes)
const -> vector_bytes {
173 const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
174 vector_bytes byteArr(sketchSizeBytes, 0, getAllocator());
175 uint8_t* bytes = byteArr.data() + header_size_bytes;
177 bytes[hll_constants::PREAMBLE_INTS_BYTE] =
static_cast<uint8_t
>(getPreInts());
178 bytes[hll_constants::SER_VER_BYTE] =
static_cast<uint8_t
>(hll_constants::SER_VER);
179 bytes[hll_constants::FAMILY_BYTE] =
static_cast<uint8_t
>(hll_constants::FAMILY_ID);
180 bytes[hll_constants::LG_K_BYTE] =
static_cast<uint8_t
>(this->lgConfigK_);
181 bytes[hll_constants::LG_ARR_BYTE] = count_trailing_zeros_in_u32(
static_cast<uint32_t
>(coupons_.size()));
182 bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
183 bytes[hll_constants::LIST_COUNT_BYTE] =
static_cast<uint8_t
>(this->mode_ == LIST ? couponCount_ : 0);
184 bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
186 if (this->mode_ == SET) {
187 std::memcpy(bytes + hll_constants::HASH_SET_COUNT_INT, &couponCount_,
sizeof(couponCount_));
192 const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
195 std::memcpy(bytes + getMemDataStart(), coupons_.data(), coupons_.size() *
sizeof(uint32_t));
199 bytes += getMemDataStart();
200 for (
const uint32_t coupon: *
this) {
201 std::memcpy(bytes, &coupon,
sizeof(coupon));
202 bytes +=
sizeof(coupon);
208 throw std::runtime_error(
"Impossible condition when serializing");
215 void CouponList<A>::serialize(std::ostream& os,
const bool compact)
const {
217 const uint8_t preInts = getPreInts();
219 const uint8_t serialVersion(hll_constants::SER_VER);
220 write(os, serialVersion);
221 const uint8_t familyId(hll_constants::FAMILY_ID);
223 const uint8_t lgKByte = this->lgConfigK_;
225 const uint8_t lgArrIntsByte = count_trailing_zeros_in_u32(
static_cast<uint32_t
>(coupons_.size()));
226 write(os, lgArrIntsByte);
227 const uint8_t flagsByte = this->makeFlagsByte(compact);
228 write(os, flagsByte);
230 if (this->mode_ == LIST) {
231 const uint8_t listCount =
static_cast<uint8_t
>(couponCount_);
232 write(os, listCount);
234 const uint8_t unused = 0;
238 const uint8_t modeByte = this->makeModeByte();
241 if (this->mode_ == SET) {
243 write(os, couponCount_);
248 const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
251 write(os, coupons_.data(), coupons_.size() *
sizeof(uint32_t));
255 for (
const uint32_t coupon: *
this) {
262 throw std::runtime_error(
"Impossible condition when serializing");
269 HllSketchImpl<A>* CouponList<A>::couponUpdate(uint32_t coupon) {
270 for (
size_t i = 0; i < coupons_.size(); ++i) {
271 const uint32_t couponAtIdx = coupons_[i];
272 if (couponAtIdx == hll_constants::EMPTY) {
273 coupons_[i] = coupon;
275 if (couponCount_ ==
static_cast<uint32_t
>(coupons_.size())) {
276 if (this->lgConfigK_ < 8) {
277 return promoteHeapListOrSetToHll(*
this);
279 return promoteHeapListToSet(*
this);
284 if (couponAtIdx == coupon) {
289 throw std::runtime_error(
"Array invalid: no empties and no duplicates");
293 double CouponList<A>::getCompositeEstimate()
const {
return getEstimate(); }
296 double CouponList<A>::getEstimate()
const {
297 const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
298 return fmax(est, couponCount_);
302 double CouponList<A>::getLowerBound(uint8_t numStdDev)
const {
303 HllUtil<A>::checkNumStdDev(numStdDev);
304 const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
305 const double tmp = est / (1.0 + (numStdDev * hll_constants::COUPON_RSE));
306 return fmax(tmp, couponCount_);
310 double CouponList<A>::getUpperBound(uint8_t numStdDev)
const {
311 HllUtil<A>::checkNumStdDev(numStdDev);
312 const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
313 const double tmp = est / (1.0 - (numStdDev * hll_constants::COUPON_RSE));
314 return fmax(tmp, couponCount_);
318 bool CouponList<A>::isEmpty()
const {
return getCouponCount() == 0; }
321 uint32_t CouponList<A>::getUpdatableSerializationBytes()
const {
322 return getMemDataStart() +
static_cast<uint32_t
>(coupons_.size()) *
sizeof(uint32_t);
326 uint32_t CouponList<A>::getCouponCount()
const {
331 uint32_t CouponList<A>::getCompactSerializationBytes()
const {
332 return getMemDataStart() + (couponCount_ << 2);
336 uint32_t CouponList<A>::getMemDataStart()
const {
337 return hll_constants::LIST_INT_ARR_START;
341 uint8_t CouponList<A>::getPreInts()
const {
342 return hll_constants::LIST_PREINTS;
346 bool CouponList<A>::isCompact()
const {
return false; }
349 bool CouponList<A>::isOutOfOrderFlag()
const {
return oooFlag_; }
352 void CouponList<A>::putOutOfOrderFlag(
bool oooFlag) {
357 A CouponList<A>::getAllocator()
const {
358 return coupons_.get_allocator();
362 HllSketchImpl<A>* CouponList<A>::promoteHeapListToSet(CouponList& list) {
363 return HllSketchImplFactory<A>::promoteListToSet(list);
367 HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
368 return HllSketchImplFactory<A>::promoteListOrSetToHll(src);
372 coupon_iterator<A> CouponList<A>::begin(
bool all)
const {
373 return coupon_iterator<A>(coupons_.data(), coupons_.size(), 0, all);
377 coupon_iterator<A> CouponList<A>::end()
const {
378 return coupon_iterator<A>(coupons_.data(), coupons_.size(), coupons_.size(),
false);
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72