23 #include "MurmurHash3.h"
24 #include "RelativeErrorTables.hpp"
25 #include "count_zeros.hpp"
26 #include "common_defs.hpp"
27 #include "ceiling_power_of_2.hpp"
35 enum hll_mode { LIST = 0, SET, HLL };
37 namespace hll_constants {
40 static const uint8_t SER_VER = 1;
41 static const uint8_t FAMILY_ID = 7;
43 static const uint8_t EMPTY_FLAG_MASK = 4;
44 static const uint8_t COMPACT_FLAG_MASK = 8;
45 static const uint8_t OUT_OF_ORDER_FLAG_MASK = 16;
46 static const uint8_t FULL_SIZE_FLAG_MASK = 32;
48 static const uint32_t PREAMBLE_INTS_BYTE = 0;
49 static const uint32_t SER_VER_BYTE = 1;
50 static const uint32_t FAMILY_BYTE = 2;
51 static const uint32_t LG_K_BYTE = 3;
52 static const uint32_t LG_ARR_BYTE = 4;
53 static const uint32_t FLAGS_BYTE = 5;
54 static const uint32_t LIST_COUNT_BYTE = 6;
55 static const uint32_t HLL_CUR_MIN_BYTE = 6;
56 static const uint32_t MODE_BYTE = 7;
59 static const uint32_t LIST_INT_ARR_START = 8;
60 static const uint8_t LIST_PREINTS = 2;
62 static const uint32_t HASH_SET_COUNT_INT = 8;
63 static const uint32_t HASH_SET_INT_ARR_START = 12;
64 static const uint8_t HASH_SET_PREINTS = 3;
66 static const uint8_t HLL_PREINTS = 10;
67 static const uint32_t HLL_BYTE_ARR_START = 40;
68 static const uint32_t HIP_ACCUM_DOUBLE = 8;
69 static const uint32_t KXQ0_DOUBLE = 16;
70 static const uint32_t KXQ1_DOUBLE = 24;
71 static const uint32_t CUR_MIN_COUNT_INT = 32;
72 static const uint32_t AUX_COUNT_INT = 36;
74 static const uint32_t EMPTY_SKETCH_SIZE_BYTES = 8;
77 static const uint8_t KEY_BITS_26 = 26;
78 static const uint8_t VAL_BITS_6 = 6;
79 static const uint32_t KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
80 static const uint32_t VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
81 static const uint32_t EMPTY = 0;
82 static const uint8_t MIN_LOG_K = 4;
83 static const uint8_t MAX_LOG_K = 21;
85 static const double HLL_HIP_RSE_FACTOR = 0.8325546;
86 static const double HLL_NON_HIP_RSE_FACTOR = 1.03896;
87 static const double COUPON_RSE_FACTOR = 0.409;
88 static const double COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
90 static const uint8_t LG_INIT_LIST_SIZE = 3;
91 static const uint8_t LG_INIT_SET_SIZE = 5;
92 static const uint32_t RESIZE_NUMER = 3;
93 static const uint32_t RESIZE_DENOM = 4;
95 static const uint8_t loNibbleMask = 0x0f;
96 static const uint8_t hiNibbleMask = 0xf0;
97 static const uint8_t AUX_TOKEN = 0xf;
103 static const uint8_t LG_AUX_ARR_INTS[] = {
104 0, 2, 2, 2, 2, 2, 2, 3, 3, 3,
105 4, 4, 5, 5, 6, 7, 8, 9, 10, 11,
106 12, 13, 14, 15, 16, 17, 18
114 template<
typename A = std::allocator<u
int8_t> >
115 class HllUtil final {
118 static uint32_t coupon(
const uint64_t hash[]);
119 static uint32_t coupon(
const HashState& hashState);
120 static void hash(
const void* key,
size_t keyLen, uint64_t seed, HashState& result);
121 static uint8_t checkLgK(uint8_t lgK);
122 static void checkMemSize(uint64_t minBytes, uint64_t capBytes);
123 static inline void checkNumStdDev(uint8_t numStdDev);
124 static uint32_t pair(uint32_t slotNo, uint8_t value);
125 static uint32_t getLow26(uint32_t coupon);
126 static uint8_t getValue(uint32_t coupon);
127 static uint8_t simpleIntLog2(uint32_t n);
128 static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
129 static double getRelErr(
bool upperBound,
bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
133 inline uint32_t HllUtil<A>::coupon(
const uint64_t hash[]) {
134 uint32_t addr26 = hash[0] & hll_constants::KEY_MASK_26;
135 uint8_t lz = count_leading_zeros_in_u64(hash[1]);
136 uint8_t value = ((lz > 62 ? 62 : lz) + 1);
137 return (value << hll_constants::KEY_BITS_26) | addr26;
141 inline uint32_t HllUtil<A>::coupon(
const HashState& hashState) {
142 uint32_t addr26 = (int) (hashState.h1 & hll_constants::KEY_MASK_26);
143 uint8_t lz = count_leading_zeros_in_u64(hashState.h2);
144 uint8_t value = ((lz > 62 ? 62 : lz) + 1);
145 return (value << hll_constants::KEY_BITS_26) | addr26;
149 inline void HllUtil<A>::hash(
const void* key,
size_t keyLen, uint64_t seed, HashState& result) {
150 MurmurHash3_x64_128(key, keyLen, seed, result);
154 inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
155 if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
158 throw std::invalid_argument(
"Invalid value of k: " + std::to_string(lgK));
163 inline double HllUtil<A>::getRelErr(
bool upperBound,
bool unioned,
164 uint8_t lgConfigK, uint8_t numStdDev) {
166 if (lgConfigK > 12) {
167 const double rseFactor = unioned ?
168 hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
169 const uint32_t configK = 1 << lgConfigK;
170 return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(configK);
172 return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
177 inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
178 if (capBytes < minBytes) {
179 throw std::invalid_argument(
"Given destination array is not large enough: " + std::to_string(capBytes));
184 inline void HllUtil<A>::checkNumStdDev(uint8_t numStdDev) {
185 if ((numStdDev < 1) || (numStdDev > 3)) {
186 throw std::invalid_argument(
"NumStdDev may not be less than 1 or greater than 3.");
191 inline uint32_t HllUtil<A>::pair(uint32_t slotNo, uint8_t value) {
192 return (value << hll_constants::KEY_BITS_26) | (slotNo & hll_constants::KEY_MASK_26);
196 inline uint32_t HllUtil<A>::getLow26(uint32_t coupon) {
197 return coupon & hll_constants::KEY_MASK_26;
201 inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
202 return coupon >> hll_constants::KEY_BITS_26;
206 inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
208 throw std::logic_error(
"cannot take log of 0");
210 return count_trailing_zeros_in_u32(n);
214 inline uint8_t HllUtil<A>::computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK) {
216 if (mode == LIST) {
return hll_constants::LG_INIT_LIST_SIZE; }
217 uint32_t ceilPwr2 = ceiling_power_of_2(count);
218 if ((hll_constants::RESIZE_DENOM * count) > (hll_constants::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
220 return std::max(hll_constants::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
223 return std::max(hll_constants::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
DataSketches namespace.
Definition: binomial_bounds.hpp:38