23#include "MurmurHash3.h"
24#include "RelativeErrorTables.hpp"
25#include "count_zeros.hpp"
26#include "common_defs.hpp"
27#include "ceiling_power_of_2.hpp"
35enum hll_mode { LIST = 0, SET, HLL };
37namespace hll_constants {
40static const uint8_t SER_VER = 1;
41static const uint8_t FAMILY_ID = 7;
43static const uint8_t EMPTY_FLAG_MASK = 4;
44static const uint8_t COMPACT_FLAG_MASK = 8;
45static const uint8_t OUT_OF_ORDER_FLAG_MASK = 16;
46static const uint8_t FULL_SIZE_FLAG_MASK = 32;
48static const uint32_t PREAMBLE_INTS_BYTE = 0;
49static const uint32_t SER_VER_BYTE = 1;
50static const uint32_t FAMILY_BYTE = 2;
51static const uint32_t LG_K_BYTE = 3;
52static const uint32_t LG_ARR_BYTE = 4;
53static const uint32_t FLAGS_BYTE = 5;
54static const uint32_t LIST_COUNT_BYTE = 6;
55static const uint32_t HLL_CUR_MIN_BYTE = 6;
56static const uint32_t MODE_BYTE = 7;
59static const uint32_t LIST_INT_ARR_START = 8;
60static const uint8_t LIST_PREINTS = 2;
62static const uint32_t HASH_SET_COUNT_INT = 8;
63static const uint32_t HASH_SET_INT_ARR_START = 12;
64static const uint8_t HASH_SET_PREINTS = 3;
66static const uint8_t HLL_PREINTS = 10;
67static const uint32_t HLL_BYTE_ARR_START = 40;
68static const uint32_t HIP_ACCUM_DOUBLE = 8;
69static const uint32_t KXQ0_DOUBLE = 16;
70static const uint32_t KXQ1_DOUBLE = 24;
71static const uint32_t CUR_MIN_COUNT_INT = 32;
72static const uint32_t AUX_COUNT_INT = 36;
74static const uint32_t EMPTY_SKETCH_SIZE_BYTES = 8;
77static const uint8_t KEY_BITS_26 = 26;
78static const uint8_t VAL_BITS_6 = 6;
79static const uint32_t KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
80static const uint32_t VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
81static const uint32_t EMPTY = 0;
82static const uint8_t MIN_LOG_K = 4;
83static const uint8_t MAX_LOG_K = 21;
85static const double HLL_HIP_RSE_FACTOR = 0.8325546;
86static const double HLL_NON_HIP_RSE_FACTOR = 1.03896;
87static const double COUPON_RSE_FACTOR = 0.409;
88static const double COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
90static const uint8_t LG_INIT_LIST_SIZE = 3;
91static const uint8_t LG_INIT_SET_SIZE = 5;
92static const uint32_t RESIZE_NUMER = 3;
93static const uint32_t RESIZE_DENOM = 4;
95static const uint8_t loNibbleMask = 0x0f;
96static const uint8_t hiNibbleMask = 0xf0;
97static const uint8_t AUX_TOKEN = 0xf;
103static const uint8_t LG_AUX_ARR_INTS[] = {
104 0, 2, 2, 2, 2, 2, 2, 3, 3, 3,
105 4, 4, 5, 5, 6, 7, 8, 9, 10, 11,
106 12, 13, 14, 15, 16, 17, 18
114template<
typename A = std::allocator<u
int8_t> >
118 static uint32_t coupon(
const uint64_t hash[]);
119 static uint32_t coupon(
const HashState& hashState);
120 static void hash(
const void* key,
size_t keyLen, uint64_t seed, HashState& result);
121 static uint8_t checkLgK(uint8_t lgK);
122 static void checkMemSize(uint64_t minBytes, uint64_t capBytes);
123 static inline void checkNumStdDev(uint8_t numStdDev);
124 static uint32_t pair(uint32_t slotNo, uint8_t value);
125 static uint32_t getLow26(uint32_t coupon);
126 static uint8_t getValue(uint32_t coupon);
127 static uint8_t simpleIntLog2(uint32_t n);
128 static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
129 static double getRelErr(
bool upperBound,
bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
133inline uint32_t HllUtil<A>::coupon(
const uint64_t hash[]) {
134 uint32_t addr26 = hash[0] & hll_constants::KEY_MASK_26;
135 uint8_t lz = count_leading_zeros_in_u64(hash[1]);
136 uint8_t value = ((lz > 62 ? 62 : lz) + 1);
137 return (value << hll_constants::KEY_BITS_26) | addr26;
141inline uint32_t HllUtil<A>::coupon(
const HashState& hashState) {
142 uint32_t addr26 = (int) (hashState.h1 & hll_constants::KEY_MASK_26);
143 uint8_t lz = count_leading_zeros_in_u64(hashState.h2);
144 uint8_t value = ((lz > 62 ? 62 : lz) + 1);
145 return (value << hll_constants::KEY_BITS_26) | addr26;
149inline void HllUtil<A>::hash(
const void* key,
size_t keyLen, uint64_t seed, HashState& result) {
150 MurmurHash3_x64_128(key, keyLen, seed, result);
154inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
155 if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
158 throw std::invalid_argument(
"Invalid value of k: " + std::to_string(lgK));
163inline double HllUtil<A>::getRelErr(
bool upperBound,
bool unioned,
164 uint8_t lgConfigK, uint8_t numStdDev) {
166 if (lgConfigK > 12) {
167 const double rseFactor = unioned ?
168 hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
169 const uint32_t configK = 1 << lgConfigK;
170 return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(configK);
172 return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
177inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
178 if (capBytes < minBytes) {
179 throw std::invalid_argument(
"Given destination array is not large enough: " + std::to_string(capBytes));
184inline void HllUtil<A>::checkNumStdDev(uint8_t numStdDev) {
185 if ((numStdDev < 1) || (numStdDev > 3)) {
186 throw std::invalid_argument(
"NumStdDev may not be less than 1 or greater than 3.");
191inline uint32_t HllUtil<A>::pair(uint32_t slotNo, uint8_t value) {
192 return (value << hll_constants::KEY_BITS_26) | (slotNo & hll_constants::KEY_MASK_26);
196inline uint32_t HllUtil<A>::getLow26(uint32_t coupon) {
197 return coupon & hll_constants::KEY_MASK_26;
201inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
202 return coupon >> hll_constants::KEY_BITS_26;
206inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
208 throw std::logic_error(
"cannot take log of 0");
210 return count_trailing_zeros_in_u32(n);
214inline uint8_t HllUtil<A>::computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK) {
216 if (mode == LIST) {
return hll_constants::LG_INIT_LIST_SIZE; }
217 uint32_t ceilPwr2 = ceiling_power_of_2(count);
218 if ((hll_constants::RESIZE_DENOM * count) > (hll_constants::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
220 return std::max(hll_constants::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
223 return std::max(hll_constants::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
DataSketches namespace.
Definition binomial_bounds.hpp:38