datasketches-cpp
Loading...
Searching...
No Matches
AuxHashMap-internal.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _AUXHASHMAP_INTERNAL_HPP_
21#define _AUXHASHMAP_INTERNAL_HPP_
22
23#include <stdexcept>
24
25#include "HllUtil.hpp"
26#include "AuxHashMap.hpp"
27
28namespace datasketches {
29
30template<typename A>
31AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
32lgConfigK(lgConfigK),
33lgAuxArrInts(lgAuxArrInts),
34auxCount(0),
35entries(1ULL << lgAuxArrInts, 0, allocator)
36{}
37
38template<typename A>
39AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
40 return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
41}
42
43template<typename A>
44AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45 return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
46}
47
48template<typename A>
49AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
50 uint8_t lgConfigK,
51 uint32_t auxCount, uint8_t lgAuxArrInts,
52 bool srcCompact, const A& allocator) {
53 uint8_t lgArrInts = lgAuxArrInts;
54 if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
55 lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
56 } else { // updatable
57 lgArrInts = lgAuxArrInts;
58 }
59
60 const uint32_t configKmask = (1 << lgConfigK) - 1;
61
62 AuxHashMap<A>* auxHashMap;
63 const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
64 if (srcCompact) {
65 if (len < auxCount * sizeof(int)) {
66 throw std::out_of_range("Input array too small to hold AuxHashMap image");
67 }
68 auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
69 for (uint32_t i = 0; i < auxCount; ++i) {
70 const uint32_t pair = auxPtr[i];
71 const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
72 const uint8_t value = HllUtil<A>::getValue(pair);
73 auxHashMap->mustAdd(slotNo, value);
74 }
75 } else { // updatable
76 uint32_t itemsToRead = 1 << lgAuxArrInts;
77 if (len < itemsToRead * sizeof(uint32_t)) {
78 throw std::out_of_range("Input array too small to hold AuxHashMap image");
79 }
80 auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
81 for (uint32_t i = 0; i < itemsToRead; ++i) {
82 const uint32_t pair = auxPtr[i];
83 if (pair == hll_constants::EMPTY) { continue; }
84 const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
85 const uint8_t value = HllUtil<A>::getValue(pair);
86 auxHashMap->mustAdd(slotNo, value);
87 }
88 }
89
90 if (auxHashMap->getAuxCount() != auxCount) {
91 make_deleter()(auxHashMap);
92 throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
93 }
94
95 return auxHashMap;
96}
97
98template<typename A>
99AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
100 uint32_t auxCount, uint8_t lgAuxArrInts,
101 bool srcCompact, const A& allocator) {
102 uint8_t lgArrInts = lgAuxArrInts;
103 if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
104 lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
105 } else { // updatable
106 lgArrInts = lgAuxArrInts;
107 }
108
109 AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
110 typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
111 aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
112
113 const uint32_t configKmask = (1 << lgConfigK) - 1;
114
115 if (srcCompact) {
116 for (uint32_t i = 0; i < auxCount; ++i) {
117 const auto pair = read<int>(is);
118 uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
119 uint8_t value = HllUtil<A>::getValue(pair);
120 auxHashMap->mustAdd(slotNo, value);
121 }
122 } else { // updatable
123 const uint32_t itemsToRead = 1 << lgAuxArrInts;
124 for (uint32_t i = 0; i < itemsToRead; ++i) {
125 const auto pair = read<int>(is);
126 if (pair == hll_constants::EMPTY) { continue; }
127 const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128 const uint8_t value = HllUtil<A>::getValue(pair);
129 auxHashMap->mustAdd(slotNo, value);
130 }
131 }
132
133 if (auxHashMap->getAuxCount() != auxCount) {
134 make_deleter()(auxHashMap);
135 throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
136 }
137
138 return aux_ptr.release();
139}
140
141template<typename A>
142std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
143 return [](AuxHashMap<A>* ptr) {
144 ahmAlloc alloc(ptr->entries.get_allocator());
145 ptr->~AuxHashMap();
146 alloc.deallocate(ptr, 1);
147 };
148}
149
150template<typename A>
151AuxHashMap<A>* AuxHashMap<A>::copy() const {
152 return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
153}
154
155template<typename A>
156uint32_t AuxHashMap<A>::getAuxCount() const {
157 return auxCount;
158}
159
160template<typename A>
161uint32_t* AuxHashMap<A>::getAuxIntArr(){
162 return entries.data();
163}
164
165template<typename A>
166uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167 return lgAuxArrInts;
168}
169
170template<typename A>
171uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172 return auxCount << 2;
173}
174
175template<typename A>
176uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177 return 4 << lgAuxArrInts;
178}
179
180template<typename A>
181void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
182 const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183 const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184 if (index >= 0) {
185 throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186 + std::to_string(slotNo) + ", Value: " + std::to_string(value));
187 }
188
189 // found empty entry
190 entries[~index] = entry_pair;
191 ++auxCount;
192 checkGrow();
193}
194
195template<typename A>
196uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
197 const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198 if (index >= 0) {
199 return HllUtil<A>::getValue(entries[index]);
200 }
201
202 throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
203}
204
205template<typename A>
206void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
207 const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208 if (idx >= 0) {
209 entries[idx] = HllUtil<A>::pair(slotNo, value);
210 return;
211 }
212
213 throw std::invalid_argument("Pair not found: SlotNo: " + std::to_string(slotNo)
214 + ", Value: " + std::to_string(value));
215}
216
217template<typename A>
218void AuxHashMap<A>::checkGrow() {
219 if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220 growAuxSpace();
221 }
222}
223
224template<typename A>
225void AuxHashMap<A>::growAuxSpace() {
226 const int configKmask = (1 << lgConfigK) - 1;
227 const int newArrLen = 1 << ++lgAuxArrInts;
228 vector_int entries_new(newArrLen, 0, entries.get_allocator());
229 for (size_t i = 0; i < entries.size(); ++i) {
230 const uint32_t fetched = entries[i];
231 if (fetched != hll_constants::EMPTY) {
232 // find empty in new array
233 const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234 entries_new[~idx] = fetched;
235 }
236 }
237 entries = std::move(entries_new);
238}
239
240//Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
241//If entire entry is empty, returns one's complement of index = found empty.
242//If entry contains given slotNo, returns its index = found slotNo.
243//Continues searching.
244//If the probe comes back to original index, throws an exception.
245template<typename A>
246int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
247 const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
248 const uint32_t configKmask = (1 << lgConfigK) - 1;
249 uint32_t probe = slotNo & auxArrMask;
250 const uint32_t loopIndex = probe;
251 do {
252 const uint32_t arrVal = auxArr[probe];
253 if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
254 return ~probe; //empty
255 }
256 else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
257 return probe; //found given slotNo, return probe = index into aux array
258 }
259 const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
260 probe = (probe + stride) & auxArrMask;
261 } while (probe != loopIndex);
262 throw std::runtime_error("Key not found and no empty slots!");
263}
264
265template<typename A>
266coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
267 return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
268}
269
270template<typename A>
271coupon_iterator<A> AuxHashMap<A>::end() const {
272 return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
273}
274
275}
276
277#endif // _AUXHASHMAP_INTERNAL_HPP_
DataSketches namespace.
Definition binomial_bounds.hpp:38