datasketches-cpp
AuxHashMap-internal.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _AUXHASHMAP_INTERNAL_HPP_
21 #define _AUXHASHMAP_INTERNAL_HPP_
22 
23 #include <stdexcept>
24 
25 #include "HllUtil.hpp"
26 #include "AuxHashMap.hpp"
27 
28 namespace datasketches {
29 
30 template<typename A>
31 AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
32 lgConfigK(lgConfigK),
33 lgAuxArrInts(lgAuxArrInts),
34 auxCount(0),
35 entries(1ULL << lgAuxArrInts, 0, allocator)
36 {}
37 
38 template<typename A>
39 AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
40  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
41 }
42 
43 template<typename A>
44 AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45  return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
46 }
47 
48 template<typename A>
49 AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
50  uint8_t lgConfigK,
51  uint32_t auxCount, uint8_t lgAuxArrInts,
52  bool srcCompact, const A& allocator) {
53  uint8_t lgArrInts = lgAuxArrInts;
54  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
55  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
56  } else { // updatable
57  lgArrInts = lgAuxArrInts;
58  }
59 
60  const uint32_t configKmask = (1 << lgConfigK) - 1;
61 
62  AuxHashMap<A>* auxHashMap;
63  const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
64  if (srcCompact) {
65  if (len < auxCount * sizeof(int)) {
66  throw std::out_of_range("Input array too small to hold AuxHashMap image");
67  }
68  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
69  for (uint32_t i = 0; i < auxCount; ++i) {
70  const uint32_t pair = auxPtr[i];
71  const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
72  const uint8_t value = HllUtil<A>::getValue(pair);
73  auxHashMap->mustAdd(slotNo, value);
74  }
75  } else { // updatable
76  uint32_t itemsToRead = 1 << lgAuxArrInts;
77  if (len < itemsToRead * sizeof(uint32_t)) {
78  throw std::out_of_range("Input array too small to hold AuxHashMap image");
79  }
80  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
81  for (uint32_t i = 0; i < itemsToRead; ++i) {
82  const uint32_t pair = auxPtr[i];
83  if (pair == hll_constants::EMPTY) { continue; }
84  const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
85  const uint8_t value = HllUtil<A>::getValue(pair);
86  auxHashMap->mustAdd(slotNo, value);
87  }
88  }
89 
90  if (auxHashMap->getAuxCount() != auxCount) {
91  make_deleter()(auxHashMap);
92  throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
93  }
94 
95  return auxHashMap;
96 }
97 
98 template<typename A>
99 AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
100  uint32_t auxCount, uint8_t lgAuxArrInts,
101  bool srcCompact, const A& allocator) {
102  uint8_t lgArrInts = lgAuxArrInts;
103  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
104  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
105  } else { // updatable
106  lgArrInts = lgAuxArrInts;
107  }
108 
109  AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
110  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
111  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
112 
113  const uint32_t configKmask = (1 << lgConfigK) - 1;
114 
115  if (srcCompact) {
116  for (uint32_t i = 0; i < auxCount; ++i) {
117  const auto pair = read<int>(is);
118  uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
119  uint8_t value = HllUtil<A>::getValue(pair);
120  auxHashMap->mustAdd(slotNo, value);
121  }
122  } else { // updatable
123  const uint32_t itemsToRead = 1 << lgAuxArrInts;
124  for (uint32_t i = 0; i < itemsToRead; ++i) {
125  const auto pair = read<int>(is);
126  if (pair == hll_constants::EMPTY) { continue; }
127  const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128  const uint8_t value = HllUtil<A>::getValue(pair);
129  auxHashMap->mustAdd(slotNo, value);
130  }
131  }
132 
133  if (auxHashMap->getAuxCount() != auxCount) {
134  make_deleter()(auxHashMap);
135  throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
136  }
137 
138  return aux_ptr.release();
139 }
140 
141 template<typename A>
142 std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
143  return [](AuxHashMap<A>* ptr) {
144  ahmAlloc alloc(ptr->entries.get_allocator());
145  ptr->~AuxHashMap();
146  alloc.deallocate(ptr, 1);
147  };
148 }
149 
150 template<typename A>
151 AuxHashMap<A>* AuxHashMap<A>::copy() const {
152  return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
153 }
154 
155 template<typename A>
156 uint32_t AuxHashMap<A>::getAuxCount() const {
157  return auxCount;
158 }
159 
160 template<typename A>
161 uint32_t* AuxHashMap<A>::getAuxIntArr(){
162  return entries.data();
163 }
164 
165 template<typename A>
166 uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167  return lgAuxArrInts;
168 }
169 
170 template<typename A>
171 uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172  return auxCount << 2;
173 }
174 
175 template<typename A>
176 uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177  return 4 << lgAuxArrInts;
178 }
179 
180 template<typename A>
181 void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
182  const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183  const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184  if (index >= 0) {
185  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
187  }
188 
189  // found empty entry
190  entries[~index] = entry_pair;
191  ++auxCount;
192  checkGrow();
193 }
194 
195 template<typename A>
196 uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
197  const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198  if (index >= 0) {
199  return HllUtil<A>::getValue(entries[index]);
200  }
201 
202  throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
203 }
204 
205 template<typename A>
206 void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
207  const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208  if (idx >= 0) {
209  entries[idx] = HllUtil<A>::pair(slotNo, value);
210  return;
211  }
212 
213  throw std::invalid_argument("Pair not found: SlotNo: " + std::to_string(slotNo)
214  + ", Value: " + std::to_string(value));
215 }
216 
217 template<typename A>
218 void AuxHashMap<A>::checkGrow() {
219  if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220  growAuxSpace();
221  }
222 }
223 
224 template<typename A>
225 void AuxHashMap<A>::growAuxSpace() {
226  const int configKmask = (1 << lgConfigK) - 1;
227  const int newArrLen = 1 << ++lgAuxArrInts;
228  vector_int entries_new(newArrLen, 0, entries.get_allocator());
229  for (size_t i = 0; i < entries.size(); ++i) {
230  const uint32_t fetched = entries[i];
231  if (fetched != hll_constants::EMPTY) {
232  // find empty in new array
233  const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234  entries_new[~idx] = fetched;
235  }
236  }
237  entries = std::move(entries_new);
238 }
239 
240 //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
241 //If entire entry is empty, returns one's complement of index = found empty.
242 //If entry contains given slotNo, returns its index = found slotNo.
243 //Continues searching.
244 //If the probe comes back to original index, throws an exception.
245 template<typename A>
246 int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
247  const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
248  const uint32_t configKmask = (1 << lgConfigK) - 1;
249  uint32_t probe = slotNo & auxArrMask;
250  const uint32_t loopIndex = probe;
251  do {
252  const uint32_t arrVal = auxArr[probe];
253  if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
254  return ~probe; //empty
255  }
256  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
257  return probe; //found given slotNo, return probe = index into aux array
258  }
259  const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
260  probe = (probe + stride) & auxArrMask;
261  } while (probe != loopIndex);
262  throw std::runtime_error("Key not found and no empty slots!");
263 }
264 
265 template<typename A>
266 coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
267  return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
268 }
269 
270 template<typename A>
271 coupon_iterator<A> AuxHashMap<A>::end() const {
272  return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
273 }
274 
275 }
276 
277 #endif // _AUXHASHMAP_INTERNAL_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38