datasketches-cpp
u32_table_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 // author Kevin Lang, Oath Research
21 
22 #ifndef U32_TABLE_IMPL_HPP_
23 #define U32_TABLE_IMPL_HPP_
24 
25 #include <stdexcept>
26 #include <algorithm>
27 #include <climits>
28 
29 namespace datasketches {
30 
31 template<typename A>
32 u32_table<A>::u32_table(const A& allocator):
33 lg_size(0),
34 num_valid_bits(0),
35 num_items(0),
36 slots(allocator)
37 {}
38 
39 template<typename A>
40 u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
41 lg_size(lg_size),
42 num_valid_bits(num_valid_bits),
43 num_items(0),
44 slots(1ULL << lg_size, UINT32_MAX, allocator)
45 {
46  if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
47  if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
48 }
49 
50 template<typename A>
51 uint32_t u32_table<A>::get_num_items() const {
52  return num_items;
53 }
54 
55 template<typename A>
56 const uint32_t* u32_table<A>::get_slots() const {
57  return slots.data();
58 }
59 
60 template<typename A>
61 uint8_t u32_table<A>::get_lg_size() const {
62  return lg_size;
63 }
64 
65 template<typename A>
66 void u32_table<A>::clear() {
67  std::fill(slots.begin(), slots.end(), UINT32_MAX);
68  num_items = 0;
69 }
70 
71 template<typename A>
72 bool u32_table<A>::maybe_insert(uint32_t item) {
73  const uint32_t index = lookup(item);
74  if (slots[index] == item) return false;
75  if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
76  slots[index] = item;
77  num_items++;
78  if (U32_TABLE_UPSIZE_DENOM * num_items > U32_TABLE_UPSIZE_NUMER * (1 << lg_size)) {
79  rebuild(lg_size + 1);
80  }
81  return true;
82 }
83 
84 template<typename A>
85 bool u32_table<A>::maybe_delete(uint32_t item) {
86  const uint32_t index = lookup(item);
87  if (slots[index] == UINT32_MAX) return false;
88  if (slots[index] != item) throw std::logic_error("item does not exist");
89  if (num_items == 0) throw std::logic_error("delete error");
90  // delete the item
91  slots[index] = UINT32_MAX;
92  num_items--;
93 
94  // re-insert all items between the freed slot and the next empty slot
95  const size_t mask = (1 << lg_size) - 1;
96  size_t probe = (index + 1) & mask;
97  uint32_t fetched = slots[probe];
98  while (fetched != UINT32_MAX) {
99  slots[probe] = UINT32_MAX;
100  must_insert(fetched);
101  probe = (probe + 1) & mask;
102  fetched = slots[probe];
103  }
104  // shrink if necessary
105  if (U32_TABLE_DOWNSIZE_DENOM * num_items < U32_TABLE_DOWNSIZE_NUMER * (1 << lg_size) && lg_size > 2) {
106  rebuild(lg_size - 1);
107  }
108  return true;
109 }
110 
111 // this one is specifically tailored to be a part of fm85 decompression scheme
112 template<typename A>
113 u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator) {
114  uint8_t lg_num_slots = 2;
115  while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
116  u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
117  // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
118  // However, we are starting out with the correct final table size, so the problem might not occur
119  for (size_t i = 0; i < num_pairs; i++) {
120  table.must_insert(pairs[i]);
121  }
122  table.num_items = num_pairs;
123  return table;
124 }
125 
126 template<typename A>
127 uint32_t u32_table<A>::lookup(uint32_t item) const {
128  const uint32_t size = 1 << lg_size;
129  const uint32_t mask = size - 1;
130  const uint8_t shift = num_valid_bits - lg_size;
131  uint32_t probe = item >> shift;
132  if (probe > mask) throw std::logic_error("probe out of range");
133  while (slots[probe] != item && slots[probe] != UINT32_MAX) {
134  probe = (probe + 1) & mask;
135  }
136  return probe;
137 }
138 
139 // counts and resizing must be handled by the caller
140 template<typename A>
141 void u32_table<A>::must_insert(uint32_t item) {
142  const uint32_t index = lookup(item);
143  if (slots[index] == item) throw std::logic_error("item exists");
144  if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
145  slots[index] = item;
146 }
147 
148 template<typename A>
149 void u32_table<A>::rebuild(uint8_t new_lg_size) {
150  if (new_lg_size < 2) throw std::logic_error("lg_size must be >= 2");
151  const uint32_t old_size = 1 << lg_size;
152  const uint32_t new_size = 1 << new_lg_size;
153  if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
154  vector_u32 old_slots = std::move(slots);
155  slots = vector_u32(new_size, UINT32_MAX, old_slots.get_allocator());
156  lg_size = new_lg_size;
157  for (uint32_t i = 0; i < old_size; i++) {
158  if (old_slots[i] != UINT32_MAX) {
159  must_insert(old_slots[i]);
160  }
161  }
162 }
163 
164 // While extracting the items from a linear probing hashtable,
165 // this will usually undo the wrap-around provided that the table
166 // isn't too full. Experiments suggest that for sufficiently large tables
167 // the load factor would have to be over 90 percent before this would fail frequently,
168 // and even then the subsequent sort would fix things up.
169 // The result is nearly sorted, so make sure to use an efficient sort for that case
170 template<typename A>
171 auto u32_table<A>::unwrapping_get_items() const -> vector_u32 {
172  if (num_items == 0) return vector_u32(slots.get_allocator());
173  const uint32_t table_size = 1 << lg_size;
174  vector_u32 result(num_items, 0, slots.get_allocator());
175  size_t i = 0;
176  size_t l = 0;
177  size_t r = num_items - 1;
178 
179  // special rules for the region before the first empty slot
180  uint32_t hi_bit = 1 << (num_valid_bits - 1);
181  while (i < table_size && slots[i] != UINT32_MAX) {
182  const uint32_t item = slots[i++];
183  if (item & hi_bit) { result[r--] = item; } // this item was probably wrapped, so move to end
184  else { result[l++] = item; }
185  }
186 
187  // the rest of the table is processed normally
188  while (i < table_size) {
189  const uint32_t item = slots[i++];
190  if (item != UINT32_MAX) result[l++] = item;
191  }
192  if (l != r + 1) throw std::logic_error("unwrapping error");
193  return result;
194 }
195 
196 // This merge is safe to use in carefully designed overlapping scenarios.
197 template<typename A>
198 void u32_table<A>::merge(
199  const uint32_t* arr_a, size_t start_a, size_t length_a, // input
200  const uint32_t* arr_b, size_t start_b, size_t length_b, // input
201  uint32_t* arr_c, size_t start_c // output
202 ) {
203  const size_t length_c = length_a + length_b;
204  const size_t lim_a = start_a + length_a;
205  const size_t lim_b = start_b + length_b;
206  const size_t lim_c = start_c + length_c;
207  size_t a = start_a;
208  size_t b = start_b;
209  size_t c = start_c;
210  for ( ; c < lim_c ; c++) {
211  if (b >= lim_b) { arr_c[c] = arr_a[a++]; }
212  else if (a >= lim_a) { arr_c[c] = arr_b[b++]; }
213  else if (arr_a[a] < arr_b[b]) { arr_c[c] = arr_a[a++]; }
214  else { arr_c[c] = arr_b[b++]; }
215  }
216  if (a != lim_a || b != lim_b) throw std::logic_error("merging error");
217 }
218 
219 // In applications where the input array is already nearly sorted,
220 // insertion sort runs in linear time with a very small constant.
221 // This introspective version of insertion sort protects against
222 // the quadratic cost of sorting bad input arrays.
223 // It keeps track of how much work has been done, and if that exceeds a
224 // constant times the array length, it switches to a different sorting algorithm.
225 
226 template<typename A>
227 void u32_table<A>::introspective_insertion_sort(uint32_t* a, size_t l, size_t r) { // r points past the rightmost element
228  const size_t length = r - l;
229  const size_t cost_limit = 8 * length;
230  size_t cost = 0;
231  for (size_t i = l + 1; i < r; i++) {
232  size_t j = i;
233  uint32_t v = a[i];
234  while (j >= l + 1 && v < a[j - 1]) {
235  a[j] = a[j - 1];
236  j--;
237  }
238  a[j] = v;
239  cost += i - j; // distance moved is a measure of work
240  if (cost > cost_limit) {
241  knuth_shell_sort3(a, l, r);
242  return;
243  }
244  }
245 }
246 
247 template<typename A>
248 void u32_table<A>::knuth_shell_sort3(uint32_t* a, size_t l, size_t r) {
249  size_t h;
250  for (h = 1; h < (r - l) / 9; h = 3 * h + 1);
251  for ( ; h > 0; h /= 3) {
252  for (size_t i = l + h; i < r; i++) {
253  size_t j = i;
254  const uint32_t v = a[i];
255  while (j >= l + h && v < a[j - h]) {
256  a[j] = a[j - h];
257  j -= h;
258  }
259  a[j] = v;
260  }
261  }
262 }
263 
264 } /* namespace datasketches */
265 
266 #endif
DataSketches namespace.
Definition: binomial_bounds.hpp:38