datasketches-cpp
kll_helper.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef KLL_HELPER_HPP_
21 #define KLL_HELPER_HPP_
22 
23 #include <stdexcept>
24 
25 namespace datasketches {
26 
27 #ifdef KLL_VALIDATION
28 extern uint32_t kll_next_offset;
29 #endif
30 
31 // 0 <= power <= 30
32 static const uint64_t powers_of_three[] = {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441,
33 1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467,
34 3486784401, 10460353203, 31381059609, 94143178827, 282429536481,
35 847288609443, 2541865828329, 7625597484987, 22876792454961, 68630377364883,
36 205891132094649};
37 
38 class kll_helper {
39  public:
40  static inline bool is_even(uint32_t value);
41  static inline bool is_odd(uint32_t value);
42  static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
43  static inline uint8_t ub_on_num_levels(uint64_t n);
44  static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
45  static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
46  static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
47  static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
48  static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
49 
50  template <typename T>
51  static void randomly_halve_down(T* buf, uint32_t start, uint32_t length);
52 
53  template <typename T>
54  static void randomly_halve_up(T* buf, uint32_t start, uint32_t length);
55 
56  // this version moves objects within the same buffer
57  // assumes that destination has initialized objects
58  // does not destroy the originals after the move
59  template <typename T, typename C>
60  static void merge_sorted_arrays(T* buf, uint32_t start_a, uint32_t len_a, uint32_t start_b, uint32_t len_b, uint32_t start_c);
61 
62  // this version is to merge from two different buffers into a third buffer
63  // initializes objects is the destination buffer
64  // moves objects from buf_a and destroys the originals
65  // copies objects from buf_b
66  template <typename T, typename C>
67  static void merge_sorted_arrays(const T* buf_a, uint32_t start_a, uint32_t len_a, const T* buf_b, uint32_t start_b, uint32_t len_b, T* buf_c, uint32_t start_c);
68 
69  struct compress_result {
70  uint8_t final_num_levels;
71  uint32_t final_capacity;
72  uint32_t final_num_items;
73  };
74 
75  /*
76  * Here is what we do for each level:
77  * If it does not need to be compacted, then simply copy it over.
78  *
79  * Otherwise, it does need to be compacted, so...
80  * Copy zero or one guy over.
81  * If the level above is empty, halve up.
82  * Else the level above is nonempty, so...
83  * halve down, then merge up.
84  * Adjust the boundaries of the level above.
85  *
86  * It can be proved that general_compress returns a sketch that satisfies the space constraints
87  * no matter how much data is passed in.
88  * All levels except for level zero must be sorted before calling this, and will still be
89  * sorted afterwards.
90  * Level zero is not required to be sorted before, and may not be sorted afterwards.
91  */
92  template <typename T, typename C>
93  static compress_result general_compress(uint16_t k, uint8_t m, uint8_t num_levels_in, T* items,
94  uint32_t* in_levels, uint32_t* out_levels, bool is_level_zero_sorted);
95 
96  template<typename T>
97  static void copy_construct(const T* src, size_t src_first, size_t src_last, T* dst, size_t dst_first);
98 
99  template<typename T>
100  static void move_construct(T* src, size_t src_first, size_t src_last, T* dst, size_t dst_first, bool destroy);
101 
102 #ifdef KLL_VALIDATION
103  private:
104 
105  static inline uint32_t deterministic_offset();
106 #endif
107 
108 };
109 
110 } /* namespace datasketches */
111 
112 #include "kll_helper_impl.hpp"
113 
114 #endif // KLL_HELPER_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38