datasketches-cpp
quantiles_sorted_view.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef QUANTILES_SORTED_VIEW_HPP_
21 #define QUANTILES_SORTED_VIEW_HPP_
22 
23 #include <functional>
24 #include <cmath>
25 
26 #include "common_defs.hpp"
27 
28 namespace datasketches {
29 
33 template<
34  typename T,
35  typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
36  typename Allocator
37 >
39 public:
41  using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
42  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
43  using Container = std::vector<Entry, AllocEntry>;
44 
46  quantiles_sorted_view(uint32_t num, const Comparator& comparator, const Allocator& allocator);
47 
49  template<typename Iterator>
50  void add(Iterator begin, Iterator end, uint64_t weight);
51 
53  void convert_to_cummulative();
54 
55  class const_iterator;
56 
62  const_iterator begin() const;
63 
70  const_iterator end() const;
71 
73  size_t size() const;
74 
87  double get_rank(const T& item, bool inclusive = true) const;
88 
93  using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
94 
106  quantile_return_type get_quantile(double rank, bool inclusive = true) const;
107 
108  using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
109 
132  vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
133 
153  vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
154 
155 private:
156  Comparator comparator_;
157  uint64_t total_weight_;
158  Container entries_;
159 
160  static inline const T& deref_helper(const T* t) { return *t; }
161  static inline T deref_helper(T t) { return t; }
162 
163  struct compare_pairs_by_first {
164  explicit compare_pairs_by_first(const Comparator& comparator): comparator_(comparator) {}
165  bool operator()(const Entry& a, const Entry& b) const {
166  return comparator_(deref_helper(a.first), deref_helper(b.first));
167  }
168  Comparator comparator_;
169  };
170 
171  struct compare_pairs_by_second {
172  bool operator()(const Entry& a, const Entry& b) const {
173  return a.second < b.second;
174  }
175  };
176 
177  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
178  static inline T ref_helper(const T& t) { return t; }
179 
180  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
181  static inline const T* ref_helper(const T& t) { return std::addressof(t); }
182 
183  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
184  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(0, weight); }
185 
186  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
187  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }
188 
189  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
190  static inline void check_split_points(const T* items, uint32_t size) {
191  for (uint32_t i = 0; i < size ; i++) {
192  if (std::isnan(items[i])) {
193  throw std::invalid_argument("Values must not be NaN");
194  }
195  if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
196  throw std::invalid_argument("Values must be unique and monotonically increasing");
197  }
198  }
199  }
200 
201  template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
202  static inline void check_split_points(const T* items, uint32_t size) {
203  for (uint32_t i = 0; i < size ; i++) {
204  if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
205  throw std::invalid_argument("Items must be unique and monotonically increasing");
206  }
207  }
208  }
209 };
210 
211 template<typename T, typename C, typename A>
212 class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_view<T, C, A>::Container::const_iterator {
213 public:
214  using Base = typename quantiles_sorted_view<T, C, A>::Container::const_iterator;
215  using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
216 
217  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
218  const value_type operator*() const { return Base::operator*(); }
219 
220  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
221  const value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
222 
223  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
224  const value_type* operator->() const { return Base::operator->(); }
225 
226  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
227  const return_value_holder<value_type> operator->() const { return **this; }
228 
229  uint64_t get_weight() const {
230  if (*this == begin) return Base::operator*().second;
231  return Base::operator*().second - (*this - 1).operator*().second;
232  }
233 
234  uint64_t get_cumulative_weight(bool inclusive = true) const {
235  return inclusive ? Base::operator*().second : Base::operator*().second - get_weight();
236  }
237 
238 private:
239  Base begin;
240 
241  friend class quantiles_sorted_view<T, C, A>;
242  const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
243 };
244 
245 } /* namespace datasketches */
246 
247 #include "quantiles_sorted_view_impl.hpp"
248 
249 #endif
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition: quantiles_sorted_view.hpp:38
vector_double get_CDF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analo...
Definition: quantiles_sorted_view_impl.hpp:87
size_t size() const
Definition: quantiles_sorted_view_impl.hpp:117
vector_double get_PMF(const T *split_points, uint32_t size, bool inclusive=true) const
Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of sp...
Definition: quantiles_sorted_view_impl.hpp:98
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition: quantiles_sorted_view.hpp:93
double get_rank(const T &item, bool inclusive=true) const
Returns an approximation to the normalized rank of the given item.
Definition: quantiles_sorted_view_impl.hpp:64
quantile_return_type get_quantile(double rank, bool inclusive=true) const
Returns an item from the sketch that is the best approximation to an item from the original stream wi...
Definition: quantiles_sorted_view_impl.hpp:76
const_iterator begin() const
Iterator pointing to the first entry in the view.
Definition: quantiles_sorted_view_impl.hpp:107
const_iterator end() const
Iterator pointing to the past-the-end entry in the view.
Definition: quantiles_sorted_view_impl.hpp:112
typename std::conditional< std::is_arithmetic< T >::value, std::pair< T, uint64_t >, std::pair< const T *, uint64_t > >::type Entry
Entry type.
Definition: quantiles_sorted_view.hpp:41
DataSketches namespace.
Definition: binomial_bounds.hpp:38