datasketches-cpp
quantiles_sorted_view_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef QUANTILES_SORTED_VIEW_IMPL_HPP_
21 #define QUANTILES_SORTED_VIEW_IMPL_HPP_
22 
23 #include <algorithm>
24 #include <stdexcept>
25 #include <cmath>
26 
27 namespace datasketches {
28 
29 template<typename T, typename C, typename A>
30 quantiles_sorted_view<T, C, A>::quantiles_sorted_view(uint32_t num, const C& comparator, const A& allocator):
31 comparator_(comparator),
32 total_weight_(0),
33 entries_(allocator)
34 {
35  entries_.reserve(num);
36 }
37 
38 template<typename T, typename C, typename A>
39 template<typename Iterator>
40 void quantiles_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
41  const size_t size_before = entries_.size();
42  for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
43  if (size_before > 0) {
44  Container tmp(entries_.get_allocator());
45  tmp.reserve(entries_.capacity());
46  std::merge(
47  entries_.begin(), entries_.begin() + size_before,
48  entries_.begin() + size_before, entries_.end(),
49  std::back_inserter(tmp), compare_pairs_by_first(comparator_)
50  );
51  std::swap(tmp, entries_);
52  }
53 }
54 
55 template<typename T, typename C, typename A>
56 void quantiles_sorted_view<T, C, A>::convert_to_cummulative() {
57  for (auto& entry: entries_) {
58  total_weight_ += entry.second;
59  entry.second = total_weight_;
60  }
61 }
62 
63 template<typename T, typename C, typename A>
64 double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
65  if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
66  auto it = inclusive ?
67  std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_))
68  : std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_));
69  // we need item just before
70  if (it == entries_.begin()) return 0;
71  --it;
72  return static_cast<double>(it->second) / total_weight_;
73 }
74 
75 template<typename T, typename C, typename A>
76 auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
77  if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
78  uint64_t weight = static_cast<uint64_t>(inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_);
79  auto it = inclusive ?
80  std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
81  : std::upper_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
82  if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
83  return deref_helper(it->first);
84 }
85 
86 template<typename T, typename C, typename A>
87 auto quantiles_sorted_view<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
88  if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
89  check_split_points(split_points, size);
90  vector_double ranks(entries_.get_allocator());
91  ranks.reserve(size + 1);
92  for (uint32_t i = 0; i < size; ++i) ranks.push_back(get_rank(split_points[i], inclusive));
93  ranks.push_back(1);
94  return ranks;
95 }
96 
97 template<typename T, typename C, typename A>
98 auto quantiles_sorted_view<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
99  auto buckets = get_CDF(split_points, size, inclusive);
100  for (uint32_t i = size; i > 0; --i) {
101  buckets[i] -= buckets[i - 1];
102  }
103  return buckets;
104 }
105 
106 template<typename T, typename C, typename A>
107 auto quantiles_sorted_view<T, C, A>::begin() const -> const_iterator {
108  return const_iterator(entries_.begin(), entries_.begin());
109 }
110 
111 template<typename T, typename C, typename A>
112 auto quantiles_sorted_view<T, C, A>::end() const -> const_iterator {
113  return const_iterator(entries_.end(), entries_.begin());
114 }
115 
116 template<typename T, typename C, typename A>
118  return entries_.size();
119 }
120 
121 } /* namespace datasketches */
122 
123 #endif
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition: quantiles_sorted_view.hpp:38
size_t size() const
Definition: quantiles_sorted_view_impl.hpp:117
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition: quantiles_sorted_view.hpp:93
typename std::conditional< std::is_arithmetic< T >::value, std::pair< T, uint64_t >, std::pair< const T *, uint64_t > >::type Entry
Entry type.
Definition: quantiles_sorted_view.hpp:41
DataSketches namespace.
Definition: binomial_bounds.hpp:38