datasketches-cpp
Loading...
Searching...
No Matches
quantiles_sorted_view_impl.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef QUANTILES_SORTED_VIEW_IMPL_HPP_
21#define QUANTILES_SORTED_VIEW_IMPL_HPP_
22
23#include <algorithm>
24#include <stdexcept>
25#include <cmath>
26
27namespace datasketches {
28
29template<typename T, typename C, typename A>
30quantiles_sorted_view<T, C, A>::quantiles_sorted_view(uint32_t num, const C& comparator, const A& allocator):
31comparator_(comparator),
32total_weight_(0),
33entries_(allocator)
34{
35 entries_.reserve(num);
36}
37
38template<typename T, typename C, typename A>
39template<typename Iterator>
40void quantiles_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
41 const size_t size_before = entries_.size();
42 for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
43 if (size_before > 0) {
44 Container tmp(entries_.get_allocator());
45 tmp.reserve(entries_.capacity());
46 std::merge(
47 entries_.begin(), entries_.begin() + size_before,
48 entries_.begin() + size_before, entries_.end(),
49 std::back_inserter(tmp), compare_pairs_by_first(comparator_)
50 );
51 std::swap(tmp, entries_);
52 }
53}
54
55template<typename T, typename C, typename A>
56void quantiles_sorted_view<T, C, A>::convert_to_cummulative() {
57 for (auto& entry: entries_) {
58 total_weight_ += entry.second;
59 entry.second = total_weight_;
60 }
61}
63template<typename T, typename C, typename A>
64double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
65 if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
66 auto it = inclusive ?
67 std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_))
68 : std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_));
69 // we need item just before
70 if (it == entries_.begin()) return 0;
71 --it;
72 return static_cast<double>(it->second) / total_weight_;
74
75template<typename T, typename C, typename A>
76auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
77 if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
78 uint64_t weight = static_cast<uint64_t>(inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_);
79 auto it = inclusive ?
80 std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
81 : std::upper_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
82 if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
83 return deref_helper(it->first);
84}
85
86template<typename T, typename C, typename A>
87auto quantiles_sorted_view<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
88 if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
89 check_split_points(split_points, size);
90 vector_double ranks(entries_.get_allocator());
91 ranks.reserve(size + 1);
92 for (uint32_t i = 0; i < size; ++i) ranks.push_back(get_rank(split_points[i], inclusive));
93 ranks.push_back(1);
94 return ranks;
95}
96
97template<typename T, typename C, typename A>
98auto quantiles_sorted_view<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
99 auto buckets = get_CDF(split_points, size, inclusive);
100 for (uint32_t i = size; i > 0; --i) {
101 buckets[i] -= buckets[i - 1];
102 }
103 return buckets;
104}
105
106template<typename T, typename C, typename A>
107auto quantiles_sorted_view<T, C, A>::begin() const -> const_iterator {
108 return const_iterator(entries_.begin(), entries_.begin());
109}
110
111template<typename T, typename C, typename A>
112auto quantiles_sorted_view<T, C, A>::end() const -> const_iterator {
113 return const_iterator(entries_.end(), entries_.begin());
114}
115
116template<typename T, typename C, typename A>
118 return entries_.size();
119}
120
121} /* namespace datasketches */
122
123#endif
Sorted view for quantiles sketches (REQ, KLL and Quantiles)
Definition quantiles_sorted_view.hpp:38
size_t size() const
Definition quantiles_sorted_view_impl.hpp:117
typename std::conditional< std::is_arithmetic< T >::value, T, const T & >::type quantile_return_type
Quantile return type.
Definition quantiles_sorted_view.hpp:93
typename std::conditional< std::is_arithmetic< T >::value, std::pair< T, uint64_t >, std::pair< const T *, uint64_t > >::type Entry
Entry type.
Definition quantiles_sorted_view.hpp:41
DataSketches namespace.
Definition binomial_bounds.hpp:38