datasketches-cpp
Loading...
Searching...
No Matches
bounds_on_ratios_in_theta_sketched_sets.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
21#define BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
22
23#include <cstdint>
24#include <stdexcept>
25
26#include "bounds_on_ratios_in_sampled_sets.hpp"
27
28namespace datasketches {
29
49template<typename ExtractKey>
51public:
58 template<typename SketchA, typename SketchB>
59 static double lower_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
60 const uint64_t theta64_a = sketch_a.get_theta64();
61 const uint64_t theta64_b = sketch_b.get_theta64();
62 check_thetas(theta64_a, theta64_b);
63
64 const uint64_t count_b = sketch_b.get_num_retained();
65 const uint64_t count_a = theta64_a == theta64_b
66 ? sketch_a.get_num_retained()
67 : count_less_than_theta64(sketch_a, theta64_b);
68
69 if (count_a == 0) return 0;
70 const double f = sketch_b.get_theta();
72 }
73
80 template<typename SketchA, typename SketchB>
81 static double upper_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
82 const uint64_t theta64_a = sketch_a.get_theta64();
83 const uint64_t theta64_b = sketch_b.get_theta64();
84 check_thetas(theta64_a, theta64_b);
85
86 const uint64_t count_b = sketch_b.get_num_retained();
87 const uint64_t count_a = (theta64_a == theta64_b)
88 ? sketch_a.get_num_retained()
89 : count_less_than_theta64(sketch_a, theta64_b);
90
91 if (count_a == 0) return 1;
92 const double f = sketch_b.get_theta();
94 }
95
102 template<typename SketchA, typename SketchB>
103 static double estimate_of_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
104 const uint64_t theta64_a = sketch_a.get_theta64();
105 const uint64_t theta64_b = sketch_b.get_theta64();
106 check_thetas(theta64_a, theta64_b);
107
108 const uint64_t count_b = sketch_b.get_num_retained();
109 const uint64_t count_a = (theta64_a == theta64_b)
110 ? sketch_a.get_num_retained()
111 : count_less_than_theta64(sketch_a, theta64_b);
112
113 if (count_a == 0) return 0.5;
114 return static_cast<double>(count_b) / static_cast<double>(count_a);
115 }
116
117private:
118
/**
 * Validates the relationship between the two thetas.
 *
 * @param theta_a theta of sketch A as a 64-bit integer
 * @param theta_b theta of sketch B as a 64-bit integer
 * @throws std::invalid_argument if theta_b is greater than theta_a
 */
static inline void check_thetas(uint64_t theta_a, uint64_t theta_b) {
  if (theta_b > theta_a) {
    // The message states the condition that is actually enforced here.
    throw std::invalid_argument("theta_b must be <= theta_a");
  }
}
124
125 template<typename Sketch>
126 static uint64_t count_less_than_theta64(const Sketch& sketch, uint64_t theta) {
127 uint64_t count = 0;
128 for (const auto& entry: sketch) if (ExtractKey()(entry) < theta) ++count;
129 return count;
130 }
131
132};
133
134} /* namespace datasketches */
135
136# endif
static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate upper bound based on a 95% confidence interval.
Definition bounds_on_ratios_in_sampled_sets.hpp:70
static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate lower bound based on a 95% confidence interval.
Definition bounds_on_ratios_in_sampled_sets.hpp:56
Bounds on ratios in Theta sketched sets.
Definition bounds_on_ratios_in_theta_sketched_sets.hpp:50
static double estimate_of_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the estimate for B over A.
Definition bounds_on_ratios_in_theta_sketched_sets.hpp:103
static double upper_bound_for_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the approximate upper bound for B over A based on a 95% confidence interval.
Definition bounds_on_ratios_in_theta_sketched_sets.hpp:81
static double lower_bound_for_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the approximate lower bound for B over A based on a 95% confidence interval.
Definition bounds_on_ratios_in_theta_sketched_sets.hpp:59
DataSketches namespace.
Definition binomial_bounds.hpp:38