datasketches-cpp
Loading...
Searching...
No Matches
bounds_on_ratios_in_sampled_sets.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
21#define BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
22
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>
26
27#include "bounds_binomial_proportions.hpp"
28
29namespace datasketches {
30
44public:
// Base number of standard deviations used by the bound functions below. It is
// multiplied by hacky_adjuster(f) before being handed to the
// bounds_binomial_proportions routines.
45 static constexpr double NUM_STD_DEVS = 2.0;
46
56 static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
57 check_inputs(a, b, f);
58 if (a == 0) return 0.0;
59 if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
60 return bounds_binomial_proportions::approximate_lower_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
61 }
62
70 static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
71 check_inputs(a, b, f);
72 if (a == 0) return 1.0;
73 if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
74 return bounds_binomial_proportions::approximate_upper_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
75 }
76
83 static double get_estimate_of_b_over_a(uint64_t a, uint64_t b) {
84 check_inputs(a, b, 0.3);
85 if (a == 0) return 0.5;
86 return static_cast<double>(b) / static_cast<double>(a);
87 }
88
95 static double estimate_of_a(uint64_t a, double f) {
96 check_inputs(a, 1, f);
97 return a / f;
98 }
99
106 static double estimate_of_b(uint64_t b, double f) {
107 check_inputs(b + 1, b, f);
108 return b / f;
109 }
110
111private:
/**
 * Compute the factor by which NUM_STD_DEVS is scaled before the
 * binomial-proportion bounds are evaluated.
 *
 * The factor is sqrt(1 - f); for f > 0.5 a small linear term
 * 0.01 * (f - 0.5) is added on top. As the name suggests, this looks like an
 * empirical correction -- the rationale is not visible in this file.
 *
 * @param f expected to lie in (0, 1]; callers validate it via check_inputs
 * @return the scaling factor for the number of standard deviations
 */
static double hacky_adjuster(double f) {
  // std::sqrt from <cmath>, rather than relying on an unqualified global sqrt
  // being visible through a transitive include.
  const double root = std::sqrt(1.0 - f);
  if (f <= 0.5) {
    return root;
  }
  return root + (0.01 * (f - 0.5));
}
124
/**
 * Validate the arguments shared by the public functions of this class.
 *
 * @param a must be >= b
 * @param b must be <= a
 * @param f must satisfy 0 < f <= 1; NaN is rejected as well
 * @throws std::invalid_argument if a < b, or if f is NaN or outside (0, 1]
 */
static void check_inputs(uint64_t a, uint64_t b, double f) {
  if (a < b) {
    throw std::invalid_argument("a must be >= b: a = " + std::to_string(a) + ", b = " + std::to_string(b));
  }
  // Expressed as a negated "in range" test: every comparison involving NaN is
  // false, so (f > 1.0) || (f <= 0.0) would silently let NaN through.
  const bool f_in_range = (f > 0.0) && (f <= 1.0);
  if (!f_in_range) {
    throw std::invalid_argument("Required: ((f <= 1.0) && (f > 0.0)): " + std::to_string(f));
  }
}
133
134};
135
136} /* namespace datasketches */
137
138# endif
static double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs)
Computes upper bound of approximate Clopper-Pearson confidence interval for a binomial proportion.
Definition bounds_binomial_proportions.hpp:148
static double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs)
Computes lower bound of approximate Clopper-Pearson confidence interval for a binomial proportion.
Definition bounds_binomial_proportions.hpp:113
Bounds on ratios in sampled sets.
Definition bounds_on_ratios_in_sampled_sets.hpp:43
static double get_estimate_of_b_over_a(uint64_t a, uint64_t b)
Return the estimate of b over a.
Definition bounds_on_ratios_in_sampled_sets.hpp:83
static double estimate_of_b(uint64_t b, double f)
Return the estimate of B.
Definition bounds_on_ratios_in_sampled_sets.hpp:106
static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate upper bound based on a 95% confidence interval.
Definition bounds_on_ratios_in_sampled_sets.hpp:70
static double estimate_of_a(uint64_t a, double f)
Return the estimate of A.
Definition bounds_on_ratios_in_sampled_sets.hpp:95
static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate lower bound based on a 95% confidence interval.
Definition bounds_on_ratios_in_sampled_sets.hpp:56
DataSketches namespace.
Definition binomial_bounds.hpp:38