datasketches-cpp
bounds_on_ratios_in_sampled_sets.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
21 #define BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
22 
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <string>

#include "bounds_binomial_proportions.hpp"
28 
29 namespace datasketches {
30 
44 public:
45  static constexpr double NUM_STD_DEVS = 2.0; // base number of standard deviations; scaled by hacky_adjuster(f) before being passed to the binomial proportion bounds
46 
56  static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
57  check_inputs(a, b, f);
58  if (a == 0) return 0.0;
59  if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
60  return bounds_binomial_proportions::approximate_lower_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
61  }
62 
70  static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
71  check_inputs(a, b, f);
72  if (a == 0) return 1.0;
73  if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
74  return bounds_binomial_proportions::approximate_upper_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
75  }
76 
83  static double get_estimate_of_b_over_a(uint64_t a, uint64_t b) {
84  check_inputs(a, b, 0.3);
85  if (a == 0) return 0.5;
86  return static_cast<double>(b) / static_cast<double>(a);
87  }
88 
95  static double estimate_of_a(uint64_t a, double f) {
96  check_inputs(a, 1, f);
97  return a / f;
98  }
99 
106  static double estimate_of_b(uint64_t b, double f) {
107  check_inputs(b + 1, b, f);
108  return b / f;
109  }
110 
111 private:
/**
 * Compute the factor by which the number of standard deviations is scaled for a given f.
 * The factor is sqrt(1 - f); for f above 0.5 a small linear term 0.01 * (f - 0.5) is added,
 * so the result stays slightly above zero as f approaches 1.
 * @param f fraction, expected to be in the range (0, 1] (callers validate it beforehand)
 * @return the scaling factor described above
 */
static double hacky_adjuster(double f) {
  // std::sqrt from <cmath> rather than the unqualified global name, which is
  // only available through transitive includes.
  const double root = std::sqrt(1.0 - f);
  return (f <= 0.5) ? root : root + (0.01 * (f - 0.5));
}
124 
/**
 * Validate the arguments shared by the public methods.
 * @param a denominator count; must be >= b
 * @param b numerator count
 * @param f fraction; must satisfy 0 < f <= 1 (NaN is rejected)
 * @throws std::invalid_argument if a < b, or if f is not in the range (0, 1]
 */
static void check_inputs(uint64_t a, uint64_t b, double f) {
  if (a < b) {
    throw std::invalid_argument("a must be >= b: a = " + std::to_string(a) + ", b = " + std::to_string(b));
  }
  // Written as the negation of the valid range so that NaN, for which every
  // ordered comparison is false, is rejected instead of slipping through.
  const bool f_in_range = (f > 0.0) && (f <= 1.0);
  if (!f_in_range) {
    throw std::invalid_argument("Required: ((f <= 1.0) && (f > 0.0)): " + std::to_string(f));
  }
}
133 
134 };
135 
136 } /* namespace datasketches */
137 
138 # endif
static double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs)
Computes upper bound of approximate Clopper-Pearson confidence interval for a binomial proportion.
Definition: bounds_binomial_proportions.hpp:148
static double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs)
Computes lower bound of approximate Clopper-Pearson confidence interval for a binomial proportion.
Definition: bounds_binomial_proportions.hpp:113
Bounds on ratios in sampled sets.
Definition: bounds_on_ratios_in_sampled_sets.hpp:43
static double get_estimate_of_b_over_a(uint64_t a, uint64_t b)
Return the estimate of b over a.
Definition: bounds_on_ratios_in_sampled_sets.hpp:83
static double estimate_of_b(uint64_t b, double f)
Return the estimate of B.
Definition: bounds_on_ratios_in_sampled_sets.hpp:106
static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate upper bound based on a 95% confidence interval.
Definition: bounds_on_ratios_in_sampled_sets.hpp:70
static double estimate_of_a(uint64_t a, double f)
Return the estimate of A.
Definition: bounds_on_ratios_in_sampled_sets.hpp:95
static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate lower bound based on a 95% confidence interval.
Definition: bounds_on_ratios_in_sampled_sets.hpp:56
DataSketches namespace.
Definition: binomial_bounds.hpp:38