datasketches-cpp
bounds_on_ratios_in_theta_sketched_sets.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
21 #define BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
22 
23 #include <cstdint>
24 #include <stdexcept>
25 
26 #include "bounds_on_ratios_in_sampled_sets.hpp"
27 
28 namespace datasketches {
29 
49 template<typename ExtractKey>
51 public:
58  template<typename SketchA, typename SketchB>
59  static double lower_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
60  const uint64_t theta64_a = sketch_a.get_theta64();
61  const uint64_t theta64_b = sketch_b.get_theta64();
62  check_thetas(theta64_a, theta64_b);
63 
64  const uint64_t count_b = sketch_b.get_num_retained();
65  const uint64_t count_a = theta64_a == theta64_b
66  ? sketch_a.get_num_retained()
67  : count_less_than_theta64(sketch_a, theta64_b);
68 
69  if (count_a == 0) return 0;
70  const double f = sketch_b.get_theta();
72  }
73 
80  template<typename SketchA, typename SketchB>
81  static double upper_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
82  const uint64_t theta64_a = sketch_a.get_theta64();
83  const uint64_t theta64_b = sketch_b.get_theta64();
84  check_thetas(theta64_a, theta64_b);
85 
86  const uint64_t count_b = sketch_b.get_num_retained();
87  const uint64_t count_a = (theta64_a == theta64_b)
88  ? sketch_a.get_num_retained()
89  : count_less_than_theta64(sketch_a, theta64_b);
90 
91  if (count_a == 0) return 1;
92  const double f = sketch_b.get_theta();
94  }
95 
102  template<typename SketchA, typename SketchB>
103  static double estimate_of_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
104  const uint64_t theta64_a = sketch_a.get_theta64();
105  const uint64_t theta64_b = sketch_b.get_theta64();
106  check_thetas(theta64_a, theta64_b);
107 
108  const uint64_t count_b = sketch_b.get_num_retained();
109  const uint64_t count_a = (theta64_a == theta64_b)
110  ? sketch_a.get_num_retained()
111  : count_less_than_theta64(sketch_a, theta64_b);
112 
113  if (count_a == 0) return 0.5;
114  return static_cast<double>(count_b) / static_cast<double>(count_a);
115  }
116 
117 private:
118 
/**
 * Validates that theta of sketch B does not exceed theta of sketch A.
 *
 * @param theta_a theta of sketch A as a 64-bit integer
 * @param theta_b theta of sketch B as a 64-bit integer
 * @throws std::invalid_argument if theta_b is greater than theta_a
 */
static inline void check_thetas(uint64_t theta_a, uint64_t theta_b) {
  if (theta_b > theta_a) {
    // The message states the requirement that was violated (theta_b <= theta_a),
    // matching the condition tested above.
    throw std::invalid_argument("theta_b must be <= theta_a");
  }
}
124 
125  template<typename Sketch>
126  static uint64_t count_less_than_theta64(const Sketch& sketch, uint64_t theta) {
127  uint64_t count = 0;
128  for (const auto& entry: sketch) if (ExtractKey()(entry) < theta) ++count;
129  return count;
130  }
131 
132 };
133 
134 } /* namespace datasketches */
135 
136 # endif
static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate upper bound based on a 95% confidence interval.
Definition: bounds_on_ratios_in_sampled_sets.hpp:70
static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f)
Return the approximate lower bound based on a 95% confidence interval.
Definition: bounds_on_ratios_in_sampled_sets.hpp:56
Bounds on ratios in Theta sketched sets.
Definition: bounds_on_ratios_in_theta_sketched_sets.hpp:50
static double estimate_of_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the estimate for B over A.
Definition: bounds_on_ratios_in_theta_sketched_sets.hpp:103
static double upper_bound_for_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the approximate upper bound for B over A based on a 95% confidence interval.
Definition: bounds_on_ratios_in_theta_sketched_sets.hpp:81
static double lower_bound_for_b_over_a(const SketchA &sketch_a, const SketchB &sketch_b)
Gets the approximate lower bound for B over A based on a 95% confidence interval.
Definition: bounds_on_ratios_in_theta_sketched_sets.hpp:59
DataSketches namespace.
Definition: binomial_bounds.hpp:38