datasketches-cpp
cpc_confidence.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 // author Kevin Lang, Oath Research
21 
22 #ifndef CPC_CONFIDENCE_HPP_
23 #define CPC_CONFIDENCE_HPP_
24 
25 #include <cmath>
26 #include <stdexcept>
27 
28 #include "cpc_sketch.hpp"
29 
30 namespace datasketches {
31 
32 // ln 2.0
33 static const double ICON_ERROR_CONSTANT = 0.693147180559945286;
34 
35 // 1, 2, 3, // kappa
36 static const int16_t ICON_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
37  6037, 5720, 5328, // 4 1000000
38  6411, 6262, 5682, // 5 1000000
39  6724, 6403, 6127, // 6 1000000
40  6665, 6411, 6208, // 7 1000000
41  6959, 6525, 6427, // 8 1000000
42  6892, 6665, 6619, // 9 1000000
43  6792, 6752, 6690, // 10 1000000
44  6899, 6818, 6708, // 11 1000000
45  6871, 6845, 6812, // 12 1046369
46  6909, 6861, 6828, // 13 1043411
47  6919, 6897, 6842, // 14 1000297
48 }; // lgK numtrials
49 
50 // 1, 2, 3, // kappa
51 static const int16_t ICON_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
52  8031, 8559, 9309, // 4 1000000
53  7084, 7959, 8660, // 5 1000000
54  7141, 7514, 7876, // 6 1000000
55  7458, 7430, 7572, // 7 1000000
56  6892, 7141, 7497, // 8 1000000
57  6889, 7132, 7290, // 9 1000000
58  7075, 7118, 7185, // 10 1000000
59  7040, 7047, 7085, // 11 1000000
60  6993, 7019, 7053, // 12 1046369
61  6953, 7001, 6983, // 13 1043411
62  6944, 6966, 7004, // 14 1000297
63 }; // lgK numtrials
64 
65 // sqrt((ln 2.0) / 2.0)
66 static const double HIP_ERROR_CONSTANT = 0.588705011257737332;
67 
68 // 1, 2, 3, // kappa
69 static const int16_t HIP_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
70  5871, 5247, 4826, // 4 1000000
71  5877, 5403, 5070, // 5 1000000
72  5873, 5533, 5304, // 6 1000000
73  5878, 5632, 5464, // 7 1000000
74  5874, 5690, 5564, // 8 1000000
75  5880, 5745, 5619, // 9 1000000
76  5875, 5784, 5701, // 10 1000000
77  5866, 5789, 5742, // 11 1000000
78  5869, 5827, 5784, // 12 1046369
79  5876, 5860, 5827, // 13 1043411
80  5881, 5853, 5842, // 14 1000297
81 }; // lgK numtrials
82 
83 // 1, 2, 3, // kappa
84 static const int16_t HIP_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
85  5855, 6688, 7391, // 4 1000000
86  5886, 6444, 6923, // 5 1000000
87  5885, 6254, 6594, // 6 1000000
88  5889, 6134, 6326, // 7 1000000
89  5900, 6072, 6203, // 8 1000000
90  5875, 6005, 6089, // 9 1000000
91  5871, 5980, 6040, // 10 1000000
92  5889, 5941, 6015, // 11 1000000
93  5871, 5926, 5973, // 12 1046369
94  5866, 5901, 5915, // 13 1043411
95  5880, 5914, 5953, // 14 1000297
96 }; // lgK numtrials
97 
98 template<typename A>
99 double get_icon_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
100  if (sketch.get_num_coupons() == 0) return 0.0;
101  const int lg_k = sketch.get_lg_k();
102  const long k = 1 << lg_k;
103  if (lg_k < 4) throw std::logic_error("lgk < 4");
104  if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
105  double x = ICON_ERROR_CONSTANT;
106  if (lg_k <= 14) x = ((double) ICON_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
107  const double rel = x / sqrt(k);
108  const double eps = kappa * rel;
109  const double est = sketch.get_icon_estimate();
110  double result = est / (1.0 + eps);
111  const double check = sketch.get_num_coupons();
112  if (result < check) result = check;
113  return result;
114 }
115 
116 template<typename A>
117 double get_icon_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
118  if (sketch.get_num_coupons() == 0) return 0.0;
119  const int lg_k = sketch.get_lg_k();
120  const long k = 1 << lg_k;
121  if (lg_k < 4) throw std::logic_error("lgk < 4");
122  if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
123  double x = ICON_ERROR_CONSTANT;
124  if (lg_k <= 14) x = ((double) ICON_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
125  const double rel = x / sqrt(k);
126  const double eps = kappa * rel;
127  const double est = sketch.get_icon_estimate();
128  const double result = est / (1.0 - eps);
129  return ceil(result); // widening for coverage
130 }
131 
132 template<typename A>
133 double get_hip_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
134  if (sketch.get_num_coupons() == 0) return 0.0;
135  const int lg_k = sketch.get_lg_k();
136  const long k = 1 << lg_k;
137  if (lg_k < 4) throw std::logic_error("lgk < 4");
138  if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
139  double x = HIP_ERROR_CONSTANT;
140  if (lg_k <= 14) x = ((double) HIP_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
141  const double rel = x / (sqrt((double) k));
142  const double eps = ((double) kappa) * rel;
143  const double est = sketch.get_hip_estimate();
144  double result = est / (1.0 + eps);
145  const double check = (double) sketch.get_num_coupons();
146  if (result < check) result = check;
147  return result;
148 }
149 
150 template<typename A>
151 double get_hip_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
152  if (sketch.get_num_coupons() == 0) return 0.0;
153  const int lg_k = sketch.get_lg_k();
154  const long k = 1 << lg_k;
155  if (lg_k < 4) throw std::logic_error("lgk < 4");
156  if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
157  double x = HIP_ERROR_CONSTANT;
158  if (lg_k <= 14) x = ((double) HIP_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
159  const double rel = x / sqrt(k);
160  const double eps = kappa * rel;
161  const double est = sketch.get_hip_estimate();
162  const double result = est / (1.0 - eps);
163  return ceil(result); // widening for coverage
164 }
165 
166 } /* namespace datasketches */
167 
168 #endif
DataSketches namespace.
Definition: binomial_bounds.hpp:38