datasketches-cpp
Loading...
Searching...
No Matches
cpc_confidence.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20// author Kevin Lang, Oath Research
21
22#ifndef CPC_CONFIDENCE_HPP_
23#define CPC_CONFIDENCE_HPP_
24
25#include <cmath>
26#include <stdexcept>
27
28#include "cpc_sketch.hpp"
29
30namespace datasketches {
31
32// ln 2.0
33static const double ICON_ERROR_CONSTANT = 0.693147180559945286;
34
35// 1, 2, 3, // kappa
36static const int16_t ICON_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
37 6037, 5720, 5328, // 4 1000000
38 6411, 6262, 5682, // 5 1000000
39 6724, 6403, 6127, // 6 1000000
40 6665, 6411, 6208, // 7 1000000
41 6959, 6525, 6427, // 8 1000000
42 6892, 6665, 6619, // 9 1000000
43 6792, 6752, 6690, // 10 1000000
44 6899, 6818, 6708, // 11 1000000
45 6871, 6845, 6812, // 12 1046369
46 6909, 6861, 6828, // 13 1043411
47 6919, 6897, 6842, // 14 1000297
48}; // lgK numtrials
49
50// 1, 2, 3, // kappa
51static const int16_t ICON_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
52 8031, 8559, 9309, // 4 1000000
53 7084, 7959, 8660, // 5 1000000
54 7141, 7514, 7876, // 6 1000000
55 7458, 7430, 7572, // 7 1000000
56 6892, 7141, 7497, // 8 1000000
57 6889, 7132, 7290, // 9 1000000
58 7075, 7118, 7185, // 10 1000000
59 7040, 7047, 7085, // 11 1000000
60 6993, 7019, 7053, // 12 1046369
61 6953, 7001, 6983, // 13 1043411
62 6944, 6966, 7004, // 14 1000297
63}; // lgK numtrials
64
65// sqrt((ln 2.0) / 2.0)
66static const double HIP_ERROR_CONSTANT = 0.588705011257737332;
67
68// 1, 2, 3, // kappa
69static const int16_t HIP_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
70 5871, 5247, 4826, // 4 1000000
71 5877, 5403, 5070, // 5 1000000
72 5873, 5533, 5304, // 6 1000000
73 5878, 5632, 5464, // 7 1000000
74 5874, 5690, 5564, // 8 1000000
75 5880, 5745, 5619, // 9 1000000
76 5875, 5784, 5701, // 10 1000000
77 5866, 5789, 5742, // 11 1000000
78 5869, 5827, 5784, // 12 1046369
79 5876, 5860, 5827, // 13 1043411
80 5881, 5853, 5842, // 14 1000297
81}; // lgK numtrials
82
83// 1, 2, 3, // kappa
84static const int16_t HIP_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
85 5855, 6688, 7391, // 4 1000000
86 5886, 6444, 6923, // 5 1000000
87 5885, 6254, 6594, // 6 1000000
88 5889, 6134, 6326, // 7 1000000
89 5900, 6072, 6203, // 8 1000000
90 5875, 6005, 6089, // 9 1000000
91 5871, 5980, 6040, // 10 1000000
92 5889, 5941, 6015, // 11 1000000
93 5871, 5926, 5973, // 12 1046369
94 5866, 5901, 5915, // 13 1043411
95 5880, 5914, 5953, // 14 1000297
96}; // lgK numtrials
97
98template<typename A>
99double get_icon_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
100 if (sketch.get_num_coupons() == 0) return 0.0;
101 const int lg_k = sketch.get_lg_k();
102 const long k = 1 << lg_k;
103 if (lg_k < 4) throw std::logic_error("lgk < 4");
104 if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
105 double x = ICON_ERROR_CONSTANT;
106 if (lg_k <= 14) x = ((double) ICON_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
107 const double rel = x / sqrt(k);
108 const double eps = kappa * rel;
109 const double est = sketch.get_icon_estimate();
110 double result = est / (1.0 + eps);
111 const double check = sketch.get_num_coupons();
112 if (result < check) result = check;
113 return result;
114}
115
116template<typename A>
117double get_icon_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
118 if (sketch.get_num_coupons() == 0) return 0.0;
119 const int lg_k = sketch.get_lg_k();
120 const long k = 1 << lg_k;
121 if (lg_k < 4) throw std::logic_error("lgk < 4");
122 if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
123 double x = ICON_ERROR_CONSTANT;
124 if (lg_k <= 14) x = ((double) ICON_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
125 const double rel = x / sqrt(k);
126 const double eps = kappa * rel;
127 const double est = sketch.get_icon_estimate();
128 const double result = est / (1.0 - eps);
129 return ceil(result); // widening for coverage
130}
131
132template<typename A>
133double get_hip_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
134 if (sketch.get_num_coupons() == 0) return 0.0;
135 const int lg_k = sketch.get_lg_k();
136 const long k = 1 << lg_k;
137 if (lg_k < 4) throw std::logic_error("lgk < 4");
138 if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
139 double x = HIP_ERROR_CONSTANT;
140 if (lg_k <= 14) x = ((double) HIP_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
141 const double rel = x / (sqrt((double) k));
142 const double eps = ((double) kappa) * rel;
143 const double est = sketch.get_hip_estimate();
144 double result = est / (1.0 + eps);
145 const double check = (double) sketch.get_num_coupons();
146 if (result < check) result = check;
147 return result;
148}
149
150template<typename A>
151double get_hip_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
152 if (sketch.get_num_coupons() == 0) return 0.0;
153 const int lg_k = sketch.get_lg_k();
154 const long k = 1 << lg_k;
155 if (lg_k < 4) throw std::logic_error("lgk < 4");
156 if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
157 double x = HIP_ERROR_CONSTANT;
158 if (lg_k <= 14) x = ((double) HIP_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
159 const double rel = x / sqrt(k);
160 const double eps = kappa * rel;
161 const double est = sketch.get_hip_estimate();
162 const double result = est / (1.0 - eps);
163 return ceil(result); // widening for coverage
164}
165
166} /* namespace datasketches */
167
168#endif
DataSketches namespace.
Definition binomial_bounds.hpp:38