datasketches-cpp
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
ebpps_sample.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef _EBPPS_SAMPLE_HPP_
21#define _EBPPS_SAMPLE_HPP_
22
23#include "common_defs.hpp"
24#include "optional.hpp"
25#include "serde.hpp"
26
27#include <memory>
28#include <vector>
29
30namespace datasketches {
31
// Class template that stores a collection of sampled items of type T (data_),
// at most one additional "partial" item (partial_item_), and a real-valued
// sample size (c_). Only declarations appear here; this header includes
// ebpps_sample_impl.hpp at its end, which presumably holds the definitions.
//
// Template parameters:
//   T - type of the stored items
//   A - allocator type, defaults to std::allocator<T>
32template<
33 typename T,
34 typename A = std::allocator<T>
35>
36class ebpps_sample {
37 public:
  // Constructs a sample from a size parameter k and an allocator instance.
  // NOTE(review): k is presumably the maximum sample size -- confirm against the implementation.
38 explicit ebpps_sample(uint32_t k, const A& allocator = A());
39
40 // for deserialization
  // NOTE(review): items_deleter is only forward-declared here and is not used by
  // any other declaration in this header -- confirm it is still needed.
41 class items_deleter;
  // Constructs a sample from already-built state: the full items (data), an
  // optional partial item, and the sample size c. data and partial_item are
  // taken by rvalue reference.
42 ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
43
44 // used instead of having a single-item constructor for update/merge calls
  // item is a forwarding reference, so both lvalues and rvalues are accepted.
45 template<typename TT>
46 void replace_content(TT&& item, double theta);
47
  // NOTE(review): presumably reset() returns the sample to an empty state and
  // downsample() shrinks it according to theta -- semantics are defined in the
  // implementation file, confirm there.
48 void reset();
49 void downsample(double theta);
50
  // Merges another sample into this one. other is a forwarding reference,
  // presumably so items can be moved out of an rvalue sample -- TODO confirm.
51 template<typename FwdSample>
52 void merge(FwdSample&& other);
53
54 // standard way to query the sample
  // Both get_sample() and get_full_items() return a std::vector<T, A> by value.
55 using result_type = std::vector<T, A>;
56 result_type get_sample() const;
57
  // Accessor for the real-valued sample size (see the c_ member below).
58 double get_c() const;
59
60 // intended for internal use
61 // returns only full items
62 result_type get_full_items() const;
63
64 // intended for internal use
65 // handles only the partial item
  // get_partial_item() returns T by value.
  // NOTE(review): presumably only valid when has_partial_item() is true -- confirm.
66 bool has_partial_item() const;
67 T get_partial_item() const;
68
  // Returns a textual representation of the sample as string<A>
  // (string<A> is not declared in this header; presumably from common_defs.hpp).
69 string<A> to_string() const;
70
  // NOTE(review): presumably the count of items currently held, possibly
  // including the partial item -- confirm against the implementation.
77 inline uint32_t get_num_retained_items() const;
78
  // Overload selected (via enable_if) only when TT, which defaults to T, is an
  // arithmetic type. Takes the SerDe instance to use; returns a size in bytes
  // as size_t.
85 template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
86 inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
87
  // Overload selected (via enable_if) for every non-arithmetic TT. Same
  // signature and return type as the arithmetic overload above.
94 template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
95 inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
96
97 // This is a convenience alias for users
98 // The type returned by the following serialize method
  // NOTE(review): the comment above looks stale -- neither serialize overload
  // declared below returns vector_bytes (one returns size_t, the other void).
  // The alias is a byte vector using A rebound to uint8_t as its allocator.
99 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
100
  // Serializes into a caller-provided buffer described by ptr and end_ptr,
  // using the given SerDe for the items.
  // NOTE(review): the size_t result is presumably the number of bytes written,
  // and end_ptr presumably one-past-the-end of the buffer -- confirm.
108 template<typename SerDe = serde<T>>
109 size_t serialize(uint8_t* ptr, const uint8_t* end_ptr, const SerDe& sd = SerDe()) const;
110
  // Serializes to the given output stream using the given SerDe for the items.
116 template<typename SerDe = serde<T>>
117 void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
118
  // Builds a sample from a byte buffer of the given size. Returns the sample
  // together with a size_t.
  // NOTE(review): the size_t is presumably the number of bytes consumed -- confirm.
127 template<typename SerDe = serde<T>>
128 static std::pair<ebpps_sample, size_t> deserialize(const uint8_t* ptr, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
129
  // Builds a sample by reading from the given input stream.
137 template<typename SerDe = serde<T>>
138 static ebpps_sample deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
139
  // Iterator type returned by begin() and end(); defined after this class.
140 class const_iterator;
141
  // Iterator to the start of the sample.
148 const_iterator begin() const;
149
  // Iterator marking the end of the sample.
156 const_iterator end() const;
157
158 private:
159 A allocator_;
160 double c_; // Current sample size, including fractional part
161 optional<T> partial_item_; // a sample item corresponding to a partial weight
162 std::vector<T, A> data_; // stored sampled items
163
  // Internal helpers; their definitions are not in this header.
  // set_partial takes a forwarding reference so the item can be copied or moved.
164 template<typename FwdItem>
165 inline void set_partial(FwdItem&& item);
166 void swap_with_partial();
167 void move_one_to_partial();
168 void subsample(uint32_t num_samples);
169
  // Static helpers for randomness.
  // NOTE(review): presumably random_idx returns an index below max and
  // next_double a value in [0, 1) -- exact ranges must be confirmed in the implementation.
170 static inline uint32_t random_idx(uint32_t max);
171 static inline double next_double();
172
  // Grants the iterator access to the private members above.
173 friend class const_iterator;
174};
175
// Read-only input iterator over an ebpps_sample. It keeps a pointer to the
// sample, a position (idx_) and a flag (use_partial_). Only ebpps_sample, as a
// friend, can create one from a sample pointer; users can copy existing
// iterators. Member definitions are not in this header.
176template<typename T, typename A>
177class ebpps_sample<T, A>::const_iterator {
178public:
  // Iterator traits. Note that value_type is itself a const reference and
  // reference is the same type, so dereferencing yields const T&.
179 using iterator_category = std::input_iterator_tag;
180 using value_type = const T&;
  // NOTE(review): difference_type is void, so distance-based utilities cannot
  // be used with this iterator -- confirm this is intentional.
181 using difference_type = void;
  // return_value_holder is not declared in this header (presumably provided by
  // common_defs.hpp); operator-> returns it by value.
182 using pointer = const return_value_holder<value_type>;
183 using reference = value_type;
184
185 const_iterator(const const_iterator& other);
186 const_iterator& operator++();
  // NOTE(review): post-increment is declared as returning a reference rather
  // than the conventional by-value copy of the previous state -- confirm
  // against the implementation before relying on `it++` semantics.
187 const_iterator& operator++(int);
188 bool operator==(const const_iterator& other) const;
189 bool operator!=(const const_iterator& other) const;
190 reference operator*() const;
191 pointer operator->() const;
192
193private:
  // Largest representable size_t value (static_cast of -1).
  // NOTE(review): presumably a sentinel for idx_ meaning "positioned on the
  // partial item" -- confirm in the implementation.
194 static const size_t PARTIAL_IDX = static_cast<size_t>(-1);
195
196 // default iterator over sample
  // Private: reachable only through the friend declaration below.
197 const_iterator(const ebpps_sample<T, A>* sample);
198
199 const ebpps_sample<T, A>* sample_;
200 size_t idx_;
201 bool use_partial_;
202
  // Lets ebpps_sample call the private constructor.
203 friend class ebpps_sample;
204};
205
206} // namespace datasketches
207
208#include "ebpps_sample_impl.hpp"
209
210#endif // _EBPPS_SAMPLE_HPP_
DataSketches namespace.
Definition binomial_bounds.hpp:38