datasketches-cpp
ebpps_sample.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _EBPPS_SAMPLE_HPP_
21 #define _EBPPS_SAMPLE_HPP_
22 
23 #include "common_defs.hpp"
24 #include "optional.hpp"
25 #include "serde.hpp"
26 
27 #include <memory>
28 #include <vector>
29 
30 namespace datasketches {
31 
// Sample container used by the EBPPS sketch code.
// State kept by an instance (see the private section): the allocator, a
// real-valued sample size c_, an optional "partial" item, and a vector of
// fully retained items. Only declarations appear in this header; the member
// definitions come from ebpps_sample_impl.hpp, which is included at the bottom
// of this file.
//
// Template parameters:
//   T - type of the sampled items
//   A - allocator type, defaults to std::allocator<T>
32 template<
33  typename T,
34  typename A = std::allocator<T>
35 >
36 class ebpps_sample {
37  public:
 // k is presumably the configured sample-size bound -- TODO confirm in the impl file.
 // explicit: prevents implicit conversion from an integer to a sample.
38  explicit ebpps_sample(uint32_t k, const A& allocator = A());
39 
40  // for deserialization
 // NOTE(review): items_deleter is forward-declared here but not referenced
 // anywhere else in this header -- confirm it is still needed.
41  class items_deleter;
 // Takes the item vector and the optional partial item by rvalue reference,
 // together with the sample size c.
42  ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
43 
44  // used instead of having a single-item constructor for update/merge calls
 // item is a forwarding reference, so lvalues and rvalues are both accepted.
45  template<typename TT>
46  void replace_content(TT&& item, double theta);
47 
48  void reset();
49  void downsample(double theta);
50 
 // FwdSample&& is a forwarding reference; presumably another ebpps_sample
 // is expected -- confirm against the definition.
51  template<typename FwdSample>
52  void merge(FwdSample&& other);
53 
54  // standard way to query the sample
 // The result is returned by value as a std::vector<T, A>.
55  using result_type = std::vector<T, A>;
56  result_type get_sample() const;
57 
 // presumably returns c_ (the current, possibly fractional, sample size)
58  double get_c() const;
59 
60  // intended for internal use
61  // returns only full items
62  result_type get_full_items() const;
63 
64  // intended for internal use
65  // handles only the partial item
66  bool has_partial_item() const;
 // Returns the partial item by value (a copy of T).
 // presumably only valid when has_partial_item() is true -- TODO confirm
67  T get_partial_item() const;
68 
 // string<A> is a project-declared alias (presumably from common_defs.hpp).
69  string<A> to_string() const;
70 
77  inline uint32_t get_num_retained_items() const;
78 
 // Two overloads of get_serialized_size_bytes, selected at compile time via
 // std::enable_if: this one participates when TT (defaulting to T) is an
 // arithmetic type ...
85  template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
86  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
87 
 // ... and this one when TT is not an arithmetic type.
94  template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
95  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
96 
97  // This is a convenience alias for users
98  // The type returned by the following serialize method
 // Byte vector whose allocator is A rebound to uint8_t.
 // NOTE(review): none of the serialize overloads declared below returns this
 // type (they return size_t and void) -- the comment above may be stale.
99  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
100 
 // Serializes into a caller-provided buffer delimited by [ptr, end_ptr).
 // The size_t result is presumably the number of bytes written -- TODO confirm.
108  template<typename SerDe = serde<T>>
109  size_t serialize(uint8_t* ptr, const uint8_t* end_ptr, const SerDe& sd = SerDe()) const;
110 
 // Serializes to an output stream.
116  template<typename SerDe = serde<T>>
117  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
118 
 // Deserializes from a memory buffer of the given size. Returns a pair of the
 // sample and a size_t, presumably the number of bytes consumed -- TODO confirm.
127  template<typename SerDe = serde<T>>
128  static std::pair<ebpps_sample, size_t> deserialize(const uint8_t* ptr, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
129 
 // Deserializes from an input stream.
137  template<typename SerDe = serde<T>>
138  static ebpps_sample deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
139 
 // Iterator type; its definition follows this class in the same header.
140  class const_iterator;
141 
148  const_iterator begin() const;
149 
156  const_iterator end() const;
157 
158  private:
 // Data members are initialized in declaration order: allocator_, c_,
 // partial_item_, data_. Keep constructor initializer lists in that order.
159  A allocator_;
160  double c_; // Current sample size, including fractional part
161  optional<T> partial_item_; // a sample item corresponding to a partial weight
162  std::vector<T, A> data_; // stored sampled items
163 
 // Internal helpers; their behavior is defined in the impl file.
164  template<typename FwdItem>
165  inline void set_partial(FwdItem&& item);
166  void swap_with_partial();
167  void move_one_to_partial();
168  void subsample(uint32_t num_samples);
169 
 // Static helpers for random draws; presumably random_idx yields an index
 // below max and next_double a value in [0, 1) -- TODO confirm in the impl file.
170  static inline uint32_t random_idx(uint32_t max);
171  static inline double next_double();
172 
 // Grants the iterator access to the private members above.
173  friend class const_iterator;
174 };
175 
// Read-only iterator over an ebpps_sample, tagged as an input iterator.
// It keeps a pointer to the sample, a current index, and a flag related to
// the partial item. The only non-copy constructor is private, so instances
// are obtained through the friend class ebpps_sample (its begin()/end()).
176 template<typename T, typename A>
177 class ebpps_sample<T, A>::const_iterator {
178 public:
 // NOTE(review): std::input_iterator_tag is declared in <iterator>, which this
 // header does not include directly -- presumably it arrives transitively.
179  using iterator_category = std::input_iterator_tag;
 // NOTE(review): value_type is a reference type here and difference_type is
 // void; standard iterator conventions usually expect a non-reference
 // value_type and a signed integer difference_type -- confirm this is intended.
180  using value_type = const T&;
181  using difference_type = void;
 // return_value_holder is a project-declared template (presumably from
 // common_defs.hpp) used as the result type of operator->.
182  using pointer = const return_value_holder<value_type>;
183  using reference = value_type;
184 
185  const_iterator(const const_iterator& other);
186  const_iterator& operator++();
 // NOTE(review): post-increment is declared to return a reference. The
 // conventional form returns the previous iterator by value; if the definition
 // returns a local copy, this would be a dangling reference -- verify against
 // ebpps_sample_impl.hpp before changing, since both must be updated together.
187  const_iterator& operator++(int);
188  bool operator==(const const_iterator& other) const;
189  bool operator!=(const const_iterator& other) const;
190  reference operator*() const;
191  pointer operator->() const;
192 
193 private:
 // static_cast<size_t>(-1) is the largest size_t value; presumably used as a
 // sentinel index denoting the partial item -- TODO confirm in the impl file.
194  static const size_t PARTIAL_IDX = static_cast<size_t>(-1);
195 
196  // default iterator over sample
197  const_iterator(const ebpps_sample<T, A>* sample);
198 
 // Non-owning pointer to the sample being iterated.
199  const ebpps_sample<T, A>* sample_;
200  size_t idx_;
 // presumably indicates whether the partial item is included in the
 // traversal -- TODO confirm
201  bool use_partial_;
202 
 // Lets ebpps_sample call the private constructor above.
203  friend class ebpps_sample;
204 };
205 
206 } // namespace datasketches
207 
208 #include "ebpps_sample_impl.hpp"
209 
210 #endif // _EBPPS_SAMPLE_HPP_
DataSketches namespace.
Definition: binomial_bounds.hpp:38