20#ifndef _EBPPS_SAMPLE_IMPL_HPP_
21#define _EBPPS_SAMPLE_IMPL_HPP_
23#include "common_defs.hpp"
24#include "conditional_forward.hpp"
25#include "ebpps_sample.hpp"
35template<
typename T,
typename A>
36ebpps_sample<T,A>::ebpps_sample(uint32_t reserved_size,
const A& allocator) :
37 allocator_(allocator),
42 data_.reserve(reserved_size);
45template<
typename T,
typename A>
46ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item,
double c,
const A& allocator) :
47 allocator_(allocator),
49 partial_item_(partial_item),
50 data_(data, allocator)
53template<
typename T,
typename A>
55void ebpps_sample<T,A>::replace_content(TT&& item,
double theta) {
58 partial_item_.reset();
60 data_.emplace_back(std::forward<TT>(item));
62 partial_item_.emplace(std::forward<TT>(item));
66template<
typename T,
typename A>
67auto ebpps_sample<T,A>::get_sample() const -> result_type {
69 const double c_frac = std::modf(c_, &unused);
70 const bool include_partial = next_double() < c_frac;
71 const uint32_t result_size =
static_cast<uint32_t
>(data_.size()) + (include_partial ? 1 : 0);
74 result.reserve(result_size);
75 std::copy(data_.begin(), data_.end(), std::back_inserter(result));
77 result.emplace_back(
static_cast<const T&
>(*partial_item_));
82template<
typename T,
typename A>
83void ebpps_sample<T,A>::downsample(
double theta) {
84 if (theta >= 1.0)
return;
86 const double new_c = theta * c_;
88 const double new_c_frac = std::modf(new_c, &new_c_int);
90 const double c_frac = std::modf(c_, &c_int);
92 if (new_c_int == 0.0) {
94 if (next_double() > (c_frac / c_)) {
98 }
else if (new_c_int == c_int) {
100 if (next_double() > (1 - theta * c_frac)/(1 - new_c_frac)) {
104 if (next_double() < theta * c_frac) {
107 subsample(
static_cast<uint32_t
>(new_c_int));
111 subsample(
static_cast<uint32_t
>(new_c_int) + 1);
112 move_one_to_partial();
116 if (new_c == new_c_int)
117 partial_item_.reset();
122template<
typename T,
typename A>
123template<
typename FwdSample>
124void ebpps_sample<T,A>::merge(FwdSample&& other) {
126 const double c_frac = std::modf(c_, &c_int);
129 const double other_c_frac = std::modf(other.c_, &unused);
134 for (uint32_t i = 0; i < other.data_.size(); ++i)
135 data_.emplace_back(conditional_forward<FwdSample>(other.data_[i]));
146 if (c_frac == 0.0 && other_c_frac == 0.0) {
147 partial_item_.reset();
148 }
else if (c_frac + other_c_frac == 1.0 || c_ == std::floor(c_)) {
149 if (next_double() <= c_frac) {
151 data_.emplace_back(std::move(*partial_item_));
153 if (other.partial_item_)
154 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
156 partial_item_.reset();
157 }
else if (c_frac + other_c_frac < 1.0) {
158 if (next_double() > c_frac / (c_frac + other_c_frac)) {
159 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
162 if (next_double() <= (1 - c_frac) / ((1 - c_frac) + (1 - other_c_frac))) {
163 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
165 data_.emplace_back(std::move(*partial_item_));
166 partial_item_.reset();
167 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
172template<
typename T,
typename A>
173string<A> ebpps_sample<T ,A>::to_string()
const {
174 std::ostringstream oss;
175 oss <<
" sample:" << std::endl;
177 for (
const T& item : data_)
178 oss <<
"\t" << idx++ <<
":\t" << item << std::endl;
180 if (
bool(partial_item_))
181 oss << (*partial_item_) << std::endl;
183 oss <<
"NULL" << std::endl;
188template<
typename T,
typename A>
189void ebpps_sample<T,A>::subsample(uint32_t num_samples) {
196 if (num_samples == data_.size())
return;
198 auto erase_start = data_.begin();
199 uint32_t data_len =
static_cast<uint32_t
>(data_.size());
200 for (uint32_t i = 0; i < num_samples; ++i, ++erase_start) {
201 uint32_t j = i + random_idx(data_len - i);
202 std::swap(data_[i], data_[j]);
205 data_.erase(erase_start, data_.end());
208template<
typename T,
typename A>
209template<
typename FwdItem>
210void ebpps_sample<T,A>::set_partial(FwdItem&& item) {
212 *partial_item_ = conditional_forward<FwdItem>(item);
214 partial_item_.emplace(conditional_forward<FwdItem>(item));
217template<
typename T,
typename A>
218void ebpps_sample<T,A>::move_one_to_partial() {
219 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
221 const size_t last_idx = data_.size() - 1;
222 if (idx != last_idx) {
223 std::swap(data_[idx], data_[last_idx]);
226 set_partial(std::move(data_[last_idx]));
231template<
typename T,
typename A>
232void ebpps_sample<T,A>::swap_with_partial() {
234 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
235 std::swap(data_[idx], *partial_item_);
237 move_one_to_partial();
241template<
typename T,
typename A>
242void ebpps_sample<T,A>::reset() {
244 partial_item_.reset();
248template<
typename T,
typename A>
249double ebpps_sample<T,A>::get_c()
const {
253template<
typename T,
typename A>
254auto ebpps_sample<T,A>::get_full_items() const -> result_type {
255 return result_type(data_);
258template<
typename T,
typename A>
259bool ebpps_sample<T,A>::has_partial_item()
const {
260 return bool(partial_item_);
263template<
typename T,
typename A>
264T ebpps_sample<T,A>::get_partial_item()
const {
265 if (!partial_item_)
throw std::runtime_error(
"Call to get_partial_item() when no partial item exists");
266 return *partial_item_;
269template<
typename T,
typename A>
270uint32_t ebpps_sample<T,A>::random_idx(uint32_t max) {
271 static std::uniform_int_distribution<uint32_t> dist;
272 return dist(random_utils::rand, std::uniform_int_distribution<uint32_t>::param_type(0, max - 1));
275template<
typename T,
typename A>
276double ebpps_sample<T,A>::next_double() {
277 return random_utils::next_double(random_utils::rand);
280template<
typename T,
typename A>
281uint32_t ebpps_sample<T,A>::get_num_retained_items()
const {
282 return static_cast<uint32_t
>(data_.size() + (partial_item_ ? 1 : 0));
286template<
typename T,
typename A>
287template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type>
288size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe&)
const {
292 return sizeof(c_) + (data_.size() + (partial_item_ ? 1 : 0)) *
sizeof(T);
296template<
typename T,
typename A>
297template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type>
298size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe& sd)
const {
299 if (c_ == 0.0)
return 0;
301 size_t num_bytes =
sizeof(c_);
302 for (
auto it : data_)
303 num_bytes += sd.size_of_item(it);
306 num_bytes += sd.size_of_item(*partial_item_);
311template<
typename T,
typename A>
312template<
typename SerDe>
313size_t ebpps_sample<T,A>::serialize(uint8_t* ptr,
const uint8_t* end_ptr,
const SerDe& sd)
const {
314 uint8_t* st_ptr = ptr;
316 ensure_minimum_memory(end_ptr - ptr,
sizeof(c_));
317 ptr += copy_to_mem(c_, ptr);
319 ptr += sd.serialize(ptr, end_ptr - ptr, data_.data(),
static_cast<unsigned>(data_.size()));
322 ptr += sd.serialize(ptr, end_ptr - ptr, &*partial_item_, 1);
328template<
typename T,
typename A>
329template<
typename SerDe>
330void ebpps_sample<T,A>::serialize(std::ostream& os,
const SerDe& sd)
const {
333 sd.serialize(os, data_.data(),
static_cast<unsigned>(data_.size()));
336 sd.serialize(os, &*partial_item_, 1);
338 if (!os.good())
throw std::runtime_error(
"error writing to std::ostream");
341template<
typename T,
typename A>
342template<
typename SerDe>
343std::pair<ebpps_sample<T, A>,
size_t> ebpps_sample<T, A>::deserialize(
const uint8_t* ptr,
size_t size,
const SerDe& sd,
const A& allocator) {
344 const uint8_t* st_ptr = ptr;
345 const uint8_t* end_ptr = ptr + size;
347 ensure_minimum_memory(size,
sizeof(
double));
349 ptr += copy_from_mem(ptr, c);
351 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
354 const double c_frac = std::modf(c, &c_int);
355 const bool has_partial = c_frac != 0.0;
357 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
359 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
360 ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), num_full_items);
362 items.get_deleter().set_destroy(
true);
363 std::vector<T, A> data(std::make_move_iterator(items.get()),
364 std::make_move_iterator(items.get() + num_full_items),
367 optional<T> partial_item;
371 typename std::aligned_storage<
sizeof(T),
alignof(T)>::type tmp_storage;
372 T* tmp =
reinterpret_cast<T*
>(&tmp_storage);
374 ptr += sd.deserialize(ptr, end_ptr - ptr, tmp, 1);
376 partial_item.emplace(std::move(*tmp));
380 return std::pair<ebpps_sample<T,A>,
size_t>(
381 ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator),
385template<
typename T,
typename A>
386template<
typename SerDe>
387ebpps_sample<T, A> ebpps_sample<T, A>::deserialize(std::istream& is,
const SerDe& sd,
const A& allocator) {
388 const double c = read<double>(is);
390 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
393 const double c_frac = std::modf(c, &c_int);
394 const bool has_partial = c_frac != 0.0;
396 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
398 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
399 sd.deserialize(is, items.get(), num_full_items);
401 items.get_deleter().set_destroy(
true);
402 std::vector<T, A> data(std::make_move_iterator(items.get()),
403 std::make_move_iterator(items.get() + num_full_items),
406 optional<T> partial_item;
410 typename std::aligned_storage<
sizeof(T),
alignof(T)>::type tmp_storage;
411 T* tmp =
reinterpret_cast<T*
>(&tmp_storage);
413 sd.deserialize(is, tmp, 1);
415 partial_item.emplace(std::move(*tmp));
419 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
421 return ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator);
425template<
typename T,
typename A>
426typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::begin()
const {
427 return const_iterator(
this);
430template<
typename T,
typename A>
431typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::end()
const {
432 return const_iterator(
nullptr);
438template<
typename T,
typename A>
439ebpps_sample<T, A>::const_iterator::const_iterator(
const ebpps_sample* sample) :
444 if (sample ==
nullptr)
449 const double c_frac = std::modf(sample_->get_c(), &c_int);
450 use_partial_ = sample->next_double() < c_frac;
453 if (sample_->data_.size() == 0 && use_partial_) {
457 if (sample_->c_== 0.0 || (sample_->data_.size() == 0 && !sample_->has_partial_item())) { sample_ =
nullptr; }
460template<
typename T,
typename A>
461ebpps_sample<T, A>::const_iterator::const_iterator(
const const_iterator& other) :
462 sample_(other.sample_),
464 use_partial_(other.use_partial_)
467template<
typename T,
typename A>
468typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++() {
469 if (sample_ ==
nullptr)
471 else if (idx_ == PARTIAL_IDX) {
472 idx_ = sample_->data_.size();
479 if (idx_ == sample_->data_.size()) {
489template<
typename T,
typename A>
490typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++(
int) {
491 const_iterator tmp(*
this);
496template<
typename T,
typename A>
497bool ebpps_sample<T, A>::const_iterator::operator==(
const const_iterator& other)
const {
498 if (sample_ != other.sample_)
return false;
499 if (sample_ ==
nullptr)
return true;
500 return idx_ == other.idx_;
503template<
typename T,
typename A>
504bool ebpps_sample<T, A>::const_iterator::operator!=(
const const_iterator& other)
const {
505 return !operator==(other);
508template<
typename T,
typename A>
509auto ebpps_sample<T, A>::const_iterator::operator*() const -> reference {
510 if (idx_ == PARTIAL_IDX)
511 return *(sample_->partial_item_);
513 return sample_->data_[idx_];
516template<
typename T,
typename A>
517auto ebpps_sample<T, A>::const_iterator::operator->() const -> pointer {
521template<
typename T,
typename A>
522class ebpps_sample<T, A>::items_deleter {
524 items_deleter(
const A& allocator,
bool destroy,
size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
525 void operator() (T* ptr) {
526 if (ptr !=
nullptr) {
528 for (
size_t i = 0; i < num_; ++i) {
532 allocator_.deallocate(ptr, num_);
535 void set_destroy(
bool destroy) { destroy_ = destroy; }
DataSketches namespace.
Definition binomial_bounds.hpp:38