20#ifndef _EBPPS_SAMPLE_IMPL_HPP_
21#define _EBPPS_SAMPLE_IMPL_HPP_
23#include "common_defs.hpp"
24#include "conditional_forward.hpp"
25#include "ebpps_sample.hpp"
34template<
typename T,
typename A>
35ebpps_sample<T,A>::ebpps_sample(uint32_t reserved_size,
const A& allocator) :
36 allocator_(allocator),
41 data_.reserve(reserved_size);
44template<
typename T,
typename A>
45ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item,
double c,
const A& allocator) :
46 allocator_(allocator),
48 partial_item_(partial_item),
49 data_(data, allocator)
52template<
typename T,
typename A>
54void ebpps_sample<T,A>::replace_content(TT&& item,
double theta) {
57 partial_item_.reset();
59 data_.emplace_back(std::forward<TT>(item));
61 partial_item_.emplace(std::forward<TT>(item));
65template<
typename T,
typename A>
66auto ebpps_sample<T,A>::get_sample() const -> result_type {
68 const double c_frac = std::modf(c_, &unused);
69 const bool include_partial = next_double() < c_frac;
70 const uint32_t result_size =
static_cast<uint32_t
>(data_.size()) + (include_partial ? 1 : 0);
73 result.reserve(result_size);
74 std::copy(data_.begin(), data_.end(), std::back_inserter(result));
76 result.emplace_back(
static_cast<const T&
>(*partial_item_));
81template<
typename T,
typename A>
82void ebpps_sample<T,A>::downsample(
double theta) {
83 if (theta >= 1.0)
return;
85 const double new_c = theta * c_;
87 const double new_c_frac = std::modf(new_c, &new_c_int);
89 const double c_frac = std::modf(c_, &c_int);
91 if (new_c_int == 0.0) {
93 if (next_double() > (c_frac / c_)) {
97 }
else if (new_c_int == c_int) {
99 if (next_double() > (1 - theta * c_frac)/(1 - new_c_frac)) {
103 if (next_double() < theta * c_frac) {
106 subsample(
static_cast<uint32_t
>(new_c_int));
110 subsample(
static_cast<uint32_t
>(new_c_int) + 1);
111 move_one_to_partial();
115 if (new_c == new_c_int)
116 partial_item_.reset();
121template<
typename T,
typename A>
122template<
typename FwdSample>
123void ebpps_sample<T,A>::merge(FwdSample&& other) {
125 const double c_frac = std::modf(c_, &c_int);
128 const double other_c_frac = std::modf(other.c_, &unused);
133 for (uint32_t i = 0; i < other.data_.size(); ++i)
134 data_.emplace_back(conditional_forward<FwdSample>(other.data_[i]));
145 if (c_frac == 0.0 && other_c_frac == 0.0) {
146 partial_item_.reset();
147 }
else if (c_frac + other_c_frac == 1.0 || c_ == std::floor(c_)) {
148 if (next_double() <= c_frac) {
150 data_.emplace_back(std::move(*partial_item_));
152 if (other.partial_item_)
153 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
155 partial_item_.reset();
156 }
else if (c_frac + other_c_frac < 1.0) {
157 if (next_double() > c_frac / (c_frac + other_c_frac)) {
158 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
161 if (next_double() <= (1 - c_frac) / ((1 - c_frac) + (1 - other_c_frac))) {
162 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
164 data_.emplace_back(std::move(*partial_item_));
165 partial_item_.reset();
166 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
171template<
typename T,
typename A>
172string<A> ebpps_sample<T ,A>::to_string()
const {
173 std::ostringstream oss;
174 oss <<
" sample:" << std::endl;
176 for (
const T& item : data_)
177 oss <<
"\t" << idx++ <<
":\t" << item << std::endl;
179 if (
bool(partial_item_))
180 oss << (*partial_item_) << std::endl;
182 oss <<
"NULL" << std::endl;
187template<
typename T,
typename A>
188void ebpps_sample<T,A>::subsample(uint32_t num_samples) {
195 if (num_samples == data_.size())
return;
197 auto erase_start = data_.begin();
198 uint32_t data_len =
static_cast<uint32_t
>(data_.size());
199 for (uint32_t i = 0; i < num_samples; ++i, ++erase_start) {
200 uint32_t j = i + random_idx(data_len - i);
201 std::swap(data_[i], data_[j]);
204 data_.erase(erase_start, data_.end());
207template<
typename T,
typename A>
208template<
typename FwdItem>
209void ebpps_sample<T,A>::set_partial(FwdItem&& item) {
211 *partial_item_ = conditional_forward<FwdItem>(item);
213 partial_item_.emplace(conditional_forward<FwdItem>(item));
216template<
typename T,
typename A>
217void ebpps_sample<T,A>::move_one_to_partial() {
218 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
220 const size_t last_idx = data_.size() - 1;
221 if (idx != last_idx) {
222 std::swap(data_[idx], data_[last_idx]);
225 set_partial(std::move(data_[last_idx]));
230template<
typename T,
typename A>
231void ebpps_sample<T,A>::swap_with_partial() {
233 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
234 std::swap(data_[idx], *partial_item_);
236 move_one_to_partial();
240template<
typename T,
typename A>
241void ebpps_sample<T,A>::reset() {
243 partial_item_.reset();
247template<
typename T,
typename A>
248double ebpps_sample<T,A>::get_c()
const {
252template<
typename T,
typename A>
253auto ebpps_sample<T,A>::get_full_items() const -> result_type {
254 return result_type(data_);
257template<
typename T,
typename A>
258bool ebpps_sample<T,A>::has_partial_item()
const {
259 return bool(partial_item_);
262template<
typename T,
typename A>
263T ebpps_sample<T,A>::get_partial_item()
const {
264 if (!partial_item_)
throw std::runtime_error(
"Call to get_partial_item() when no partial item exists");
265 return *partial_item_;
268template<
typename T,
typename A>
269uint32_t ebpps_sample<T,A>::random_idx(uint32_t max) {
270 static std::uniform_int_distribution<uint32_t> dist;
271 return dist(random_utils::rand, std::uniform_int_distribution<uint32_t>::param_type(0, max - 1));
274template<
typename T,
typename A>
275double ebpps_sample<T,A>::next_double() {
276 return random_utils::next_double(random_utils::rand);
279template<
typename T,
typename A>
280uint32_t ebpps_sample<T,A>::get_num_retained_items()
const {
281 return static_cast<uint32_t
>(data_.size() + (partial_item_ ? 1 : 0));
285template<
typename T,
typename A>
286template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type>
287size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe&)
const {
291 return sizeof(c_) + (data_.size() + (partial_item_ ? 1 : 0)) *
sizeof(T);
295template<
typename T,
typename A>
296template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type>
297size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe& sd)
const {
298 if (c_ == 0.0)
return 0;
300 size_t num_bytes =
sizeof(c_);
301 for (
auto it : data_)
302 num_bytes += sd.size_of_item(it);
305 num_bytes += sd.size_of_item(*partial_item_);
310template<
typename T,
typename A>
311template<
typename SerDe>
312size_t ebpps_sample<T,A>::serialize(uint8_t* ptr,
const uint8_t* end_ptr,
const SerDe& sd)
const {
313 uint8_t* st_ptr = ptr;
315 ensure_minimum_memory(end_ptr - ptr,
sizeof(c_));
316 ptr += copy_to_mem(c_, ptr);
318 ptr += sd.serialize(ptr, end_ptr - ptr, data_.data(),
static_cast<unsigned>(data_.size()));
321 ptr += sd.serialize(ptr, end_ptr - ptr, &*partial_item_, 1);
327template<
typename T,
typename A>
328template<
typename SerDe>
329void ebpps_sample<T,A>::serialize(std::ostream& os,
const SerDe& sd)
const {
332 sd.serialize(os, data_.data(),
static_cast<unsigned>(data_.size()));
335 sd.serialize(os, &*partial_item_, 1);
337 if (!os.good())
throw std::runtime_error(
"error writing to std::ostream");
340template<
typename T,
typename A>
341template<
typename SerDe>
342std::pair<ebpps_sample<T, A>,
size_t> ebpps_sample<T, A>::deserialize(
const uint8_t* ptr,
size_t size,
const SerDe& sd,
const A& allocator) {
343 const uint8_t* st_ptr = ptr;
344 const uint8_t* end_ptr = ptr + size;
346 ensure_minimum_memory(size,
sizeof(
double));
348 ptr += copy_from_mem(ptr, c);
350 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
353 const double c_frac = std::modf(c, &c_int);
354 const bool has_partial = c_frac != 0.0;
356 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
358 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
359 ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), num_full_items);
361 items.get_deleter().set_destroy(
true);
362 std::vector<T, A> data(std::make_move_iterator(items.get()),
363 std::make_move_iterator(items.get() + num_full_items),
366 optional<T> partial_item;
369 ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
371 partial_item.emplace(*tmp);
375 return std::pair<ebpps_sample<T,A>,
size_t>(
376 ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator),
380template<
typename T,
typename A>
381template<
typename SerDe>
382ebpps_sample<T, A> ebpps_sample<T, A>::deserialize(std::istream& is,
const SerDe& sd,
const A& allocator) {
383 const double c = read<double>(is);
385 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
388 const double c_frac = std::modf(c, &c_int);
389 const bool has_partial = c_frac != 0.0;
391 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
393 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
394 sd.deserialize(is, items.get(), num_full_items);
396 items.get_deleter().set_destroy(
true);
397 std::vector<T, A> data(std::make_move_iterator(items.get()),
398 std::make_move_iterator(items.get() + num_full_items),
401 optional<T> partial_item;
404 sd.deserialize(is, &*tmp, 1);
406 partial_item.emplace(*tmp);
410 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
412 return ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator);
416template<
typename T,
typename A>
417typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::begin()
const {
418 return const_iterator(
this);
421template<
typename T,
typename A>
422typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::end()
const {
423 return const_iterator(
nullptr);
429template<
typename T,
typename A>
430ebpps_sample<T, A>::const_iterator::const_iterator(
const ebpps_sample* sample) :
435 if (sample ==
nullptr)
440 const double c_frac = std::modf(sample_->get_c(), &c_int);
441 use_partial_ = sample->next_double() < c_frac;
444 if (sample_->data_.size() == 0 && use_partial_) {
448 if (sample_->c_== 0.0 || (sample_->data_.size() == 0 && !sample_->has_partial_item())) { sample_ =
nullptr; }
451template<
typename T,
typename A>
452ebpps_sample<T, A>::const_iterator::const_iterator(
const const_iterator& other) :
453 sample_(other.sample_),
455 use_partial_(other.use_partial_)
458template<
typename T,
typename A>
459typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++() {
460 if (sample_ ==
nullptr)
462 else if (idx_ == PARTIAL_IDX) {
463 idx_ = sample_->data_.size();
470 if (idx_ == sample_->data_.size()) {
480template<
typename T,
typename A>
481typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++(
int) {
482 const_iterator tmp(*
this);
487template<
typename T,
typename A>
488bool ebpps_sample<T, A>::const_iterator::operator==(
const const_iterator& other)
const {
489 if (sample_ != other.sample_)
return false;
490 if (sample_ ==
nullptr)
return true;
491 return idx_ == other.idx_;
494template<
typename T,
typename A>
495bool ebpps_sample<T, A>::const_iterator::operator!=(
const const_iterator& other)
const {
496 return !operator==(other);
499template<
typename T,
typename A>
500auto ebpps_sample<T, A>::const_iterator::operator*() const -> reference {
501 if (idx_ == PARTIAL_IDX)
502 return *(sample_->partial_item_);
504 return sample_->data_[idx_];
507template<
typename T,
typename A>
508auto ebpps_sample<T, A>::const_iterator::operator->() const -> pointer {
512template<
typename T,
typename A>
513class ebpps_sample<T, A>::items_deleter {
515 items_deleter(
const A& allocator,
bool destroy,
size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
516 void operator() (T* ptr) {
517 if (ptr !=
nullptr) {
519 for (
size_t i = 0; i < num_; ++i) {
523 allocator_.deallocate(ptr, num_);
526 void set_destroy(
bool destroy) { destroy_ = destroy; }
DataSketches namespace.
Definition binomial_bounds.hpp:38