20 #ifndef _EBPPS_SAMPLE_IMPL_HPP_
21 #define _EBPPS_SAMPLE_IMPL_HPP_
23 #include "common_defs.hpp"
24 #include "conditional_forward.hpp"
25 #include "ebpps_sample.hpp"
34 template<
typename T,
typename A>
35 ebpps_sample<T,A>::ebpps_sample(uint32_t reserved_size,
const A& allocator) :
36 allocator_(allocator),
41 data_.reserve(reserved_size);
44 template<
typename T,
typename A>
45 ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item,
double c,
const A& allocator) :
46 allocator_(allocator),
48 partial_item_(partial_item),
49 data_(data, allocator)
52 template<
typename T,
typename A>
54 void ebpps_sample<T,A>::replace_content(TT&& item,
double theta) {
57 partial_item_.reset();
59 data_.emplace_back(std::forward<TT>(item));
61 partial_item_.emplace(std::forward<TT>(item));
65 template<
typename T,
typename A>
66 auto ebpps_sample<T,A>::get_sample() const -> result_type {
68 const double c_frac = std::modf(c_, &unused);
69 const bool include_partial = next_double() < c_frac;
70 const uint32_t result_size =
static_cast<uint32_t
>(data_.size()) + (include_partial ? 1 : 0);
73 result.reserve(result_size);
74 std::copy(data_.begin(), data_.end(), std::back_inserter(result));
76 result.emplace_back(
static_cast<const T&
>(*partial_item_));
81 template<
typename T,
typename A>
82 void ebpps_sample<T,A>::downsample(
double theta) {
83 if (theta >= 1.0)
return;
85 const double new_c = theta * c_;
87 const double new_c_frac = std::modf(new_c, &new_c_int);
89 const double c_frac = std::modf(c_, &c_int);
91 if (new_c_int == 0.0) {
93 if (next_double() > (c_frac / c_)) {
97 }
else if (new_c_int == c_int) {
99 if (next_double() > (1 - theta * c_frac)/(1 - new_c_frac)) {
103 if (next_double() < theta * c_frac) {
106 subsample(
static_cast<uint32_t
>(new_c_int));
110 subsample(
static_cast<uint32_t
>(new_c_int) + 1);
111 move_one_to_partial();
115 if (new_c == new_c_int)
116 partial_item_.reset();
121 template<
typename T,
typename A>
122 template<
typename FwdSample>
123 void ebpps_sample<T,A>::merge(FwdSample&& other) {
125 const double c_frac = std::modf(c_, &c_int);
128 const double other_c_frac = std::modf(other.c_, &unused);
133 for (uint32_t i = 0; i < other.data_.size(); ++i)
134 data_.emplace_back(conditional_forward<FwdSample>(other.data_[i]));
145 if (c_frac == 0.0 && other_c_frac == 0.0) {
146 partial_item_.reset();
147 }
else if (c_frac + other_c_frac == 1.0 || c_ == std::floor(c_)) {
148 if (next_double() <= c_frac) {
150 data_.emplace_back(std::move(*partial_item_));
152 if (other.partial_item_)
153 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
155 partial_item_.reset();
156 }
else if (c_frac + other_c_frac < 1.0) {
157 if (next_double() > c_frac / (c_frac + other_c_frac)) {
158 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
161 if (next_double() <= (1 - c_frac) / ((1 - c_frac) + (1 - other_c_frac))) {
162 data_.emplace_back(conditional_forward<FwdSample>(*other.partial_item_));
164 data_.emplace_back(std::move(*partial_item_));
165 partial_item_.reset();
166 set_partial(conditional_forward<FwdSample>(*other.partial_item_));
171 template<
typename T,
typename A>
172 string<A> ebpps_sample<T ,A>::to_string()
const {
173 std::ostringstream oss;
174 oss <<
" sample:" << std::endl;
176 for (
const T& item : data_)
177 oss <<
"\t" << idx++ <<
":\t" << item << std::endl;
179 if (
bool(partial_item_))
180 oss << (*partial_item_) << std::endl;
182 oss <<
"NULL" << std::endl;
187 template<
typename T,
typename A>
188 void ebpps_sample<T,A>::subsample(uint32_t num_samples) {
195 if (num_samples == data_.size())
return;
197 auto erase_start = data_.begin();
198 uint32_t data_len =
static_cast<uint32_t
>(data_.size());
199 for (uint32_t i = 0; i < num_samples; ++i, ++erase_start) {
200 uint32_t j = i + random_idx(data_len - i);
201 std::swap(data_[i], data_[j]);
204 data_.erase(erase_start, data_.end());
207 template<
typename T,
typename A>
208 template<
typename FwdItem>
209 void ebpps_sample<T,A>::set_partial(FwdItem&& item) {
211 *partial_item_ = conditional_forward<FwdItem>(item);
213 partial_item_.emplace(conditional_forward<FwdItem>(item));
216 template<
typename T,
typename A>
217 void ebpps_sample<T,A>::move_one_to_partial() {
218 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
220 const size_t last_idx = data_.size() - 1;
221 if (idx != last_idx) {
222 std::swap(data_[idx], data_[last_idx]);
225 set_partial(std::move(data_[last_idx]));
230 template<
typename T,
typename A>
231 void ebpps_sample<T,A>::swap_with_partial() {
233 const size_t idx = random_idx(
static_cast<uint32_t
>(data_.size()));
234 std::swap(data_[idx], *partial_item_);
236 move_one_to_partial();
240 template<
typename T,
typename A>
241 void ebpps_sample<T,A>::reset() {
243 partial_item_.reset();
247 template<
typename T,
typename A>
248 double ebpps_sample<T,A>::get_c()
const {
252 template<
typename T,
typename A>
253 auto ebpps_sample<T,A>::get_full_items() const -> result_type {
254 return result_type(data_);
257 template<
typename T,
typename A>
258 bool ebpps_sample<T,A>::has_partial_item()
const {
259 return bool(partial_item_);
262 template<
typename T,
typename A>
263 T ebpps_sample<T,A>::get_partial_item()
const {
264 if (!partial_item_)
throw std::runtime_error(
"Call to get_partial_item() when no partial item exists");
265 return *partial_item_;
268 template<
typename T,
typename A>
269 uint32_t ebpps_sample<T,A>::random_idx(uint32_t max) {
270 static std::uniform_int_distribution<uint32_t> dist;
271 return dist(random_utils::rand, std::uniform_int_distribution<uint32_t>::param_type(0, max - 1));
274 template<
typename T,
typename A>
275 double ebpps_sample<T,A>::next_double() {
276 return random_utils::next_double(random_utils::rand);
279 template<
typename T,
typename A>
280 uint32_t ebpps_sample<T,A>::get_num_retained_items()
const {
281 return static_cast<uint32_t
>(data_.size() + (partial_item_ ? 1 : 0));
285 template<
typename T,
typename A>
286 template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value,
int>::type>
287 size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe&)
const {
291 return sizeof(c_) + (data_.size() + (partial_item_ ? 1 : 0)) *
sizeof(T);
295 template<
typename T,
typename A>
296 template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value,
int>::type>
297 size_t ebpps_sample<T, A>::get_serialized_size_bytes(
const SerDe& sd)
const {
298 if (c_ == 0.0)
return 0;
300 size_t num_bytes =
sizeof(c_);
301 for (
auto it : data_)
302 num_bytes += sd.size_of_item(it);
305 num_bytes += sd.size_of_item(*partial_item_);
310 template<
typename T,
typename A>
311 template<
typename SerDe>
312 size_t ebpps_sample<T,A>::serialize(uint8_t* ptr,
const uint8_t* end_ptr,
const SerDe& sd)
const {
313 uint8_t* st_ptr = ptr;
315 ensure_minimum_memory(end_ptr - ptr,
sizeof(c_));
316 ptr += copy_to_mem(c_, ptr);
318 ptr += sd.serialize(ptr, end_ptr - ptr, data_.data(),
static_cast<unsigned>(data_.size()));
321 ptr += sd.serialize(ptr, end_ptr - ptr, &*partial_item_, 1);
327 template<
typename T,
typename A>
328 template<
typename SerDe>
329 void ebpps_sample<T,A>::serialize(std::ostream& os,
const SerDe& sd)
const {
332 sd.serialize(os, data_.data(),
static_cast<unsigned>(data_.size()));
335 sd.serialize(os, &*partial_item_, 1);
337 if (!os.good())
throw std::runtime_error(
"error writing to std::ostream");
340 template<
typename T,
typename A>
341 template<
typename SerDe>
342 std::pair<ebpps_sample<T, A>,
size_t> ebpps_sample<T, A>::deserialize(
const uint8_t* ptr,
size_t size,
const SerDe& sd,
const A& allocator) {
343 const uint8_t* st_ptr = ptr;
344 const uint8_t* end_ptr = ptr + size;
346 ensure_minimum_memory(size,
sizeof(
double));
348 ptr += copy_from_mem(ptr, c);
350 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
353 const double c_frac = std::modf(c, &c_int);
354 const bool has_partial = c_frac != 0.0;
356 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
358 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
359 ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), num_full_items);
361 items.get_deleter().set_destroy(
true);
362 std::vector<T, A> data(std::make_move_iterator(items.get()),
363 std::make_move_iterator(items.get() + num_full_items),
366 optional<T> partial_item;
369 ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
371 partial_item.emplace(*tmp);
375 return std::pair<ebpps_sample<T,A>,
size_t>(
376 ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator),
380 template<
typename T,
typename A>
381 template<
typename SerDe>
382 ebpps_sample<T, A> ebpps_sample<T, A>::deserialize(std::istream& is,
const SerDe& sd,
const A& allocator) {
383 const double c = read<double>(is);
385 throw std::runtime_error(
"sketch image has C < 0.0 during deserializaiton");
388 const double c_frac = std::modf(c, &c_int);
389 const bool has_partial = c_frac != 0.0;
391 const uint32_t num_full_items =
static_cast<uint32_t
>(c_int);
393 std::unique_ptr<T, items_deleter> items(alloc.allocate(num_full_items), items_deleter(allocator,
false, num_full_items));
394 sd.deserialize(is, items.get(), num_full_items);
396 items.get_deleter().set_destroy(
true);
397 std::vector<T, A> data(std::make_move_iterator(items.get()),
398 std::make_move_iterator(items.get() + num_full_items),
401 optional<T> partial_item;
404 sd.deserialize(is, &*tmp, 1);
406 partial_item.emplace(*tmp);
410 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
412 return ebpps_sample<T,A>(std::move(data), std::move(partial_item), c, allocator);
416 template<
typename T,
typename A>
417 typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::begin()
const {
418 return const_iterator(
this);
421 template<
typename T,
typename A>
422 typename ebpps_sample<T, A>::const_iterator ebpps_sample<T, A>::end()
const {
423 return const_iterator(
nullptr);
429 template<
typename T,
typename A>
430 ebpps_sample<T, A>::const_iterator::const_iterator(
const ebpps_sample* sample) :
435 if (sample ==
nullptr)
440 const double c_frac = std::modf(sample_->get_c(), &c_int);
441 use_partial_ = sample->next_double() < c_frac;
444 if (sample_->data_.size() == 0 && use_partial_) {
448 if (sample_->c_== 0.0 || (sample_->data_.size() == 0 && !sample_->has_partial_item())) { sample_ =
nullptr; }
451 template<
typename T,
typename A>
452 ebpps_sample<T, A>::const_iterator::const_iterator(
const const_iterator& other) :
453 sample_(other.sample_),
455 use_partial_(other.use_partial_)
458 template<
typename T,
typename A>
459 typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++() {
460 if (sample_ ==
nullptr)
462 else if (idx_ == PARTIAL_IDX) {
463 idx_ = sample_->data_.size();
470 if (idx_ == sample_->data_.size()) {
480 template<
typename T,
typename A>
481 typename ebpps_sample<T, A>::const_iterator& ebpps_sample<T, A>::const_iterator::operator++(
int) {
482 const_iterator tmp(*
this);
487 template<
typename T,
typename A>
488 bool ebpps_sample<T, A>::const_iterator::operator==(
const const_iterator& other)
const {
489 if (sample_ != other.sample_)
return false;
490 if (sample_ ==
nullptr)
return true;
491 return idx_ == other.idx_;
494 template<
typename T,
typename A>
495 bool ebpps_sample<T, A>::const_iterator::operator!=(
const const_iterator& other)
const {
496 return !operator==(other);
499 template<
typename T,
typename A>
500 auto ebpps_sample<T, A>::const_iterator::operator*() const -> reference {
501 if (idx_ == PARTIAL_IDX)
502 return *(sample_->partial_item_);
504 return sample_->data_[idx_];
507 template<
typename T,
typename A>
508 auto ebpps_sample<T, A>::const_iterator::operator->() const -> pointer {
512 template<
typename T,
typename A>
513 class ebpps_sample<T, A>::items_deleter {
515 items_deleter(
const A& allocator,
bool destroy,
size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
516 void operator() (T* ptr) {
517 if (ptr !=
nullptr) {
519 for (
size_t i = 0; i < num_; ++i) {
523 allocator_.deallocate(ptr, num_);
526 void set_destroy(
bool destroy) { destroy_ = destroy; }
DataSketches namespace.
Definition: binomial_bounds.hpp:38