20 #ifndef THETA_UNION_BASE_IMPL_HPP_
21 #define THETA_UNION_BASE_IMPL_HPP_
26 #include "conditional_forward.hpp"
30 template<
typename EN,
typename EK,
typename P,
typename S,
typename CS,
typename A>
31 theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
32 float p, uint64_t theta, uint64_t seed,
const P& policy,
const A& allocator):
34 table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
35 union_theta_(table_.theta_)
38 template<
typename EN,
typename EK,
typename P,
typename S,
typename CS,
typename A>
40 void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
41 if (sketch.is_empty())
return;
42 if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_))
throw std::invalid_argument(
"seed hash mismatch");
43 table_.is_empty_ =
false;
44 union_theta_ = std::min(union_theta_, sketch.get_theta64());
45 for (
auto&& entry: sketch) {
46 const uint64_t hash = EK()(entry);
47 if (hash < union_theta_ && hash < table_.theta_) {
48 auto result = table_.find(hash);
50 table_.insert(result.first, conditional_forward<SS>(entry));
52 policy_(*result.first, conditional_forward<SS>(entry));
55 if (sketch.is_ordered())
break;
58 union_theta_ = std::min(union_theta_, table_.theta_);
61 template<
typename EN,
typename EK,
typename P,
typename S,
typename CS,
typename A>
62 CS theta_union_base<EN, EK, P, S, CS, A>::get_result(
bool ordered)
const {
63 std::vector<EN, A> entries(table_.allocator_);
64 if (table_.is_empty_)
return CS(
true,
true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
65 entries.reserve(table_.num_entries_);
66 uint64_t theta = std::min(union_theta_, table_.theta_);
67 const uint32_t nominal_num = 1 << table_.lg_nom_size_;
68 if (union_theta_ >= table_.theta_) {
69 std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
71 std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
73 if (entries.size() > nominal_num) {
74 std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
75 theta = EK()(entries[nominal_num]);
76 entries.erase(entries.begin() + nominal_num, entries.end());
77 entries.shrink_to_fit();
79 if (ordered) std::sort(entries.begin(), entries.end(), comparator());
80 return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
83 template<
typename EN,
typename EK,
typename P,
typename S,
typename CS,
typename A>
84 const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy()
const {
88 template<
typename EN,
typename EK,
typename P,
typename S,
typename CS,
typename A>
89 void theta_union_base<EN, EK, P, S, CS, A>::reset() {
91 union_theta_ = table_.theta_;
DataSketches namespace.
Definition: binomial_bounds.hpp:38