datasketches-cpp
Loading...
Searching...
No Matches
theta_union_base_impl.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef THETA_UNION_BASE_IMPL_HPP_
21#define THETA_UNION_BASE_IMPL_HPP_
22
23#include <algorithm>
24#include <stdexcept>
25
26#include "conditional_forward.hpp"
27
28namespace datasketches {
29
30template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
31theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
32 float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
33policy_(policy),
34table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
35union_theta_(table_.theta_)
36{}
37
38template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
39template<typename SS>
40void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
41 if (sketch.is_empty()) return;
42 if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
43 table_.is_empty_ = false;
44 union_theta_ = std::min(union_theta_, sketch.get_theta64());
45 for (auto&& entry: sketch) {
46 const uint64_t hash = EK()(entry);
47 if (hash < union_theta_ && hash < table_.theta_) {
48 auto result = table_.find(hash);
49 if (!result.second) {
50 table_.insert(result.first, conditional_forward<SS>(entry));
51 } else {
52 policy_(*result.first, conditional_forward<SS>(entry));
53 }
54 } else {
55 if (sketch.is_ordered()) break; // early stop
56 }
57 }
58 union_theta_ = std::min(union_theta_, table_.theta_);
59}
60
61template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
62CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
63 std::vector<EN, A> entries(table_.allocator_);
64 if (table_.is_empty_) return CS(true, true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
65 entries.reserve(table_.num_entries_);
66 uint64_t theta = std::min(union_theta_, table_.theta_);
67 const uint32_t nominal_num = 1 << table_.lg_nom_size_;
68 if (union_theta_ >= table_.theta_) {
69 std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
70 } else {
71 std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
72 }
73 if (entries.size() > nominal_num) {
74 std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
75 theta = EK()(entries[nominal_num]);
76 entries.erase(entries.begin() + nominal_num, entries.end());
77 entries.shrink_to_fit();
78 }
79 if (ordered) std::sort(entries.begin(), entries.end(), comparator());
80 return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
81}
82
83template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
84const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
85 return policy_;
86}
87
88template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
89void theta_union_base<EN, EK, P, S, CS, A>::reset() {
90 table_.reset();
91 union_theta_ = table_.theta_;
92}
93
94} /* namespace datasketches */
95
96#endif
DataSketches namespace.
Definition binomial_bounds.hpp:38