datasketches-cpp
Loading...
Searching...
No Matches
density_sketch.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef DENSITY_SKETCH_HPP_
21#define DENSITY_SKETCH_HPP_
22
23#include <type_traits>
24#include <vector>
25#include <functional>
26#include <numeric>
27#include <cmath>
28
29#include "common_defs.hpp"
30
31namespace datasketches {
32
33template<typename T>
34struct gaussian_kernel {
35 T operator()(const std::vector<T>& v1, const std::vector<T>& v2) const {
36 return exp(-std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0, std::plus<T>(), [](T a, T b){return (a-b)*(a-b);}));
37 }
38};
39
52template<
53 typename T,
54 typename Kernel = gaussian_kernel<T>,
55 typename Allocator = std::allocator<T>
56>
58 static_assert(std::is_floating_point<T>::value, "Floating point type expected");
59
60public:
61 using Vector = std::vector<T, Allocator>;
62 using Level = std::vector<Vector, typename std::allocator_traits<Allocator>::template rebind_alloc<Vector>>;
63 using Levels = std::vector<Level, typename std::allocator_traits<Allocator>::template rebind_alloc<Level>>;
64
72 density_sketch(uint16_t k, uint32_t dim, const Kernel& kernel = Kernel(), const Allocator& allocator = Allocator());
73
78 uint16_t get_k() const;
79
84 uint32_t get_dim() const;
85
90 bool is_empty() const;
91
96 uint64_t get_n() const;
97
102 uint32_t get_num_retained() const;
103
108 bool is_estimation_mode() const;
109
114 template<typename FwdVector>
115 void update(FwdVector&& point);
116
121 template<typename FwdSketch>
122 void merge(FwdSketch&& other);
123
128 T get_estimate(const std::vector<T>& point) const;
129
134 Allocator get_allocator() const;
135
140 void serialize(std::ostream& os) const;
141
142 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;
143
151 vector_bytes serialize(unsigned header_size_bytes = 0) const;
152
160 static density_sketch deserialize(std::istream& is,
161 const Kernel& kernel=Kernel(), const Allocator& allocator = Allocator());
162
171 static density_sketch deserialize(const void* bytes, size_t size,
172 const Kernel& kernel=Kernel(), const Allocator& allocator = Allocator());
173
179 string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
180
181 class const_iterator;
182
188 const_iterator begin() const;
189
196 const_iterator end() const;
197
198private:
199 enum flags { RESERVED0, RESERVED1, IS_EMPTY };
200 static const uint8_t PREAMBLE_INTS_SHORT = 3;
201 static const uint8_t PREAMBLE_INTS_LONG = 6;
202 static const uint8_t FAMILY_ID = 19;
203 static const uint8_t SERIAL_VERSION = 1;
204 static const size_t LEVELS_ARRAY_START = 5;
205
206 Allocator allocator_;
207 Kernel kernel_;
208 uint16_t k_;
209 uint32_t dim_;
210 uint32_t num_retained_;
211 uint64_t n_;
212 Levels levels_;
213
214 void compact();
215 void compact_level(unsigned height);
216
217 static void check_k(uint16_t k);
218 static void check_serial_version(uint8_t serial_version);
219 static void check_family_id(uint8_t family_id);
220 static void check_header_validity(uint8_t preamble_ints, uint8_t flags_byte, uint8_t serial_version);
221
222 density_sketch(uint16_t k, uint32_t dim, uint32_t num_retained, uint64_t n, Levels&& levels,
223 const Kernel& kernel = Kernel());
224};
225
226template<typename T, typename K, typename A>
227class density_sketch<T, K, A>::const_iterator {
228public:
229 using Vector = density_sketch<T, K, A>::Vector;
230 using iterator_category = std::input_iterator_tag;
231 using value_type = std::pair<const Vector&, const uint64_t>;
232 using difference_type = void;
233 using pointer = return_value_holder<value_type>;
234 using reference = const value_type;
235 const_iterator& operator++();
236 const_iterator& operator++(int);
237 bool operator==(const const_iterator& other) const;
238 bool operator!=(const const_iterator& other) const;
239 const value_type operator*() const;
240 const return_value_holder<value_type> operator->() const;
241private:
242 using LevelsIterator = typename density_sketch<T, K, A>::Levels::const_iterator;
243 using LevelIterator = typename density_sketch<T, K, A>::Level::const_iterator;
244 LevelsIterator levels_it_;
245 LevelsIterator levels_end_;
246 LevelIterator level_it_;
247 unsigned height_;
248 friend class density_sketch<T, K, A>;
249 const_iterator(LevelsIterator begin, LevelsIterator end);
250};
251
252} /* namespace datasketches */
253
254#include "density_sketch_impl.hpp"
255
256#endif
Density sketch.
Definition density_sketch.hpp:57
density_sketch(uint16_t k, uint32_t dim, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
Constructor.
T get_estimate(const std::vector< T > &point) const
Density estimate at a given point.
Definition density_sketch_impl.hpp:115
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition density_sketch_impl.hpp:184
void merge(FwdSketch &&other)
Merges another sketch into this one.
Definition density_sketch_impl.hpp:98
static density_sketch deserialize(const void *bytes, size_t size, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
uint32_t get_num_retained() const
Returns the number of retained points in the sketch.
Definition density_sketch_impl.hpp:77
uint32_t get_dim() const
Returns configured dimensions.
Definition density_sketch_impl.hpp:62
string< Allocator > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition density_sketch_impl.hpp:420
uint16_t get_k() const
Returns configured parameter K.
Definition density_sketch_impl.hpp:57
bool is_empty() const
Returns true if this sketch is empty.
Definition density_sketch_impl.hpp:67
void update(FwdVector &&point)
Updates this sketch with a given point.
Definition density_sketch_impl.hpp:88
Allocator get_allocator() const
Returns an instance of the allocator for this sketch.
Definition density_sketch_impl.hpp:127
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition density_sketch_impl.hpp:468
static density_sketch deserialize(std::istream &is, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition density_sketch_impl.hpp:473
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition density_sketch_impl.hpp:82
uint64_t get_n() const
Returns the length of the input stream (number of points observed by this sketch).
Definition density_sketch_impl.hpp:72
DataSketches namespace.
Definition binomial_bounds.hpp:38