20 #ifndef DENSITY_SKETCH_HPP_
21 #define DENSITY_SKETCH_HPP_
23 #include <type_traits>
29 #include "common_defs.hpp"
34 struct gaussian_kernel {
35 T operator()(
const std::vector<T>& v1,
const std::vector<T>& v2)
const {
36 return exp(-std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0, std::plus<T>(), [](T a, T b){return (a-b)*(a-b);}));
54 typename Kernel = gaussian_kernel<T>,
55 typename Allocator = std::allocator<T>
58 static_assert(std::is_floating_point<T>::value,
"Floating point type expected");
61 using Vector = std::vector<T, Allocator>;
62 using Level = std::vector<Vector, typename std::allocator_traits<Allocator>::template rebind_alloc<Vector>>;
63 using Levels = std::vector<Level, typename std::allocator_traits<Allocator>::template rebind_alloc<Level>>;
72 density_sketch(uint16_t k, uint32_t dim,
const Kernel& kernel = Kernel(),
const Allocator& allocator = Allocator());
78 uint16_t
get_k()
const;
96 uint64_t
get_n()
const;
114 template<
typename FwdVector>
115 void update(FwdVector&& point);
121 template<
typename FwdSketch>
122 void merge(FwdSketch&& other);
142 using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;
151 vector_bytes
serialize(
unsigned header_size_bytes = 0)
const;
161 const Kernel& kernel=Kernel(),
const Allocator& allocator = Allocator());
172 const Kernel& kernel=Kernel(),
const Allocator& allocator = Allocator());
179 string<Allocator>
to_string(
bool print_levels =
false,
bool print_items =
false)
const;
181 class const_iterator;
188 const_iterator
begin()
const;
196 const_iterator
end()
const;
199 enum flags { RESERVED0, RESERVED1, IS_EMPTY };
200 static const uint8_t PREAMBLE_INTS_SHORT = 3;
201 static const uint8_t PREAMBLE_INTS_LONG = 6;
202 static const uint8_t FAMILY_ID = 19;
203 static const uint8_t SERIAL_VERSION = 1;
204 static const size_t LEVELS_ARRAY_START = 5;
206 Allocator allocator_;
210 uint32_t num_retained_;
215 void compact_level(
unsigned height);
217 static void check_k(uint16_t k);
218 static void check_serial_version(uint8_t serial_version);
219 static void check_family_id(uint8_t family_id);
220 static void check_header_validity(uint8_t preamble_ints, uint8_t flags_byte, uint8_t serial_version);
222 density_sketch(uint16_t k, uint32_t dim, uint32_t num_retained, uint64_t n, Levels&& levels,
223 const Kernel& kernel = Kernel());
226 template<
typename T,
typename K,
typename A>
227 class density_sketch<T, K, A>::const_iterator {
229 using Vector = density_sketch<T, K, A>::Vector;
230 using iterator_category = std::input_iterator_tag;
231 using value_type = std::pair<const Vector&, const uint64_t>;
232 using difference_type = void;
233 using pointer = return_value_holder<value_type>;
234 using reference =
const value_type;
235 const_iterator& operator++();
236 const_iterator& operator++(
int);
237 bool operator==(
const const_iterator& other)
const;
238 bool operator!=(
const const_iterator& other)
const;
239 const value_type operator*()
const;
240 const return_value_holder<value_type> operator->()
const;
242 using LevelsIterator =
typename density_sketch<T, K, A>::Levels::const_iterator;
243 using LevelIterator =
typename density_sketch<T, K, A>::Level::const_iterator;
244 LevelsIterator levels_it_;
245 LevelsIterator levels_end_;
246 LevelIterator level_it_;
249 const_iterator(LevelsIterator
begin, LevelsIterator
end);
254 #include "density_sketch_impl.hpp"
Density sketch.
Definition: density_sketch.hpp:57
density_sketch(uint16_t k, uint32_t dim, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
Constructor.
T get_estimate(const std::vector< T > &point) const
Density estimate at a given point.
Definition: density_sketch_impl.hpp:115
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: density_sketch_impl.hpp:184
void merge(FwdSketch &&other)
Merges another sketch into this one.
Definition: density_sketch_impl.hpp:98
static density_sketch deserialize(const void *bytes, size_t size, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
uint32_t get_num_retained() const
Returns the number of retained points in the sketch.
Definition: density_sketch_impl.hpp:77
uint32_t get_dim() const
Returns configured dimensions.
Definition: density_sketch_impl.hpp:62
string< Allocator > to_string(bool print_levels=false, bool print_items=false) const
Prints a summary of the sketch.
Definition: density_sketch_impl.hpp:420
uint16_t get_k() const
Returns configured parameter K.
Definition: density_sketch_impl.hpp:57
bool is_empty() const
Returns true if this sketch is empty.
Definition: density_sketch_impl.hpp:67
void update(FwdVector &&point)
Updates this sketch with a given point.
Definition: density_sketch_impl.hpp:88
Allocator get_allocator() const
Returns an instance of the allocator for this sketch.
Definition: density_sketch_impl.hpp:127
const_iterator begin() const
Iterator pointing to the first item in the sketch.
Definition: density_sketch_impl.hpp:468
static density_sketch deserialize(std::istream &is, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
const_iterator end() const
Iterator pointing to the past-the-end item in the sketch.
Definition: density_sketch_impl.hpp:473
bool is_estimation_mode() const
Returns true if this sketch is in estimation mode.
Definition: density_sketch_impl.hpp:82
uint64_t get_n() const
Returns the length of the input stream (number of points observed by this sketch).
Definition: density_sketch_impl.hpp:72
DataSketches namespace.
Definition: binomial_bounds.hpp:38