Density sketch.
More...
#include <density_sketch.hpp>
|
| density_sketch (uint16_t k, uint32_t dim, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator()) |
| Constructor.
|
|
uint16_t | get_k () const |
| Returns configured parameter K.
|
|
uint32_t | get_dim () const |
| Returns configured dimensions.
|
|
bool | is_empty () const |
| Returns true if this sketch is empty.
|
|
uint64_t | get_n () const |
| Returns the length of the input stream (number of points observed by this sketch).
|
|
uint32_t | get_num_retained () const |
| Returns the number of retained points in the sketch.
|
|
bool | is_estimation_mode () const |
| Returns true if this sketch is in estimation mode.
|
|
template<typename FwdVector > |
void | update (FwdVector &&point) |
| Updates this sketch with a given point.
|
|
template<typename FwdSketch > |
void | merge (FwdSketch &&other) |
| Merges another sketch into this one.
|
|
T | get_estimate (const std::vector< T > &point) const |
| Density estimate at a given point.
|
|
Allocator | get_allocator () const |
| Returns an instance of the allocator for this sketch.
|
|
void | serialize (std::ostream &os) const |
| This method serializes the sketch into a given stream in a binary form.
|
|
vector_bytes | serialize (unsigned header_size_bytes=0) const |
| This method serializes the sketch as a vector of bytes.
|
|
string< Allocator > | to_string (bool print_levels=false, bool print_items=false) const |
| Prints a summary of the sketch.
|
|
const_iterator | begin () const |
| Iterator pointing to the first item in the sketch.
|
|
const_iterator | end () const |
| Iterator pointing to the past-the-end item in the sketch.
|
|
|
static density_sketch | deserialize (std::istream &is, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator()) |
| This method deserializes a sketch from a given stream.
|
|
static density_sketch | deserialize (const void *bytes, size_t size, const Kernel &kernel=Kernel(), const Allocator &allocator=Allocator()) |
| This method deserializes a sketch from a given array of bytes.
|
|
template<typename T, typename Kernel = gaussian_kernel<T>, typename Allocator = std::allocator<T>>
class datasketches::density_sketch< T, Kernel, Allocator >
Density sketch.
Builds a coreset from the given set of input points. Provides a density estimate at a given point.
Based on the following paper: Zohar Karnin, Edo Liberty "Discrepancy, Coresets, and Sketches in Machine Learning" https://proceedings.mlr.press/v99/karnin19a/karnin19a.pdf
Inspired by the following implementation: https://github.com/edoliberty/streaming-quantiles/blob/f688c8161a25582457b0a09deb4630a81406293b/gde.py
◆ density_sketch()
template<typename T , typename Kernel = gaussian_kernel<T>, typename Allocator = std::allocator<T>>
density_sketch |
( |
uint16_t |
k, |
|
|
uint32_t |
dim, |
|
|
const Kernel & |
kernel = Kernel() , |
|
|
const Allocator & |
allocator = Allocator() |
|
) |
| |
Constructor.
- Parameters
-
k | controls the size and error of the sketch. |
dim | dimension of the input domain |
kernel | kernel function to be used by this instance |
allocator | allocator to be used by this instance |
◆ get_k()
template<typename T , typename K , typename A >
uint16_t get_k |
( |
| ) |
const |
Returns configured parameter K.
- Returns
- parameter K
◆ get_dim()
template<typename T , typename K , typename A >
uint32_t get_dim |
( |
| ) |
const |
Returns configured dimensions.
- Returns
- dimensions
◆ is_empty()
template<typename T , typename K , typename A >
bool is_empty |
( |
| ) |
const |
Returns true if this sketch is empty.
- Returns
- empty flag
◆ get_n()
template<typename T , typename K , typename A >
uint64_t get_n |
( |
| ) |
const |
Returns the length of the input stream (number of points observed by this sketch).
- Returns
- stream length
◆ get_num_retained()
template<typename T , typename K , typename A >
uint32_t get_num_retained |
( |
| ) |
const |
Returns the number of retained points in the sketch.
- Returns
- number of retained points
◆ is_estimation_mode()
template<typename T , typename K , typename A >
bool is_estimation_mode |
( |
| ) |
const |
Returns true if this sketch is in estimation mode.
- Returns
- estimation mode flag
◆ update()
template<typename T , typename K , typename A >
template<typename FwdVector >
void update |
( |
FwdVector && |
point | ) |
|
Updates this sketch with a given point.
- Parameters
-
point | the given point to update this sketch with |
◆ merge()
template<typename T , typename K , typename A >
template<typename FwdSketch >
void merge |
( |
FwdSketch && |
other | ) |
|
Merges another sketch into this one.
- Parameters
-
other | sketch to merge into this one |
◆ get_estimate()
template<typename T , typename K , typename A >
T get_estimate |
( |
const std::vector< T > & |
point | ) |
const |
Density estimate at a given point.
- Returns
- density estimate at a given point
◆ get_allocator()
template<typename T , typename K , typename A >
A get_allocator |
( |
| ) |
const |
Returns an instance of the allocator for this sketch.
- Returns
- allocator
◆ serialize() [1/2]
template<typename T , typename K , typename A >
void serialize |
( |
std::ostream & |
os | ) |
const |
This method serializes the sketch into a given stream in a binary form.
- Parameters
-
os | output stream to write the serialized sketch to |
◆ serialize() [2/2]
template<typename T , typename K , typename A >
auto serialize |
( |
unsigned |
header_size_bytes = 0 | ) |
const |
This method serializes the sketch as a vector of bytes.
An optional header can be reserved in front of the sketch. It is an uninitialized space of a given size. This header is used in the DataSketches PostgreSQL extension.
- Parameters
-
header_size_bytes | space to reserve in front of the sketch |
◆ deserialize() [1/2]
template<typename T , typename Kernel = gaussian_kernel<T>, typename Allocator = std::allocator<T>>
static density_sketch deserialize |
( |
std::istream & |
is, |
|
|
const Kernel & |
kernel = Kernel() , |
|
|
const Allocator & |
allocator = Allocator() |
|
) |
| |
|
static |
This method deserializes a sketch from a given stream.
- Parameters
-
is | input stream |
kernel | the kernel function to use for this sketch |
allocator | the memory allocator to use with this sketch |
- Returns
- an instance of the sketch
◆ deserialize() [2/2]
template<typename T , typename Kernel = gaussian_kernel<T>, typename Allocator = std::allocator<T>>
static density_sketch deserialize |
( |
const void * |
bytes, |
|
|
size_t |
size, |
|
|
const Kernel & |
kernel = Kernel() , |
|
|
const Allocator & |
allocator = Allocator() |
|
) |
| |
|
static |
This method deserializes a sketch from a given array of bytes.
- Parameters
-
bytes | pointer to the array of bytes |
size | the size of the array |
kernel | the kernel function to use for this sketch |
allocator | the memory allocator to use with this sketch |
- Returns
- an instance of the sketch
◆ to_string()
template<typename T , typename K , typename A >
string< A > to_string |
( |
bool |
print_levels = false , |
|
|
bool |
print_items = false |
|
) |
| const |
Prints a summary of the sketch.
- Parameters
-
print_levels | if true include information about levels |
print_items | if true include sketch data |
◆ begin()
template<typename T , typename K , typename A >
const_iterator begin |
( |
| ) |
const |
Iterator pointing to the first item in the sketch.
If the sketch is empty, the returned iterator must not be dereferenced or incremented.
- Returns
- iterator pointing to the first item in the sketch
◆ end()
template<typename T , typename K , typename A >
const_iterator end |
( |
| ) |
const |
Iterator pointing to the past-the-end item in the sketch.
The past-the-end item is the hypothetical item that would follow the last item. It does not point to any item, and must not be dereferenced or incremented.
- Returns
- iterator pointing to the past-the-end item in the sketch
The documentation for this class was generated from the following files: