|
| var_opt_sketch (uint32_t k, resize_factor rf=var_opt_constants::DEFAULT_RESIZE_FACTOR, const A &allocator=A()) |
| Constructor.
|
|
| var_opt_sketch (const var_opt_sketch &other) |
| Copy constructor.
|
|
| var_opt_sketch (var_opt_sketch &&other) noexcept |
| Move constructor.
|
|
var_opt_sketch & | operator= (const var_opt_sketch &other) |
| Copy assignment.
|
|
var_opt_sketch & | operator= (var_opt_sketch &&other) |
| Move assignment.
|
|
void | update (const T &item, double weight=1.0) |
| Updates this sketch with the given data item with the given weight.
|
|
void | update (T &&item, double weight=1.0) |
| Updates this sketch with the given data item with the given weight.
|
|
uint32_t | get_k () const |
| Returns the configured maximum sample size.
|
|
uint64_t | get_n () const |
| Returns the length of the input stream.
|
|
uint32_t | get_num_samples () const |
| Returns the number of samples currently in the sketch.
|
|
template<typename P > |
subset_summary | estimate_subset_sum (P predicate) const |
| Computes an estimated subset sum from the entire stream for objects matching a given predicate.
|
|
bool | is_empty () const |
| Returns true if the sketch is empty.
|
|
void | reset () |
| Resets the sketch to its default, empty state.
|
|
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if< std::is_arithmetic< TT >::value, int >::type = 0> |
size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
| Computes size needed to serialize the current state of the sketch.
|
|
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic< TT >::value, int >::type = 0> |
size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
| Computes size needed to serialize the current state of the sketch.
|
|
template<typename SerDe = serde<T>> |
vector_bytes | serialize (unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const |
| This method serializes the sketch as a vector of bytes.
|
|
template<typename SerDe = serde<T>> |
void | serialize (std::ostream &os, const SerDe &sd=SerDe()) const |
| This method serializes the sketch into a given stream in a binary form.
|
|
string< A > | to_string () const |
| Prints a summary of the sketch.
|
|
string< A > | items_to_string () const |
| Prints the raw sketch items to a string.
|
|
const_iterator | begin () const |
| Iterator pointing to the first item in the sketch.
|
|
const_iterator | end () const |
| Iterator pointing to the past-the-end item in the sketch.
|
|
template<typename T, typename A = std::allocator<T>>
class datasketches::var_opt_sketch< T, A >
This sketch samples data from a stream of items.
Designed for optimal (minimum) variance when querying the sketch to estimate subset sums of items matching a provided predicate. Variance optimal (varopt) sampling is related to reservoir sampling, with improved error bounds for subset sum estimation.
Authors: Kevin Lang, Jon Malkin
template<typename T , typename A >
template<typename P >
subset_summary estimate_subset_sum |
( |
P |
predicate | ) |
const |
Computes an estimated subset sum from the entire stream for objects matching a given predicate.
Provides a lower bound, estimate, and upper bound using a target of 2 standard deviations. This is technically a heuristic method and tries to err on the conservative side.
- Parameters
-
predicate | a predicate function |
- Returns
- a subset_summary item with estimate, upper and lower bounds, and total sketch weight
template<typename T , typename A >
template<typename TT , typename SerDe , typename std::enable_if< std::is_arithmetic< TT >::value, int >::type >
size_t get_serialized_size_bytes |
( |
const SerDe & |
sd = SerDe() | ) |
const |
|
inline |
Computes size needed to serialize the current state of the sketch.
This version is for fixed-size arithmetic types (integral and floating point).
- Parameters
-
sd | instance of a SerDe |
- Returns
- size in bytes needed to serialize this sketch
template<typename T , typename A = std::allocator<T>>
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic< TT >::value, int >::type = 0>
size_t get_serialized_size_bytes |
( |
const SerDe & |
sd = SerDe() | ) |
const |
|
inline |
Computes size needed to serialize the current state of the sketch.
This version is for all other types and can be expensive since every item needs to be looked at.
- Parameters
-
sd | instance of a SerDe |
- Returns
- size in bytes needed to serialize this sketch
template<typename T , typename A = std::allocator<T>>
template<typename SerDe = serde<T>>
vector_bytes serialize |
( |
unsigned |
header_size_bytes = 0 , |
|
|
const SerDe & |
sd = SerDe() |
|
) |
| const |
This method serializes the sketch as a vector of bytes.
An optional header can be reserved in front of the sketch. It is a blank space of a given size. This header is used in the Datasketches PostgreSQL extension.
- Parameters
-
header_size_bytes | space to reserve in front of the sketch |
sd | instance of a SerDe |
template<typename T , typename A >
string< A > items_to_string |
( |
| ) |
const |
Prints the raw sketch items to a string.
Calls items_to_stream() internally. Only works for a type T with a defined std::ostream& operator<<(std::ostream&, const T&). It is kept separate from to_string() to allow compilation even if T does not have such an operator defined.
- Returns
- a string with the sketch items