|
| | var_opt_sketch (uint32_t k, resize_factor rf=var_opt_constants::DEFAULT_RESIZE_FACTOR, const A &allocator=A()) |
| | Constructor.
|
| |
| | var_opt_sketch (const var_opt_sketch &other) |
| | Copy constructor.
|
| |
| | var_opt_sketch (var_opt_sketch &&other) noexcept |
| | Move constructor.
|
| |
| var_opt_sketch & | operator= (const var_opt_sketch &other) |
| | Copy assignment.
|
| |
| var_opt_sketch & | operator= (var_opt_sketch &&other) |
| | Move assignment.
|
| |
| void | update (const T &item, double weight=1.0) |
| | Updates this sketch with the given data item with the given weight.
|
| |
| void | update (T &&item, double weight=1.0) |
| | Updates this sketch with the given data item with the given weight.
|
| |
| uint32_t | get_k () const |
| | Returns the configured maximum sample size.
|
| |
| uint64_t | get_n () const |
| | Returns the length of the input stream.
|
| |
| uint32_t | get_num_samples () const |
| | Returns the number of samples currently in the sketch.
|
| |
| template<typename P > |
| subset_summary | estimate_subset_sum (P predicate) const |
| | Computes an estimated subset sum from the entire stream for objects matching a given predicate.
|
| |
| bool | is_empty () const |
| | Returns true if the sketch is empty.
|
| |
| void | reset () |
| | Resets the sketch to its default, empty state.
|
| |
| template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if< std::is_arithmetic< TT >::value, int >::type = 0> |
| size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
| | Computes size needed to serialize the current state of the sketch.
|
| |
| template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic< TT >::value, int >::type = 0> |
| size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
| | Computes size needed to serialize the current state of the sketch.
|
| |
| template<typename SerDe = serde<T>> |
| vector_bytes | serialize (unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const |
| | This method serializes the sketch as a vector of bytes.
|
| |
| template<typename SerDe = serde<T>> |
| void | serialize (std::ostream &os, const SerDe &sd=SerDe()) const |
| | This method serializes the sketch into a given stream in a binary form.
|
| |
| string< A > | to_string () const |
| | Prints a summary of the sketch.
|
| |
| string< A > | items_to_string () const |
| | Prints the raw sketch items to a string.
|
| |
| const_iterator | begin () const |
| | Iterator pointing to the first item in the sketch.
|
| |
| const_iterator | end () const |
| | Iterator pointing to the past-the-end item in the sketch.
|
| |
template<typename T, typename A = std::allocator<T>>
class datasketches::var_opt_sketch< T, A >
This sketch samples data from a stream of items.
Designed for optimal (minimum) variance when querying the sketch to estimate subset sums of items matching a provided predicate. Variance optimal (varopt) sampling is related to reservoir sampling, with improved error bounds for subset sum estimation.
Authors: Kevin Lang, Jon Malkin
template<typename T , typename A >
template<typename P >
| subset_summary estimate_subset_sum |
( |
P |
predicate | ) |
const |
Computes an estimated subset sum from the entire stream for objects matching a given predicate.
Provides a lower bound, estimate, and upper bound using a target of 2 standard deviations. This is technically a heuristic method and tries to err on the conservative side.
- Parameters
-
| predicate | a predicate function |
- Returns
- a subset_summary item with estimate, upper and lower bounds, and total sketch weight
template<typename T , typename A >
template<typename TT , typename SerDe , typename std::enable_if< std::is_arithmetic< TT >::value, int >::type >
| size_t get_serialized_size_bytes |
( |
const SerDe & |
sd = SerDe() | ) |
const |
|
inline |
Computes size needed to serialize the current state of the sketch.
This version is for fixed-size arithmetic types (integral and floating point).
- Parameters
-
| sd | instance of a SerDe |
- Returns
- size in bytes needed to serialize this sketch
template<typename T , typename A = std::allocator<T>>
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic< TT >::value, int >::type = 0>
| size_t get_serialized_size_bytes |
( |
const SerDe & |
sd = SerDe() | ) |
const |
|
inline |
Computes size needed to serialize the current state of the sketch.
This version is for all other types and can be expensive since every item needs to be looked at.
- Parameters
-
| sd | instance of a SerDe |
- Returns
- size in bytes needed to serialize this sketch
template<typename T , typename A = std::allocator<T>>
template<typename SerDe = serde<T>>
| vector_bytes serialize |
( |
unsigned |
header_size_bytes = 0, |
|
|
const SerDe & |
sd = SerDe() |
|
) |
| const |
This method serializes the sketch as a vector of bytes.
An optional header can be reserved in front of the sketch: a blank space of the given size. This header is used in the DataSketches PostgreSQL extension.
- Parameters
-
| header_size_bytes | space to reserve in front of the sketch |
| sd | instance of a SerDe |
template<typename T , typename A >
| string< A > items_to_string |
( |
| ) |
const |
Prints the raw sketch items to a string.
Calls items_to_stream() internally. This only works for a type T that has std::ostream& operator<<(std::ostream&, const T&) defined. It is kept separate from to_string() so that code still compiles when T has no such operator.
- Returns
- a string with the sketch items