Relative Error Quantiles Sketch. More...

#include <req_sketch.hpp>

Public Types
using	quantile_return_type = typename quantiles_sorted_view< T, Comparator, Allocator >::quantile_return_type
	Quantile return type. More...

Public Member Functions
	req_sketch (uint16_t k, bool hra=true, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
	Constructor. More...

	req_sketch (const req_sketch &other)
	Copy constructor. More...

	req_sketch (req_sketch &&other) noexcept
	Move constructor. More...

req_sketch &	operator= (const req_sketch &other)
	Copy assignment. More...

req_sketch &	operator= (req_sketch &&other)
	Move assignment. More...

template<typename TT , typename CC , typename AA >
	req_sketch (const req_sketch< TT, CC, AA > &other, const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
	Type converting constructor. More...

uint16_t	get_k () const
	Returns configured parameter K. More...

bool	is_HRA () const
	Returns configured parameter High Rank Accuracy. More...

bool	is_empty () const
	Returns true if this sketch is empty. More...

uint64_t	get_n () const
	Returns the length of the input stream. More...

uint32_t	get_num_retained () const
	Returns the number of retained items in the sketch. More...

bool	is_estimation_mode () const
	Returns true if this sketch is in estimation mode. More...

template<typename FwdT >
void	update (FwdT &&item)
	Updates this sketch with the given data item. More...

template<typename FwdSk >
void	merge (FwdSk &&other)
	Merges another sketch into this one. More...

const T &	get_min_item () const
	Returns the min item of the stream. More...

const T &	get_max_item () const
	Returns the max item of the stream. More...

Comparator	get_comparator () const
	Returns an instance of the comparator for this sketch. More...

Allocator	get_allocator () const
	Returns an instance of the allocator for this sketch. More...

double	get_rank (const T &item, bool inclusive=true) const
	Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive. More...

vector_double	get_PMF (const T *split_points, uint32_t size, bool inclusive=true) const
	Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (items). More...

vector_double	get_CDF (const T *split_points, uint32_t size, bool inclusive=true) const
	Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (items). More...

quantile_return_type	get_quantile (double rank, bool inclusive=true) const
	Returns an approximate quantile of the given normalized rank. More...

double	get_rank_lower_bound (double rank, uint8_t num_std_dev) const
	Returns an approximate lower bound of the given normalized rank. More...

double	get_rank_upper_bound (double rank, uint8_t num_std_dev) const
	Returns an approximate upper bound of the given normalized rank. More...

template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if< std::is_arithmetic< TT >::value, int >::type = 0>
size_t	get_serialized_size_bytes (const SerDe &sd=SerDe()) const
	Computes size needed to serialize the current state of the sketch. More...

template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic< TT >::value, int >::type = 0>
size_t	get_serialized_size_bytes (const SerDe &sd=SerDe()) const
	Computes size needed to serialize the current state of the sketch. More...

template<typename SerDe = serde<T>>
void	serialize (std::ostream &os, const SerDe &sd=SerDe()) const
	This method serializes the sketch into a given stream in a binary form. More...

template<typename SerDe = serde<T>>
vector_bytes	serialize (unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const
	This method serializes the sketch as a vector of bytes. More...

string< Allocator >	to_string (bool print_levels=false, bool print_items=false) const
	Prints a summary of the sketch. More...

const_iterator	begin () const
	Iterator pointing to the first item in the sketch. More...

const_iterator	end () const
	Iterator pointing to the past-the-end item in the sketch. More...

quantiles_sorted_view< T, Comparator, Allocator >	get_sorted_view () const
	Gets the sorted view of this sketch. More...

Static Public Member Functions
static double	get_RSE (uint16_t k, double rank, bool hra, uint64_t n)
	Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]). More...

template<typename SerDe = serde<T>>
static req_sketch	deserialize (std::istream &is, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
	This method deserializes a sketch from a given stream. More...

template<typename SerDe = serde<T>>
static req_sketch	deserialize (const void *bytes, size_t size, const SerDe &sd=SerDe(), const Comparator &comparator=Comparator(), const Allocator &allocator=Allocator())
	This method deserializes a sketch from a given array of bytes. More...

Detailed Description

template<typename T, typename Comparator = std::less<T>, typename Allocator = std::allocator<T>>
class datasketches::req_sketch< T, Comparator, Allocator >

Relative Error Quantiles Sketch.

This is an implementation based on the paper "Relative Error Streaming Quantiles" by Graham Cormode, Zohar Karnin, Edo Liberty, Justin Thaler, Pavel Veselý, and loosely derived from a Python prototype written by Pavel Veselý.

Reference: https://arxiv.org/abs/2004.01668

This implementation differs from the algorithm described in the paper in the following ways:

The algorithm requires no upper bound on the stream length. Instead, each relative-compactor counts the number of compaction operations performed so far (via variable state). Initially, the relative-compactor starts with INIT_NUMBER_OF_SECTIONS. Each time the number of compactions (variable state) exceeds 2^{numSections - 1}, we double numSections. Note that after merging the sketch with another one, the variable state may not correspond to the number of compactions performed at a particular level; however, since the state variable never exceeds the number of compactions, the guarantees of the sketch remain valid.
The size of each section (variable k and section_size in the code and parameter k in the paper) is initialized with a number set by the user via variable k. When the number of sections doubles, we decrease section_size by a factor of sqrt(2). This is applied at each level separately. Thus, when we double the number of sections, the nominal compactor size increases by a factor of approx. sqrt(2) (+/- rounding).
The merge operation here does not perform "special compactions", which are used in the paper to allow for a tight mathematical analysis of the sketch.

This implementation provides a number of capabilities not discussed in the paper or provided in the Python prototype.

The Python prototype only implemented high accuracy for low ranks. This implementation provides the user with the ability to choose either high rank accuracy or low rank accuracy at the time of sketch construction.
The Python prototype only implemented a comparison criterion of "INCLUSIVE". This implementation allows the user to use both the "INCLUSIVE" criterion and the "EXCLUSIVE" criterion.
This implementation provides extensive debug visibility into the operation of the sketch with two levels of detail output. This is not only useful for debugging, but is a powerful tool to help users understand how the sketch works.

Member Typedef Documentation

◆ quantile_return_type

using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type

Quantile return type.

This is to return quantiles either by value (for arithmetic types) or by const reference (for all other types).

Constructor & Destructor Documentation

◆ req_sketch() [1/4]

req_sketch	(	uint16_t	k,
		bool	hra = `true`,
		const Comparator &	comparator = `Comparator()`,
		const Allocator &	allocator = `Allocator()`
	)

explicit

Constructor.

Parameters

k	Controls the size and error of the sketch. It must be even and in the range [4, 1024], inclusive. A value of 12 roughly corresponds to a 1% relative error guarantee at 95% confidence.
hra	if true, the default, the high ranks are prioritized for better accuracy. Otherwise the low ranks are prioritized for better accuracy.
comparator	strict weak ordering function (see C++ named requirements: Compare)
allocator	used by this sketch to allocate memory

◆ req_sketch() [2/4]

req_sketch ( const req_sketch< T, Comparator, Allocator > & other )

Copy constructor.

Parameters

other sketch to be copied

◆ req_sketch() [3/4]

req_sketch ( req_sketch< T, Comparator, Allocator > && other )

noexcept

Move constructor.

Parameters

other sketch to be moved

◆ req_sketch() [4/4]

req_sketch	(	const req_sketch< TT, CC, AA > &	other,
		const Comparator &	comparator = `Comparator()`,
		const Allocator &	allocator = `Allocator()`
	)

explicit

Type converting constructor.

Parameters

other	sketch of a different type
comparator	instance of a Comparator
allocator	instance of an Allocator

Member Function Documentation

◆ operator=() [1/2]

req_sketch< T, C, A > & operator= ( const req_sketch< T, Comparator, Allocator > & other )

Copy assignment.

Parameters

other sketch to be copied

Returns: reference to this sketch

◆ operator=() [2/2]

req_sketch< T, C, A > & operator= ( req_sketch< T, Comparator, Allocator > && other )

Move assignment.

Parameters

other sketch to be moved

Returns: reference to this sketch

◆ get_k()

uint16_t get_k

Returns configured parameter K.

Returns: parameter K

◆ is_HRA()

bool is_HRA

Returns configured parameter High Rank Accuracy.

Returns: parameter HRA

◆ is_empty()

bool is_empty

Returns true if this sketch is empty.

Returns: empty flag

◆ get_n()

uint64_t get_n

Returns the length of the input stream.

Returns: stream length

◆ get_num_retained()

uint32_t get_num_retained

Returns the number of retained items in the sketch.

Returns: number of retained items

◆ is_estimation_mode()

bool is_estimation_mode

Returns true if this sketch is in estimation mode.

Returns: estimation mode flag

◆ update()

void update ( FwdT && item )

Updates this sketch with the given data item.

Parameters

item	from a stream of items

◆ merge()

void merge ( FwdSk && other )

Merges another sketch into this one.

Parameters

other sketch to merge into this one

◆ get_min_item()

const T & get_min_item

Returns the min item of the stream.

If the sketch is empty this throws std::runtime_error.

Returns: the min item of the stream

◆ get_max_item()

const T & get_max_item

Returns the max item of the stream.

If the sketch is empty this throws std::runtime_error.

Returns: the max item of the stream

◆ get_comparator()

C get_comparator

Returns an instance of the comparator for this sketch.

Returns: comparator

◆ get_allocator()

A get_allocator

Returns an instance of the allocator for this sketch.

Returns: allocator

◆ get_rank()

double get_rank	(	const T &	item,
		bool	inclusive = `true`
	)		const

Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.

If the sketch is empty this throws std::runtime_error.

Parameters

item	to be ranked.
inclusive	if true the weight of the given item is included into the rank. Otherwise the rank equals the sum of the weights of all items that are less than the given item according to the comparator C.

Returns: an approximate rank of the given item

◆ get_PMF()

auto get_PMF	(	const T *	split_points,
		uint32_t	size,
		bool	inclusive = `true`
	)		const

Returns an approximation to the Probability Mass Function (PMF) of the input stream given a set of split points (items).

If the sketch is empty this throws std::runtime_error.

Parameters

split_points	an array of m unique, monotonically increasing items that divide the input domain into m+1 consecutive disjoint intervals (bins).
size	the number of split points in the array
inclusive	if true the rank of an item includes its own weight, and therefore if the sketch contains items equal to a split point, then in the PMF such items are included in the interval to the left of the split point. Otherwise they are included in the interval to the right of the split point.

Returns: an array of m+1 doubles each of which is an approximation to the fraction of the input stream items (the mass) that fall into one of those intervals.

◆ get_CDF()

auto get_CDF	(	const T *	split_points,
		uint32_t	size,
		bool	inclusive = `true`
	)		const

Returns an approximation to the Cumulative Distribution Function (CDF), which is the cumulative analog of the PMF, of the input stream given a set of split points (items).

If the sketch is empty this throws std::runtime_error.

Parameters

split_points	an array of m unique, monotonically increasing items that divide the input domain into m+1 consecutive disjoint intervals.
size	the number of split points in the array
inclusive	if true the rank of an item includes its own weight, and therefore if the sketch contains items equal to a split point, then in the CDF such items are included in the interval to the left of the split point. Otherwise they are included in the interval to the right of the split point.

Returns: an array of m+1 doubles, which are a consecutive approximation to the CDF of the input stream given the split_points. The value at array position j of the returned CDF array is the sum of the returned values in positions 0 through j of the returned PMF array. This can be viewed as an array of ranks of the given split points plus one more value that is always 1.

◆ get_quantile()

auto get_quantile	(	double	rank,
		bool	inclusive = `true`
	)		const

Returns an approximate quantile of the given normalized rank.

The normalized rank must be in the range [0.0, 1.0] (both inclusive).

If the sketch is empty this throws std::runtime_error.

Parameters

rank	of an item in the hypothetical sorted stream.
inclusive	if true, the given rank is considered inclusive (includes weight of an item)

Returns: approximate quantile associated with the given rank

◆ get_rank_lower_bound()

double get_rank_lower_bound	(	double	rank,
		uint8_t	num_std_dev
	)		const

Returns an approximate lower bound of the given normalized rank.

Parameters

rank	the given rank, a value between 0 and 1.0.
num_std_dev	the number of standard deviations. Must be 1, 2, or 3.

Returns: an approximate lower bound rank.

◆ get_rank_upper_bound()

double get_rank_upper_bound	(	double	rank,
		uint8_t	num_std_dev
	)		const

Returns an approximate upper bound of the given normalized rank.

Parameters

rank	the given rank, a value between 0 and 1.0.
num_std_dev	the number of standard deviations. Must be 1, 2, or 3.

Returns: an approximate upper bound rank.

◆ get_RSE()

double get_RSE	(	uint16_t	k,
		double	rank,
		bool	hra,
		uint64_t	n
	)

static

Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]).

Derived from Lemma 12 in https://arxiv.org/abs/2004.01668v2, but the constant factors were modified based on empirical measurements.

Parameters

k	the given value of k
rank	the given normalized rank, a number in [0,1].
hra	if true High Rank Accuracy mode is being selected, otherwise, Low Rank Accuracy.
n	an estimate of the total number of items submitted to the sketch.

Returns: an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]).

◆ get_serialized_size_bytes() [1/2]

size_t get_serialized_size_bytes ( const SerDe & sd = SerDe() ) const

Computes size needed to serialize the current state of the sketch.

This version is for fixed-size arithmetic types (integral and floating point).

Parameters

sd	instance of a SerDe

Returns: size in bytes needed to serialize this sketch

◆ get_serialized_size_bytes() [2/2]

size_t get_serialized_size_bytes ( const SerDe & sd = SerDe() ) const

Computes size needed to serialize the current state of the sketch.

This version is for all other types and can be expensive since every item needs to be looked at.

Parameters

sd	instance of a SerDe

Returns: size in bytes needed to serialize this sketch

◆ serialize() [1/2]

void serialize	(	std::ostream &	os,
		const SerDe &	sd = `SerDe()`
	)		const

This method serializes the sketch into a given stream in a binary form.

Parameters

os	output stream
sd	instance of a SerDe

◆ serialize() [2/2]

vector_bytes serialize	(	unsigned	header_size_bytes = `0`,
		const SerDe &	sd = `SerDe()`
	)		const

This method serializes the sketch as a vector of bytes.

An optional header can be reserved in front of the sketch. It is a blank space of a given size. This header is used in the DataSketches PostgreSQL extension.

Parameters

header_size_bytes	space to reserve in front of the sketch
sd	instance of a SerDe

◆ deserialize() [1/2]

static req_sketch deserialize	(	std::istream &	is,
		const SerDe &	sd = `SerDe()`,
		const Comparator &	comparator = `Comparator()`,
		const Allocator &	allocator = `Allocator()`
	)

static

This method deserializes a sketch from a given stream.

Parameters

is	input stream
sd	instance of a SerDe
comparator	instance of a Comparator
allocator	instance of an Allocator

Returns: an instance of a sketch

◆ deserialize() [2/2]

static req_sketch deserialize	(	const void *	bytes,
		size_t	size,
		const SerDe &	sd = `SerDe()`,
		const Comparator &	comparator = `Comparator()`,
		const Allocator &	allocator = `Allocator()`
	)

static

This method deserializes a sketch from a given array of bytes.

Parameters

bytes	pointer to the array of bytes
size	the size of the array
sd	instance of a SerDe
comparator	instance of a Comparator
allocator	instance of an Allocator

Returns: an instance of a sketch

◆ to_string()

string< A > to_string	(	bool	print_levels = `false`,
		bool	print_items = `false`
	)		const

Prints a summary of the sketch.

Parameters

print_levels	if true include information about levels
print_items	if true include sketch data

◆ begin()

auto begin

Iterator pointing to the first item in the sketch.

If the sketch is empty, the returned iterator must not be dereferenced or incremented.

Returns: iterator pointing to the first item in the sketch

◆ end()

auto end

Iterator pointing to the past-the-end item in the sketch.

The past-the-end item is the hypothetical item that would follow the last item. It does not point to any item, and must not be dereferenced or incremented.

Returns: iterator pointing to the past-the-end item in the sketch

◆ get_sorted_view()

quantiles_sorted_view< T, C, A > get_sorted_view

Gets the sorted view of this sketch.

Returns: the sorted view of this sketch

The documentation for this class was generated from the following files:

req/include/req_sketch.hpp
req/include/req_sketch_impl.hpp

Public Types

Public Member Functions

Static Public Member Functions

Detailed Description

template<typename T, typename Comparator = std::less<T>, typename Allocator = std::allocator<T>> class datasketches::req_sketch< T, Comparator, Allocator >

Member Typedef Documentation

◆ quantile_return_type

Constructor & Destructor Documentation

◆ req_sketch() [1/4]

◆ req_sketch() [2/4]

◆ req_sketch() [3/4]

◆ req_sketch() [4/4]

Member Function Documentation

◆ operator=() [1/2]

◆ operator=() [2/2]

◆ get_k()

◆ is_HRA()

◆ is_empty()

◆ get_n()

◆ get_num_retained()

◆ is_estimation_mode()

◆ update()

◆ merge()

◆ get_min_item()

◆ get_max_item()

◆ get_comparator()

◆ get_allocator()

◆ get_rank()

◆ get_PMF()

◆ get_CDF()

◆ get_quantile()

◆ get_rank_lower_bound()

◆ get_rank_upper_bound()

◆ get_RSE()

◆ get_serialized_size_bytes() [1/2]

◆ get_serialized_size_bytes() [2/2]

◆ serialize() [1/2]

◆ serialize() [2/2]

◆ deserialize() [1/2]

◆ deserialize() [2/2]

◆ to_string()

◆ begin()

◆ end()

◆ get_sorted_view()

template<typename T, typename Comparator = std::less<T>, typename Allocator = std::allocator<T>>
class datasketches::req_sketch< T, Comparator, Allocator >