|
| frequent_items_sketch (uint8_t lg_max_map_size, uint8_t lg_start_map_size=LG_MIN_MAP_SIZE, const E &equal=E(), const A &allocator=A()) |
| Construct this sketch with parameters lg_max_map_size and lg_start_map_size. More...
|
|
void | update (const T &item, W weight=1) |
| Update this sketch with an item and a positive weight (frequency count). More...
|
|
void | update (T &&item, W weight=1) |
| Update this sketch with an item and a positive weight (frequency count). More...
|
|
void | merge (const frequent_items_sketch &other) |
| This function merges the other sketch into this one. More...
|
|
void | merge (frequent_items_sketch &&other) |
| This function merges the other sketch into this one. More...
|
|
bool | is_empty () const |
|
uint32_t | get_num_active_items () const |
|
W | get_total_weight () const |
| Returns the sum of the weights (frequencies) in the stream seen so far by the sketch. More...
|
|
W | get_estimate (const T &item) const |
| Returns the estimate of the weight (frequency) of the given item. More...
|
|
W | get_lower_bound (const T &item) const |
| Returns the guaranteed lower bound weight (frequency) of the given item. More...
|
|
W | get_upper_bound (const T &item) const |
| Returns the guaranteed upper bound weight (frequency) of the given item. More...
|
|
W | get_maximum_error () const |
|
double | get_epsilon () const |
| Returns epsilon value of this sketch. More...
|
|
vector_row | get_frequent_items (frequent_items_error_type err_type) const |
| Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and using get_maximum_error() as a threshold. More...
|
|
vector_row | get_frequent_items (frequent_items_error_type err_type, W threshold) const |
| Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and a threshold. More...
|
|
template<typename SerDe = serde<T>> |
size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
| Computes size needed to serialize the current state of the sketch. More...
|
|
template<typename SerDe = serde<T>> |
void | serialize (std::ostream &os, const SerDe &sd=SerDe()) const |
| This method serializes the sketch into a given stream in a binary form. More...
|
|
template<typename SerDe = serde<T>> |
vector_bytes | serialize (unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const |
| This method serializes the sketch as a vector of bytes. More...
|
|
string< A > | to_string (bool print_items=false) const |
| Returns a human readable summary of this sketch. More...
|
|
|
static double | get_epsilon (uint8_t lg_max_map_size) |
| Returns epsilon used to compute a priori error. More...
|
|
static double | get_apriori_error (uint8_t lg_max_map_size, W estimated_total_weight) |
| Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight. More...
|
|
template<typename SerDe = serde<T>> |
static frequent_items_sketch | deserialize (std::istream &is, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A()) |
| This method deserializes a sketch from a given stream. More...
|
|
template<typename SerDe = serde<T>> |
static frequent_items_sketch | deserialize (const void *bytes, size_t size, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A()) |
| This method deserializes a sketch from a given array of bytes. More...
|
|
template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T>>
class datasketches::frequent_items_sketch< T, W, H, E, A >
Frequent Items sketch.
Based on Java implementation here: https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
- Author
- Alexander Saydakov
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and using get_maximum_error() as a threshold.
The method first examines all active items in the sketch (items that have a counter).
If error_type = NO_FALSE_NEGATIVES, this will include an item in the result list if get_upper_bound(item) > threshold. There will be no false negatives, i.e., no Type II error. There may be items in the set with true frequencies less than the threshold (false positives).
If error_type = NO_FALSE_POSITIVES, this will include an item in the result list if get_lower_bound(item) > threshold. There will be no false positives, i.e., no Type I error. There may be items omitted from the set with true frequencies greater than the threshold (false negatives).
- Parameters
-
err_type | determines whether no false positives or no false negatives are desired. |
- Returns
- an array of frequent items
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and a threshold.
The method first examines all active items in the sketch (items that have a counter).
If error_type = NO_FALSE_NEGATIVES, this will include an item in the result list if get_upper_bound(item) > threshold. There will be no false negatives, i.e., no Type II error. There may be items in the set with true frequencies less than the threshold (false positives).
If error_type = NO_FALSE_POSITIVES, this will include an item in the result list if get_lower_bound(item) > threshold. There will be no false positives, i.e., no Type I error. There may be items omitted from the set with true frequencies greater than the threshold (false negatives).
- Parameters
-
err_type | determines whether no false positives or no false negatives are desired. |
threshold | to include items in the result list |
- Returns
- an array of frequent items