datasketches-cpp
Loading...
Searching...
No Matches
array_of_strings_sketch.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20#ifndef ARRAY_OF_STRINGS_SKETCH_HPP_
21#define ARRAY_OF_STRINGS_SKETCH_HPP_
22
23#include <memory>
24#include <string>
25
26#include "array_tuple_sketch.hpp"
27#include "xxhash64.h"
28
29namespace datasketches {
30
31using array_of_strings = array<std::string>;
32
33// default update policy for an array of strings
// Policy type offering a factory (create) and two update overloads that take
// the summary to modify plus an input array_of_strings. Only declarations are
// visible in this listing.
// NOTE(review): the definitions presumably live in
// array_of_strings_sketch_impl.hpp, which this header includes at the end -
// confirm there before relying on any particular update semantics.
34class default_array_of_strings_update_policy {
35public:
// Defaulted constructor; the class declares no data members in this listing.
36 default_array_of_strings_update_policy() = default;
37
// Returns an array_of_strings by value.
// NOTE(review): presumably the initial (empty) summary for a new entry - confirm.
38 array_of_strings create() const;
39
// Updates `array` in place from `input`, passed by const reference.
// NOTE(review): whether this replaces or merges the contents is not visible here.
40 void update(array_of_strings& array, const array_of_strings& input) const;
41
// Same as above, but `input` is passed by pointer.
// NOTE(review): null handling is not visible from the declaration - confirm.
42 void update(array_of_strings& array, const array_of_strings* input) const;
43};
44
// Serializer/deserializer for an array of strings (wording taken from this
// page's own summary of the class).
// NOTE(review): listing line 56 is absent from this extract; it presumably
// carries the class head (and the `public:` label) for
// default_array_of_strings_serde. Restore it from the real header before
// treating this text as compilable source.
55template<typename Allocator = std::allocator<array_of_strings>>
// Allocator rebound so that it allocates array_of_strings objects.
57 using summary_allocator = typename std::allocator_traits<Allocator>::template rebind_alloc<array_of_strings>;
58
// explicit: prevents implicit conversion from an Allocator to a serde.
59 explicit default_array_of_strings_serde(const Allocator& allocator = Allocator());
60
// Stream-based pair: operate on `num` items starting at `items`.
61 void serialize(std::ostream& os, const array_of_strings* items, unsigned num) const;
62 void deserialize(std::istream& is, array_of_strings* items, unsigned num) const;
// Buffer-based pair: `ptr`/`capacity` describe the caller's buffer.
// NOTE(review): the size_t result is presumably the number of bytes written
// or consumed - confirm in the implementation.
63 size_t serialize(void* ptr, size_t capacity, const array_of_strings* items, unsigned num) const;
64 size_t deserialize(const void* ptr, size_t capacity, array_of_strings* items, unsigned num) const;
// NOTE(review): presumably the serialized size of one item in bytes - confirm.
65 size_t size_of_item(const array_of_strings& item) const;
66
67private:
// Rebound allocator instance, presumably initialised from the constructor argument.
68 summary_allocator summary_allocator_;
// Static helpers; their checks and size computation are defined elsewhere.
// NOTE(review): check_num_nodes takes a uint8_t, which suggests a per-array
// node-count limit - confirm the exact bound and failure mode.
69 static void check_num_nodes(uint8_t num_nodes);
70 static uint32_t compute_total_bytes(const array_of_strings& item);
71};
72
// Hashes an array of strings to a 64-bit value. This page's summary describes
// it as "ArrayOfStrings-compatible hashing" and places the definition in
// array_of_strings_sketch_impl.hpp.
// NOTE(review): the hash algorithm and seed are not visible here; xxhash64.h
// is included by this header, so it is presumably XXHash64 - confirm.
76uint64_t hash_array_of_strings_key(const array_of_strings& key);
77
// Extended class of compact_tuple_sketch for array of strings (wording taken
// from this page's own summary of the class).
// NOTE(review): listing lines 93, 96 and 135 are absent from this extract.
// Line 93 presumably carries the class head, line 96 the `Base` alias used
// below, and line 135 a private member. Restore them from the real header.
92template<typename Allocator = std::allocator<array_of_strings>>
94 public compact_tuple_sketch<array_of_strings, Allocator> {
95public:
// Byte-vector type taken from the base class.
97 using vector_bytes = typename Base::vector_bytes;
// Makes the base class's serialize overloads usable through this class.
98 using Base::serialize;
99
// Constructs from any sketch type `Sketch`; `ordered` defaults to true.
// NOTE(review): `ordered` presumably selects an ordered vs unordered compact
// form, matching the free conversion function's description - confirm.
106 template<typename Sketch>
107 compact_array_of_strings_tuple_sketch(const Sketch& sketch, bool ordered = true);
108
// Deserializes a sketch from a given stream. The SerDe defaults to
// default_array_of_strings_serde<Allocator>, the seed to DEFAULT_SEED.
117 template<typename SerDe = default_array_of_strings_serde<Allocator>>
118 static compact_array_of_strings_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED,
119 const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
120
// Deserializes a sketch from a given array of bytes of length `size`; the
// defaults are the same as for the stream overload.
130 template<typename SerDe = default_array_of_strings_serde<Allocator>>
131 static compact_array_of_strings_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
132 const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
133
134private:
136};
137
// Alias template parameterised on Allocator and Policy; Policy defaults to
// default_array_of_strings_update_policy. Two of the four template arguments
// passed to the aliased type are array_of_strings.
// NOTE(review): listing line 143 is absent from this extract. It presumably
// reads `using update_array_of_strings_tuple_sketch = update_tuple_sketch<`,
// judging by the name used in the conversion function's parameter and by this
// page's "Update Tuple sketch" reference. Confirm against the real header,
// including which argument is the summary type and which the update type.
141template<typename Allocator = std::allocator<array_of_strings>,
142 typename Policy = default_array_of_strings_update_policy>
144 array_of_strings,
145 array_of_strings,
146 Policy,
147 Allocator
148>;
149
// Converts an array of strings tuple sketch to a compact sketch (ordered or
// unordered); `ordered` defaults to true. Wording taken from this page's own
// summary of the function, which places the definition in
// array_of_strings_sketch_impl.hpp.
// NOTE(review): listing line 157 is absent from this extract. Per the page's
// signature summary it carries the return type
// compact_array_of_strings_tuple_sketch<Allocator> and the function name
// compact_array_of_strings_sketch. Restore it from the real header.
156template<typename Allocator = std::allocator<array_of_strings>, typename Policy = default_array_of_strings_update_policy>
158 const update_array_of_strings_tuple_sketch<Allocator, Policy>& sketch, bool ordered = true);
159
160} /* namespace datasketches */
161
162#include "array_of_strings_sketch_impl.hpp"
163
164#endif
Extended class of compact_tuple_sketch for array of strings.
Definition array_of_strings_sketch.hpp:94
static compact_array_of_strings_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const SerDe &sd=SerDe(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
static compact_array_of_strings_tuple_sketch deserialize(const void *bytes, size_t size, uint64_t seed=DEFAULT_SEED, const SerDe &sd=SerDe(), const Allocator &allocator=Allocator())
This method deserializes a sketch from a given array of bytes.
Compact Tuple sketch.
Definition tuple_sketch.hpp:457
void serialize(std::ostream &os, const SerDe &sd=SerDe()) const
This method serializes the sketch into a given stream in a binary form.
Definition tuple_sketch_impl.hpp:401
Update Tuple sketch.
Definition tuple_sketch.hpp:222
DataSketches namespace.
Definition binomial_bounds.hpp:38
compact_array_of_strings_tuple_sketch< Allocator > compact_array_of_strings_sketch(const update_array_of_strings_tuple_sketch< Allocator, Policy > &sketch, bool ordered=true)
Converts an array of strings tuple sketch to a compact sketch (ordered or unordered).
Definition array_of_strings_sketch_impl.hpp:67
uint64_t hash_array_of_strings_key(const array_of_strings &key)
Hashes an array of strings using ArrayOfStrings-compatible hashing.
Definition array_of_strings_sketch_impl.hpp:53
Serializer/deserializer for an array of strings.
Definition array_of_strings_sketch.hpp:56