datasketches-cpp
Loading...
Searching...
No Matches
array_tuple_sketch_impl.hpp
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20namespace datasketches {
21
22template<typename Array, typename Policy, typename Allocator>
23update_array_tuple_sketch<Array, Policy, Allocator>::update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
24 float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
25Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
26
27template<typename Array, typename Policy, typename Allocator>
31
32template<typename Array, typename Policy, typename Allocator>
35}
36
37// builder
38
39template<typename Array, typename Policy, typename Allocator>
40update_array_tuple_sketch<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
41tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {}
42
43template<typename Array, typename Policy, typename Allocator>
45 return update_array_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
46}
47
48// compact sketch
49
50template<typename Array, typename Allocator>
51template<typename S>
53Base(other, ordered), num_values_(other.get_num_values()) {}
54
55template<typename Array, typename Allocator>
57 uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values):
58Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {}
59
60template<typename Array, typename Allocator>
62Base(std::move(base)), num_values_(num_values) {}
63
64template<typename Array, typename Allocator>
66 return num_values_;
67}
68
69template<typename Array, typename Allocator>
71 const uint8_t preamble_longs = 1;
72 write(os, preamble_longs);
73 const uint8_t serial_version = SERIAL_VERSION;
74 write(os, serial_version);
75 const uint8_t family = SKETCH_FAMILY;
76 write(os, family);
77 const uint8_t type = SKETCH_TYPE;
78 write(os, type);
79 const uint8_t flags_byte(
80 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
81 (this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
82 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
83 );
84 write(os, flags_byte);
85 write(os, num_values_);
86 const uint16_t seed_hash = this->get_seed_hash();
87 write(os, seed_hash);
88 write(os, this->theta_);
89 if (this->get_num_retained() > 0) {
90 const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
91 write(os, num_entries);
92 const uint32_t unused32 = 0;
93 write(os, unused32);
94 for (const auto& it: this->entries_) {
95 write(os, it.first);
96 }
97 for (const auto& it: this->entries_) {
98 write(os, it.second.data(), it.second.size() * sizeof(typename Array::value_type));
99 }
100 }
101}
102
103template<typename Array, typename Allocator>
104auto compact_array_tuple_sketch<Array, Allocator>::serialize(unsigned header_size_bytes) const -> vector_bytes {
105 const uint8_t preamble_longs = 1;
106 const size_t size = header_size_bytes + 16 // preamble and theta
107 + (this->entries_.size() > 0 ? 8 : 0)
108 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values_) * this->entries_.size();
109 vector_bytes bytes(size, 0, this->entries_.get_allocator());
110 uint8_t* ptr = bytes.data() + header_size_bytes;
111
112 ptr += copy_to_mem(preamble_longs, ptr);
113 const uint8_t serial_version = SERIAL_VERSION;
114 ptr += copy_to_mem(serial_version, ptr);
115 const uint8_t family = SKETCH_FAMILY;
116 ptr += copy_to_mem(family, ptr);
117 const uint8_t type = SKETCH_TYPE;
118 ptr += copy_to_mem(type, ptr);
119 const uint8_t flags_byte(
120 (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
121 (this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
122 (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
123 );
124 ptr += copy_to_mem(flags_byte, ptr);
125 ptr += copy_to_mem(num_values_, ptr);
126 const uint16_t seed_hash = this->get_seed_hash();
127 ptr += copy_to_mem(seed_hash, ptr);
128 ptr += copy_to_mem((this->theta_), ptr);
129 if (this->get_num_retained() > 0) {
130 const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
131 ptr += copy_to_mem(num_entries, ptr);
132 ptr += sizeof(uint32_t); // unused
133 for (const auto& it: this->entries_) {
134 ptr += copy_to_mem(it.first, ptr);
135 }
136 for (const auto& it: this->entries_) {
137 ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(typename Array::value_type));
138 }
139 }
140 return bytes;
141}
142
143template<typename Array, typename Allocator>
145 read<uint8_t>(is); // unused
146 const auto serial_version = read<uint8_t>(is);
147 const auto family = read<uint8_t>(is);
148 const auto type = read<uint8_t>(is);
149 const auto flags_byte = read<uint8_t>(is);
150 const auto num_values = read<uint8_t>(is);
151 const auto seed_hash = read<uint16_t>(is);
152 checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
153 checker<true>::check_sketch_family(family, SKETCH_FAMILY);
154 checker<true>::check_sketch_type(type, SKETCH_TYPE);
155 const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
156 if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
157
158 const auto theta = read<uint64_t>(is);
159 std::vector<Entry, AllocEntry> entries(allocator);
160 if (has_entries) {
161 const auto num_entries = read<uint32_t>(is);
162 read<uint32_t>(is); // unused
163 entries.reserve(num_entries);
164 std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
165 read(is, keys.data(), num_entries * sizeof(uint64_t));
166 for (size_t i = 0; i < num_entries; ++i) {
167 Array summary(num_values, 0, allocator);
168 read(is, summary.data(), num_values * sizeof(typename Array::value_type));
169 entries.push_back(Entry(keys[i], std::move(summary)));
170 }
171 }
172 if (!is.good()) throw std::runtime_error("error reading from std::istream");
173 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
174 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
175 return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
176}
177
178template<typename Array, typename Allocator>
179compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(const void* bytes, size_t size, uint64_t seed, const Allocator& allocator) {
180 ensure_minimum_memory(size, 16);
181 const char* ptr = static_cast<const char*>(bytes);
182 ptr += sizeof(uint8_t); // unused
183 uint8_t serial_version;
184 ptr += copy_from_mem(ptr, serial_version);
185 uint8_t family;
186 ptr += copy_from_mem(ptr, family);
187 uint8_t type;
188 ptr += copy_from_mem(ptr, type);
189 uint8_t flags_byte;
190 ptr += copy_from_mem(ptr, flags_byte);
191 uint8_t num_values;
192 ptr += copy_from_mem(ptr, num_values);
193 uint16_t seed_hash;
194 ptr += copy_from_mem(ptr, seed_hash);
195 checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
196 checker<true>::check_sketch_family(family, SKETCH_FAMILY);
197 checker<true>::check_sketch_type(type, SKETCH_TYPE);
198 const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
199 if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
200
201 uint64_t theta;
202 ptr += copy_from_mem(ptr, theta);
203 std::vector<Entry, AllocEntry> entries(allocator);
204 if (has_entries) {
205 ensure_minimum_memory(size, 24);
206 uint32_t num_entries;
207 ptr += copy_from_mem(ptr, num_entries);
208 ptr += sizeof(uint32_t); // unused
209 ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values) * num_entries);
210 entries.reserve(num_entries);
211 std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
212 ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries);
213 for (size_t i = 0; i < num_entries; ++i) {
214 Array summary(num_values, 0, allocator);
215 ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(typename Array::value_type));
216 entries.push_back(Entry(keys[i], std::move(summary)));
217 }
218 }
219 const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
220 const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
221 return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
222}
223
224} /* namespace datasketches */
Compact array tuple sketch.
Definition array_tuple_sketch.hpp:163
uint8_t get_num_values() const
Definition array_tuple_sketch_impl.hpp:65
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition array_tuple_sketch_impl.hpp:70
compact_array_tuple_sketch(const Sketch &other, bool ordered=true)
Copy constructor.
static compact_array_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
Definition array_tuple_sketch_impl.hpp:144
Tuple base builder.
Definition tuple_sketch.hpp:614
Update array tuple sketch builder.
Definition array_tuple_sketch.hpp:145
update_array_tuple_sketch build() const
Definition array_tuple_sketch_impl.hpp:44
Update array tuple sketch.
Definition array_tuple_sketch.hpp:125
uint8_t get_num_values() const
Definition array_tuple_sketch_impl.hpp:28
Update Tuple sketch.
Definition tuple_sketch.hpp:217
virtual uint32_t get_num_retained() const
Definition tuple_sketch_impl.hpp:125
virtual bool is_empty() const
Definition tuple_sketch_impl.hpp:110
virtual bool is_ordered() const
Definition tuple_sketch_impl.hpp:115
virtual uint16_t get_seed_hash() const
Definition tuple_sketch_impl.hpp:130
DataSketches namespace.
Definition binomial_bounds.hpp:38