datasketches-cpp
array_tuple_sketch_impl.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 namespace datasketches {
21 
22 template<typename Array, typename Policy, typename Allocator>
23 update_array_tuple_sketch<Array, Policy, Allocator>::update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
24  float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
25 Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
26 
27 template<typename Array, typename Policy, typename Allocator>
29  return this->policy_.get_num_values();
30 }
31 
// Builds a compact_array_tuple_sketch<Array, Allocator> from *this, forwarding
// the `ordered` argument to the compact sketch constructor.
// NOTE(review): the declarator line (listing line 33) is missing from this
// text; presumably it declares a const member taking `bool ordered` and
// returning compact_array_tuple_sketch<Array, Allocator> -- confirm against
// array_tuple_sketch.hpp before relying on this fragment.
32 template<typename Array, typename Policy, typename Allocator>
34  return compact_array_tuple_sketch<Array, Allocator>(*this, ordered);
35 }
36 
37 // builder
38 
39 template<typename Array, typename Policy, typename Allocator>
40 update_array_tuple_sketch<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
41 tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {}
42 
43 template<typename Array, typename Policy, typename Allocator>
45  return update_array_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
46 }
47 
48 // compact sketch
49 
50 template<typename Array, typename Allocator>
51 template<typename S>
53 Base(other, ordered), num_values_(other.get_num_values()) {}
54 
55 template<typename Array, typename Allocator>
57  uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values):
58 Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {}
59 
// Constructs a compact sketch by moving `base` into the Base subobject and
// storing `num_values` in num_values_.
// NOTE(review): the declarator line (listing line 61) is missing from this
// text; presumably it is a constructor taking a num_values and an rvalue Base
// named `base` -- parameter order and exact types are not visible here, so
// confirm against array_tuple_sketch.hpp.
60 template<typename Array, typename Allocator>
62 Base(std::move(base)), num_values_(num_values) {}
63 
64 template<typename Array, typename Allocator>
66  return num_values_;
67 }
68 
69 template<typename Array, typename Allocator>
71  const uint8_t preamble_longs = 1;
72  write(os, preamble_longs);
73  const uint8_t serial_version = SERIAL_VERSION;
74  write(os, serial_version);
75  const uint8_t family = SKETCH_FAMILY;
76  write(os, family);
77  const uint8_t type = SKETCH_TYPE;
78  write(os, type);
79  const uint8_t flags_byte(
80  (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
81  (this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
82  (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
83  );
84  write(os, flags_byte);
85  write(os, num_values_);
86  const uint16_t seed_hash = this->get_seed_hash();
87  write(os, seed_hash);
88  write(os, this->theta_);
89  if (this->get_num_retained() > 0) {
90  const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
91  write(os, num_entries);
92  const uint32_t unused32 = 0;
93  write(os, unused32);
94  for (const auto& it: this->entries_) {
95  write(os, it.first);
96  }
97  for (const auto& it: this->entries_) {
98  write(os, it.second.data(), it.second.size() * sizeof(typename Array::value_type));
99  }
100  }
101 }
102 
103 template<typename Array, typename Allocator>
104 auto compact_array_tuple_sketch<Array, Allocator>::serialize(unsigned header_size_bytes) const -> vector_bytes {
105  const uint8_t preamble_longs = 1;
106  const size_t size = header_size_bytes + 16 // preamble and theta
107  + (this->entries_.size() > 0 ? 8 : 0)
108  + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values_) * this->entries_.size();
109  vector_bytes bytes(size, 0, this->entries_.get_allocator());
110  uint8_t* ptr = bytes.data() + header_size_bytes;
111 
112  ptr += copy_to_mem(preamble_longs, ptr);
113  const uint8_t serial_version = SERIAL_VERSION;
114  ptr += copy_to_mem(serial_version, ptr);
115  const uint8_t family = SKETCH_FAMILY;
116  ptr += copy_to_mem(family, ptr);
117  const uint8_t type = SKETCH_TYPE;
118  ptr += copy_to_mem(type, ptr);
119  const uint8_t flags_byte(
120  (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
121  (this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
122  (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
123  );
124  ptr += copy_to_mem(flags_byte, ptr);
125  ptr += copy_to_mem(num_values_, ptr);
126  const uint16_t seed_hash = this->get_seed_hash();
127  ptr += copy_to_mem(seed_hash, ptr);
128  ptr += copy_to_mem((this->theta_), ptr);
129  if (this->get_num_retained() > 0) {
130  const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
131  ptr += copy_to_mem(num_entries, ptr);
132  ptr += sizeof(uint32_t); // unused
133  for (const auto& it: this->entries_) {
134  ptr += copy_to_mem(it.first, ptr);
135  }
136  for (const auto& it: this->entries_) {
137  ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(typename Array::value_type));
138  }
139  }
140  return bytes;
141 }
142 
143 template<typename Array, typename Allocator>
145  read<uint8_t>(is); // unused
146  const auto serial_version = read<uint8_t>(is);
147  const auto family = read<uint8_t>(is);
148  const auto type = read<uint8_t>(is);
149  const auto flags_byte = read<uint8_t>(is);
150  const auto num_values = read<uint8_t>(is);
151  const auto seed_hash = read<uint16_t>(is);
152  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
153  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
154  checker<true>::check_sketch_type(type, SKETCH_TYPE);
155  const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
156  if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
157 
158  const auto theta = read<uint64_t>(is);
159  std::vector<Entry, AllocEntry> entries(allocator);
160  if (has_entries) {
161  const auto num_entries = read<uint32_t>(is);
162  read<uint32_t>(is); // unused
163  entries.reserve(num_entries);
164  std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
165  read(is, keys.data(), num_entries * sizeof(uint64_t));
166  for (size_t i = 0; i < num_entries; ++i) {
167  Array summary(num_values, 0, allocator);
168  read(is, summary.data(), num_values * sizeof(typename Array::value_type));
169  entries.push_back(Entry(keys[i], std::move(summary)));
170  }
171  }
172  if (!is.good()) throw std::runtime_error("error reading from std::istream");
173  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
174  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
175  return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
176 }
177 
178 template<typename Array, typename Allocator>
179 compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(const void* bytes, size_t size, uint64_t seed, const Allocator& allocator) {
180  ensure_minimum_memory(size, 16);
181  const char* ptr = static_cast<const char*>(bytes);
182  ptr += sizeof(uint8_t); // unused
183  uint8_t serial_version;
184  ptr += copy_from_mem(ptr, serial_version);
185  uint8_t family;
186  ptr += copy_from_mem(ptr, family);
187  uint8_t type;
188  ptr += copy_from_mem(ptr, type);
189  uint8_t flags_byte;
190  ptr += copy_from_mem(ptr, flags_byte);
191  uint8_t num_values;
192  ptr += copy_from_mem(ptr, num_values);
193  uint16_t seed_hash;
194  ptr += copy_from_mem(ptr, seed_hash);
195  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
196  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
197  checker<true>::check_sketch_type(type, SKETCH_TYPE);
198  const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
199  if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
200 
201  uint64_t theta;
202  ptr += copy_from_mem(ptr, theta);
203  std::vector<Entry, AllocEntry> entries(allocator);
204  if (has_entries) {
205  ensure_minimum_memory(size, 24);
206  uint32_t num_entries;
207  ptr += copy_from_mem(ptr, num_entries);
208  ptr += sizeof(uint32_t); // unused
209  ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values) * num_entries);
210  entries.reserve(num_entries);
211  std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
212  ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries);
213  for (size_t i = 0; i < num_entries; ++i) {
214  Array summary(num_values, 0, allocator);
215  ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(typename Array::value_type));
216  entries.push_back(Entry(keys[i], std::move(summary)));
217  }
218  }
219  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
220  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
221  return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
222 }
223 
224 } /* namespace datasketches */
Compact array tuple sketch.
Definition: array_tuple_sketch.hpp:163
uint8_t get_num_values() const
Definition: array_tuple_sketch_impl.hpp:65
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: array_tuple_sketch_impl.hpp:70
compact_array_tuple_sketch(const Sketch &other, bool ordered=true)
Copy constructor.
static compact_array_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
Definition: array_tuple_sketch_impl.hpp:144
Tuple base builder.
Definition: tuple_sketch.hpp:614
Update array tuple sketch builder.
Definition: array_tuple_sketch.hpp:145
update_array_tuple_sketch build() const
Definition: array_tuple_sketch_impl.hpp:44
Update array tuple sketch.
Definition: array_tuple_sketch.hpp:125
uint8_t get_num_values() const
Definition: array_tuple_sketch_impl.hpp:28
Update Tuple sketch.
Definition: tuple_sketch.hpp:217
DataSketches namespace.
Definition: binomial_bounds.hpp:38