196 const uint16_t unused = 0;
203 write(os, num_retained_);
207 size_t pt_size =
sizeof(T) * dim_;
208 for (
const Level& lvl : levels_) {
209 const uint32_t level_size =
static_cast<uint32_t
>(lvl.size());
210 write(os, level_size);
211 for (
const Vector& pt : lvl) {
212 write(os, pt.data(), pt_size);
// Writes the sketch fields into a freshly allocated byte vector, leaving the first
// header_size_bytes bytes of that vector untouched (they keep the 0 fill value).
// NOTE(review): the signature, the return statements and a few guards of this
// definition are not visible in this excerpt; comments below describe only what is shown.
217template<
typename T,
typename K,
typename A>
// An empty sketch uses the short preamble, a non-empty one the long preamble.
219 const uint8_t preamble_ints = (is_empty() ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_LONG);
// Expected total size: caller-requested header space plus preamble_ints 32-bit words ...
222 size_t size = header_size_bytes + preamble_ints *
sizeof(uint32_t);
// ... plus, for every level, one 32-bit point count and size() points of dim_ values of T.
224 for (
const Level& lvl : levels_)
225 size +=
sizeof(uint32_t) + (lvl.size() * dim_ *
sizeof(T));
// Buffer is created with (size, 0, allocator of levels_); writing starts after the header space.
227 vector_bytes bytes(size, 0, levels_.get_allocator());
228 uint8_t* ptr = bytes.data() + header_size_bytes;
// NOTE(review): size already includes header_size_bytes and ptr is already advanced by
// header_size_bytes, so end_ptr lies header_size_bytes past bytes.data() + size.
// Confirm the final size check (not visible here) accounts for this when header_size_bytes > 0.
229 const uint8_t* end_ptr = ptr + size;
// Fixed header, in write order: preamble_ints, serial version, family id, flags, k_,
// two skipped bytes, dim_.
231 ptr += copy_to_mem(preamble_ints, ptr);
232 const uint8_t ser_ver = SERIAL_VERSION;
233 ptr += copy_to_mem(ser_ver, ptr);
234 const uint8_t family = FAMILY_ID;
235 ptr += copy_to_mem(family, ptr);
// Flags byte carries only the IS_EMPTY bit here.
238 const uint8_t flags_byte = (is_empty() ? 1 << flags::IS_EMPTY : 0);
239 ptr += copy_to_mem(flags_byte, ptr);
240 ptr += copy_to_mem(k_, ptr);
// Skip a 16-bit slot without writing; it keeps the 0 the buffer was filled with.
241 ptr +=
sizeof(uint16_t);
242 ptr += copy_to_mem(dim_, ptr);
// Long-preamble fields: retained point count, then n_.
// (Presumably reached only for non-empty sketches; the guard is not visible here.)
247 ptr += copy_to_mem(num_retained_, ptr);
248 ptr += copy_to_mem(n_, ptr);
// Each point occupies dim_ values of T, copied as raw bytes.
251 size_t pt_size =
sizeof(T) * dim_;
// Per level: the point count narrowed to uint32_t, followed by every point's data.
252 for (
const Level& lvl : levels_) {
253 ptr += copy_to_mem(
static_cast<uint32_t
>(lvl.size()), ptr);
254 for (
const Vector& pt : lvl) {
255 ptr += copy_to_mem(pt.data(), ptr, pt_size);
// Raised when the bytes written do not match the expected size
// (the condition guarding this throw is not visible in this excerpt).
260 throw std::runtime_error(
"Actual output size does not equal expected output size");
// Reconstructs a density_sketch from a std::istream `is`.
// NOTE(review): the signature and several interior lines (closing braces, the
// empty-sketch branch, the point append) are not visible in this excerpt.
265template<
typename T,
typename K,
typename A>
// Fixed header: four single-byte fields, then a 16-bit k.
267 const auto preamble_ints = read<uint8_t>(is);
268 const auto serial_version = read<uint8_t>(is);
269 const auto family_id = read<uint8_t>(is);
270 const auto flags_byte = read<uint8_t>(is);
271 const auto k = read<uint16_t>(is);
// NOTE(review): the memory-based overload reads a 16-bit `unused` field between k and dim;
// presumably the same happens here on a line that is not shown.
273 const auto dim = read<uint32_t>(is);
// Validate the header fields; each check throws std::invalid_argument on mismatch.
276 check_serial_version(serial_version);
277 check_family_id(family_id);
278 check_header_validity(preamble_ints, flags_byte, serial_version);
// Stream state is inspected only after the header has been read and validated.
280 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
281 const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
// Long-preamble fields (presumably read only when is_empty is false; the branch is not visible).
286 const auto num_retained = read<uint32_t>(is);
287 const auto n = read<uint64_t>(is);
// Bytes per stored point: dim values of T.
290 size_t pt_size =
sizeof(T) * dim;
291 Levels levels(allocator);
// Signed counter: reading more points than num_retained drives it negative,
// which ends the loop and is rejected by the != 0 check below.
292 int64_t num_to_read = num_retained;
293 while (num_to_read > 0) {
294 const auto level_size = read<uint32_t>(is);
295 Level lvl(allocator);
// NOTE(review): level_size comes straight from the input; a corrupt value leads to a
// correspondingly large reserve — confirm this is acceptable for untrusted streams.
296 lvl.reserve(level_size);
297 for (uint32_t i = 0; i < level_size; ++i) {
// Point is created with (dim, 0, allocator), then overwritten with pt_size raw bytes.
298 Vector pt(dim, 0, allocator);
299 read(is, pt.data(), pt_size);
// lvl is passed as an lvalue (copied), so lvl.size() on the next line still reflects its contents.
302 levels.push_back(lvl);
303 num_to_read -= lvl.size();
// NOTE(review): is.good() is not tested inside the loop; a level_size of 0 leaves
// num_to_read unchanged — confirm read() terminates the loop on a truncated stream.
306 if (num_to_read != 0)
307 throw std::runtime_error(
"Error deserializing sketch: Incorrect number of items read");
308 if (!is.good())
throw std::runtime_error(
"error reading from std::istream");
// levels is moved into the resulting sketch.
310 return density_sketch(k, dim, num_retained, n, std::move(levels), kernel);
// Reconstructs a density_sketch from `size` bytes starting at `bytes`.
// Throws std::runtime_error when the level sizes do not add up to num_retained or when
// the read cursor ends up past the supplied memory; the header checks throw std::invalid_argument.
// NOTE(review): declarations of family_id, flags_byte, k, unused, dim, n and level_size,
// the empty-sketch guard, the point append and the closing braces are on lines that are
// not visible in this excerpt.
313template<
typename T,
typename K,
typename A>
314density_sketch<T, K, A> density_sketch<T, K, A>::deserialize(
const void* bytes,
size_t size,
const K& kernel,
const A& allocator) {
// The short preamble must fit before any header field is read.
315 ensure_minimum_memory(size, PREAMBLE_INTS_SHORT *
sizeof(uint32_t));
316 const char* ptr =
static_cast<const char*
>(bytes);
317 const char* end_ptr =
static_cast<const char*
>(bytes) + size;
// Fixed header, in read order: preamble_ints, serial_version, family_id, flags_byte,
// k, unused, dim. ptr advances by whatever copy_from_mem returns.
318 uint8_t preamble_ints;
319 ptr += copy_from_mem(ptr, preamble_ints);
320 uint8_t serial_version;
321 ptr += copy_from_mem(ptr, serial_version);
323 ptr += copy_from_mem(ptr, family_id);
325 ptr += copy_from_mem(ptr, flags_byte);
327 ptr += copy_from_mem(ptr, k);
// `unused` is read only to advance past its slot; it is not referenced in the lines shown.
329 ptr += copy_from_mem(ptr, unused);
331 ptr += copy_from_mem(ptr, dim);
// Validate the header fields before interpreting the rest of the image.
334 check_serial_version(serial_version);
335 check_family_id(family_id);
336 check_header_validity(preamble_ints, flags_byte, serial_version);
338 const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
// Sketch built from k and dim only (presumably the is_empty early return; guard not visible).
340 return density_sketch(k, dim, kernel, allocator);
// The long preamble must fit before num_retained and n are read.
343 ensure_minimum_memory(size, PREAMBLE_INTS_LONG *
sizeof(uint32_t));
344 uint32_t num_retained;
345 ptr += copy_from_mem(ptr, num_retained);
347 ptr += copy_from_mem(ptr, n);
// Pre-check: the remaining bytes must cover at least the raw point data
// (the per-level 32-bit counts are not included in this bound).
352 size_t pt_size =
sizeof(T) * dim;
353 ensure_minimum_memory(end_ptr - ptr, num_retained * pt_size);
356 Levels levels(allocator);
// Signed counter so that over-reading shows up as a negative value rejected below.
357 int64_t num_to_read = num_retained;
358 while (num_to_read > 0) {
// NOTE(review): level_size is read without first checking that sizeof(uint32_t) bytes
// remain before end_ptr; ptr > end_ptr is only tested after the loop. Confirm whether
// a bounds check is needed here for truncated input.
360 ptr += copy_from_mem(ptr, level_size);
// The whole level payload must fit in the remaining memory before it is copied.
361 ensure_minimum_memory(end_ptr - ptr, level_size * pt_size);
362 Level lvl(allocator);
363 lvl.reserve(level_size);
364 for (uint32_t i = 0; i < level_size; ++i) {
// Point is created with (dim, 0, allocator), then overwritten with pt_size raw bytes.
365 Vector pt(dim, 0, allocator);
366 ptr += copy_from_mem(ptr, pt.data(), pt_size);
// lvl is passed as an lvalue (copied), so lvl.size() on the next line still reflects its contents.
369 levels.push_back(lvl);
370 num_to_read -= lvl.size();
373 if (num_to_read != 0)
374 throw std::runtime_error(
"Error deserializing sketch: Incorrect number of items read");
375 if (ptr > end_ptr)
throw std::runtime_error(
"Error deserializing sketch: Read beyond provided memory");
// levels is moved into the resulting sketch.
377 return density_sketch(k, dim, num_retained, n, std::move(levels), kernel);
// Validates the parameter k.
// Throws std::invalid_argument whose message includes the offending value.
// NOTE(review): the condition guarding the throw is on a line not visible in this
// excerpt; per the message text it presumably rejects k <= 1 — confirm.
380template<
typename T,
typename K,
typename A>
381void density_sketch<T, K, A>::check_k(uint16_t k) {
383 throw std::invalid_argument(
"k must be > 1. Found: " + std::to_string(k));
386template<
typename T,
typename K,
typename A>
387void density_sketch<T, K, A>::check_serial_version(uint8_t serial_version) {
388 if (serial_version == SERIAL_VERSION)
391 throw std::invalid_argument(
"Possible corruption. Unrecognized serialization version: " + std::to_string(serial_version));
394template<
typename T,
typename K,
typename A>
395void density_sketch<T, K, A>::check_family_id(uint8_t family_id) {
396 if (family_id == FAMILY_ID)
399 throw std::invalid_argument(
"Possible corruption. Family id does not indicate density sketch: " + std::to_string(family_id));
402template<
typename T,
typename K,
typename A>
403void density_sketch<T, K, A>::check_header_validity(uint8_t preamble_ints, uint8_t flags_byte, uint8_t serial_version) {
404 const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
406 if ((empty && preamble_ints == PREAMBLE_INTS_SHORT)
407 || (!empty && preamble_ints == PREAMBLE_INTS_LONG))
410 std::ostringstream os;
411 os <<
"Possible sketch corruption. Inconsistent state: "
412 <<
"preamble_ints = " << preamble_ints
413 <<
", empty = " << (empty ?
"true" :
"false")
414 <<
", serialization_version = " << serial_version;
415 throw std::invalid_argument(os.str());
// Builds a human-readable description of the sketch and returns it as string<A>
// constructed with the allocator of levels_.
// NOTE(review): the signature, the guards around the "levels" and "data" sections and
// the per-value formatting lines are not visible in this excerpt; presumably the two
// optional sections are controlled by caller-supplied flags — confirm.
419template<
typename T,
typename K,
typename A>
// Summary section: one line per scalar property of the sketch.
423 std::ostringstream os;
424 os <<
"### Density sketch summary:" << std::endl;
425 os <<
" K : " << k_ << std::endl;
426 os <<
" Dim : " << dim_ << std::endl;
427 os <<
" Empty : " << (is_empty() ?
"true" :
"false") << std::endl;
428 os <<
" N : " << n_ << std::endl;
429 os <<
" Retained items : " << num_retained_ << std::endl;
430 os <<
" Estimation mode: " << (is_estimation_mode() ?
"true" :
"false") << std::endl;
431 os <<
" Levels : " << levels_.size() << std::endl;
432 os <<
"### End sketch summary" << std::endl;
// Levels section: index of each level and the number of points it holds.
435 os <<
"### Density sketch levels:" << std::endl;
436 os <<
" height: size" << std::endl;
437 for (
unsigned height = 0; height < levels_.size(); ++height) {
438 os <<
" " << height <<
": "
439 << levels_[height].size() << std::endl;
441 os <<
"### End sketch levels" << std::endl;
// Data section: for each level, every point and its values
// (the lines that print the individual values are not visible here).
445 os <<
"### Density sketch data:" << std::endl;
446 for (
unsigned height = 0; height < levels_.size(); ++height) {
447 os <<
" level " << height <<
": " << std::endl;
448 for (
const auto& point: levels_[height]) {
451 for (
auto value: point) {
459 os <<
"]" << std::endl;
462 os <<
"### End sketch data" << std::endl;
// Copy the accumulated text into the allocator-aware string type.
464 return string<A>(os.str().c_str(), levels_.get_allocator());
// Returns a const_iterator built from (levels_.begin(), levels_.end()).
// NOTE(review): the signature line is not visible in this excerpt; presumably this is
// the sketch's begin() — confirm.
467template<
typename T,
typename K,
typename A>
469 return const_iterator(levels_.begin(), levels_.end());
// Returns a const_iterator built from (levels_.end(), levels_.end()).
// NOTE(review): the signature line is not visible in this excerpt; presumably this is
// the sketch's end(), i.e. the past-the-end position — confirm.
472template<
typename T,
typename K,
typename A>
474 return const_iterator(levels_.end(), levels_.end());