datasketches-cpp
array_tuple_sketch.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef ARRAY_TUPLE_SKETCH_HPP_
21 #define ARRAY_TUPLE_SKETCH_HPP_
22 
23 #include <vector>
24 #include <memory>
25 
26 #include "serde.hpp"
27 #include "tuple_sketch.hpp"
28 
29 namespace datasketches {
30 
31 // This simple array is faster than std::vector and should be sufficient for this application
32 template<typename T, typename Allocator = std::allocator<T>>
33 class array {
34 public:
35  using value_type = T;
36  using allocator_type = Allocator;
37 
38  explicit array(uint8_t size, T value, const Allocator& allocator = Allocator()):
39  allocator_(allocator), size_(size), array_(allocator_.allocate(size_)) {
40  std::fill(array_, array_ + size_, value);
41  }
42  array(const array& other):
43  allocator_(other.allocator_),
44  size_(other.size_),
45  array_(allocator_.allocate(size_))
46  {
47  std::copy(other.array_, other.array_ + size_, array_);
48  }
49  array(array&& other) noexcept:
50  allocator_(std::move(other.allocator_)),
51  size_(other.size_),
52  array_(other.array_)
53  {
54  other.array_ = nullptr;
55  }
56  ~array() {
57  if (array_ != nullptr) allocator_.deallocate(array_, size_);
58  }
59  array& operator=(const array& other) {
60  array copy(other);
61  std::swap(allocator_, copy.allocator_);
62  std::swap(size_, copy.size_);
63  std::swap(array_, copy.array_);
64  return *this;
65  }
66  array& operator=(array&& other) {
67  std::swap(allocator_, other.allocator_);
68  std::swap(size_, other.size_);
69  std::swap(array_, other.array_);
70  return *this;
71  }
72  T& operator[](size_t index) { return array_[index]; }
73  T operator[](size_t index) const { return array_[index]; }
74  uint8_t size() const { return size_; }
75  T* data() { return array_; }
76  const T* data() const { return array_; }
77  bool operator==(const array& other) const {
78  for (uint8_t i = 0; i < size_; ++i) if (array_[i] != other.array_[i]) return false;
79  return true;
80  }
81 private:
82  Allocator allocator_;
83  uint8_t size_;
84  T* array_;
85 };
86 
88 template<typename Array, typename Allocator = typename Array::allocator_type>
90 public:
91  default_array_tuple_update_policy(uint8_t num_values = 1, const Allocator& allocator = Allocator()):
92  allocator_(allocator), num_values_(num_values) {}
93  Array create() const {
94  return Array(num_values_, 0, allocator_);
95  }
96  template<typename InputArray> // to allow any type with indexed access (such as double* or std::vector)
97  void update(Array& array, const InputArray& update) const {
98  for (uint8_t i = 0; i < num_values_; ++i) array[i] += update[i];
99  }
100  uint8_t get_num_values() const {
101  return num_values_;
102  }
103 
104 private:
105  Allocator allocator_;
106  uint8_t num_values_;
107 };
108 
109 // forward declaration
110 template<typename Array, typename Allocator> class compact_array_tuple_sketch;
111 
120 template<
121  typename Array,
123  typename Allocator = typename Array::allocator_type
124 >
125 class update_array_tuple_sketch: public update_tuple_sketch<Array, Array, Policy, Allocator> {
126 public:
128  using resize_factor = typename Base::resize_factor;
129 
130  class builder;
131 
132  compact_array_tuple_sketch<Array, Allocator> compact(bool ordered = true) const;
133 
135  uint8_t get_num_values() const;
136 
137 private:
138  // for builder
139  update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta,
140  uint64_t seed, const Policy& policy, const Allocator& allocator);
141 };
142 
144 template<typename Array, typename Policy, typename Allocator>
145 class update_array_tuple_sketch<Array, Policy, Allocator>::builder: public tuple_base_builder<builder, Policy, Allocator> {
146 public:
152  builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator());
153 
155  update_array_tuple_sketch build() const;
156 };
157 
159 template<
160  typename Array,
161  typename Allocator = typename Array::allocator_type
162 >
163 class compact_array_tuple_sketch: public compact_tuple_sketch<Array, Allocator> {
164 public:
166  using Entry = typename Base::Entry;
167  using AllocEntry = typename Base::AllocEntry;
168  using AllocU64 = typename Base::AllocU64;
169  using vector_bytes = typename Base::vector_bytes;
170 
171  static const uint8_t SERIAL_VERSION = 1;
172  static const uint8_t SKETCH_FAMILY = 9;
173  static const uint8_t SKETCH_TYPE = 3;
174  enum flags { UNUSED1, UNUSED2, IS_EMPTY, HAS_ENTRIES, IS_ORDERED };
175 
182  template<typename Sketch>
183  compact_array_tuple_sketch(const Sketch& other, bool ordered = true);
184 
186  uint8_t get_num_values() const;
187 
192  void serialize(std::ostream& os) const;
193 
201  vector_bytes serialize(unsigned header_size_bytes = 0) const;
202 
210  static compact_array_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
211 
220  static compact_array_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
221  const Allocator& allocator = Allocator());
222 
223 private:
224  uint8_t num_values_;
225 
226  template<typename Ar, typename P, typename Al> friend class array_tuple_union;
227  template<typename Ar, typename P, typename Al> friend class array_tuple_intersection;
228  template<typename Ar, typename Al> friend class array_tuple_a_not_b;
229  compact_array_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values);
230  compact_array_tuple_sketch(uint8_t num_values, Base&& base);
231 };
232 
233 } /* namespace datasketches */
234 
235 #include "array_tuple_sketch_impl.hpp"
236 
237 #endif
array tuple A-not-B
Definition: array_tuple_a_not_b.hpp:33
array tuple intersection
Definition: array_tuple_intersection.hpp:37
array tuple union
Definition: array_tuple_union.hpp:54
Compact array tuple sketch.
Definition: array_tuple_sketch.hpp:163
uint8_t get_num_values() const
Definition: array_tuple_sketch_impl.hpp:65
void serialize(std::ostream &os) const
This method serializes the sketch into a given stream in a binary form.
Definition: array_tuple_sketch_impl.hpp:70
compact_array_tuple_sketch(const Sketch &other, bool ordered=true)
Copy constructor.
static compact_array_tuple_sketch deserialize(std::istream &is, uint64_t seed=DEFAULT_SEED, const Allocator &allocator=Allocator())
This method deserializes a sketch from a given stream.
Definition: array_tuple_sketch_impl.hpp:144
Compact Tuple sketch.
Definition: tuple_sketch.hpp:416
virtual bool is_empty() const
Definition: tuple_sketch_impl.hpp:329
virtual bool is_ordered() const
Definition: tuple_sketch_impl.hpp:334
default array tuple update policy
Definition: array_tuple_sketch.hpp:89
Tuple base builder.
Definition: tuple_sketch.hpp:614
Update array tuple sketch builder.
Definition: array_tuple_sketch.hpp:145
Update array tuple sketch.
Definition: array_tuple_sketch.hpp:125
uint8_t get_num_values() const
Definition: array_tuple_sketch_impl.hpp:28
Update Tuple sketch.
Definition: tuple_sketch.hpp:217
DataSketches namespace.
Definition: binomial_bounds.hpp:38