datasketches-cpp
HllSketchImplFactory.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef _HLLSKETCHIMPLFACTORY_HPP_
21 #define _HLLSKETCHIMPLFACTORY_HPP_
22 
23 #include <stdexcept>
24 
25 #include "HllUtil.hpp"
26 #include "HllSketchImpl.hpp"
27 #include "CouponList.hpp"
28 #include "CouponHashSet.hpp"
29 #include "HllArray.hpp"
30 #include "Hll4Array.hpp"
31 #include "Hll6Array.hpp"
32 #include "Hll8Array.hpp"
33 
34 namespace datasketches {
35 
36 template<typename A>
37 class HllSketchImplFactory final {
38 public:
39  static HllSketchImpl<A>* deserialize(std::istream& os, const A& allocator);
40  static HllSketchImpl<A>* deserialize(const void* bytes, size_t len, const A& allocator);
41 
42  static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
43  static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
44  static HllArray<A>* newHll(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
45 
46  // resets the input impl, deleting the input pointer and returning a new pointer
47  static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
48 
49  static Hll4Array<A>* convertToHll4(const HllArray<A>& srcHllArr);
50  static Hll6Array<A>* convertToHll6(const HllArray<A>& srcHllArr);
51  static Hll8Array<A>* convertToHll8(const HllArray<A>& srcHllArr);
52 };
53 
54 template<typename A>
55 CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
56  using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
57  CouponHashSet<A>* chSet = new (ChsAlloc(list.getAllocator()).allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType(), list.getAllocator());
58  for (const auto coupon: list) {
59  chSet->couponUpdate(coupon);
60  }
61  return chSet;
62 }
63 
64 template<typename A>
65 HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
66  HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType(), false, src.getAllocator());
67  tgtHllArr->putKxQ0(1 << src.getLgConfigK());
68  for (const auto coupon: src) {
69  tgtHllArr->couponUpdate(coupon);
70  }
71  tgtHllArr->putHipAccum(src.getEstimate());
72  tgtHllArr->putOutOfOrderFlag(false);
73  return tgtHllArr;
74 }
75 
76 template<typename A>
77 HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A& allocator) {
78  // we'll hand off the sketch based on PreInts so we don't need
79  // to move the stream pointer back and forth -- perhaps somewhat fragile?
80  const uint8_t preInts = static_cast<uint8_t>(is.peek());
81  if (preInts == hll_constants::HLL_PREINTS) {
82  return HllArray<A>::newHll(is, allocator);
83  } else if (preInts == hll_constants::HASH_SET_PREINTS) {
84  return CouponHashSet<A>::newSet(is, allocator);
85  } else if (preInts == hll_constants::LIST_PREINTS) {
86  return CouponList<A>::newList(is, allocator);
87  } else {
88  throw std::invalid_argument("Attempt to deserialize unknown object type");
89  }
90 }
91 
92 template<typename A>
93 HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
94  // read current mode directly
95  const uint8_t preInts = static_cast<const uint8_t*>(bytes)[0];
96  if (preInts == hll_constants::HLL_PREINTS) {
97  return HllArray<A>::newHll(bytes, len, allocator);
98  } else if (preInts == hll_constants::HASH_SET_PREINTS) {
99  return CouponHashSet<A>::newSet(bytes, len, allocator);
100  } else if (preInts == hll_constants::LIST_PREINTS) {
101  return CouponList<A>::newList(bytes, len, allocator);
102  } else {
103  throw std::invalid_argument("Attempt to deserialize unknown object type");
104  }
105 }
106 
107 template<typename A>
108 HllArray<A>* HllSketchImplFactory<A>::newHll(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator) {
109  switch (tgtHllType) {
110  case HLL_8:
111  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
112  return new (Hll8Alloc(allocator).allocate(1)) Hll8Array<A>(lgConfigK, startFullSize, allocator);
113  case HLL_6:
114  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
115  return new (Hll6Alloc(allocator).allocate(1)) Hll6Array<A>(lgConfigK, startFullSize, allocator);
116  case HLL_4:
117  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
118  return new (Hll4Alloc(allocator).allocate(1)) Hll4Array<A>(lgConfigK, startFullSize, allocator);
119  }
120  throw std::logic_error("Invalid target_hll_type");
121 }
122 
123 template<typename A>
124 HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) {
125  if (startFullSize) {
126  HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize, impl->getAllocator());
127  impl->get_deleter()(impl);
128  return hll;
129  } else {
130  using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
131  CouponList<A>* cl = new (ClAlloc(impl->getAllocator()).allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST, impl->getAllocator());
132  impl->get_deleter()(impl);
133  return cl;
134  }
135 }
136 
137 template<typename A>
138 Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
139  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
140  return new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1)) Hll4Array<A>(srcHllArr);
141 }
142 
143 template<typename A>
144 Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
145  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
146  return new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1)) Hll6Array<A>(srcHllArr);
147 }
148 
149 template<typename A>
150 Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
151  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
152  return new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1)) Hll8Array<A>(srcHllArr);
153 }
154 
155 }
156 
157 #endif /* _HLLSKETCHIMPLFACTORY_HPP_ */
DataSketches namespace.
Definition: binomial_bounds.hpp:38
target_hll_type
Specifies the target type of HLL sketch to be created.
Definition: hll.hpp:72
@ HLL_6
6 bits per entry (fixed size)
Definition: hll.hpp:74
@ HLL_8
8 bits per entry (fastest, fixed size)
Definition: hll.hpp:75
@ HLL_4
4 bits per entry (most compact, size may vary)
Definition: hll.hpp:73