datasketches-cpp
cpc_util.hpp
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef CPC_UTIL_HPP_
21 #define CPC_UTIL_HPP_
22 
23 #include <stdexcept>
24 
25 namespace datasketches {
26 
27 static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
28  if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
29  const uint64_t quotient = x / y;
30  if (quotient * y == x) return (quotient);
31  else return quotient + 1;
32 }
33 
34 static inline uint8_t floor_log2_of_long(uint64_t x) {
35  if (x < 1) throw std::invalid_argument("floor_log2_of_long: bad argument");
36  uint8_t p = 0;
37  uint64_t y = 1;
38  while (true) {
39  if (y == x) return p;
40  if (y > x) return p - 1;
41  p += 1;
42  y <<= 1;
43  }
44 }
45 
46 // This place-holder code was inadequate because it caused
47 // the cost of the post-merge get_result() operation to be O(C)
48 // instead of O(K). It did have the advantage of being
49 // very simple and trustworthy during initial testing.
50 static inline uint64_t wegner_count_bits_set_in_matrix(const uint64_t* array, size_t length) {
51  uint64_t pattern = 0;
52  uint64_t count = 0;
53  // clock_t t0, t1;
54  // t0 = clock();
55  // Wegner's Bit-Counting Algorithm, CACM 3 (1960), p. 322.
56  for (uint64_t i = 0; i < length; i++) {
57  pattern = array[i];
58  while (pattern != 0) {
59  pattern &= (pattern - 1);
60  count++;
61  }
62  }
63  // t1 = clock();
64  // printf ("\n(Wegner CountBitsTime %.1f)\n", ((double) (t1 - t0)) / 1000.0);
65  // fflush (stdout);
66  return count;
67 }
68 
69 // Note: this is an adaptation of the Java code,
70 // which is apparently a variation of Figure 5-2 in "Hacker's Delight"
71 // by Henry S. Warren.
72 static inline uint32_t warren_bit_count(uint64_t i) {
73  i = i - ((i >> 1) & 0x5555555555555555ULL);
74  i = (i & 0x3333333333333333ULL) + ((i >> 2) & 0x3333333333333333ULL);
75  i = (i + (i >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
76  i = i + (i >> 8);
77  i = i + (i >> 16);
78  i = i + (i >> 32);
79  return i & 0x7f;
80 }
81 
82 static inline uint32_t warren_count_bits_set_in_matrix(const uint64_t* array, uint32_t length) {
83  uint32_t count = 0;
84  for (uint32_t i = 0; i < length; i++) {
85  count += warren_bit_count(array[i]);
86  }
87  return count;
88 }
89 
90 // This code is Figure 5-9 in "Hacker's Delight" by Henry S. Warren.
91 
92 #define DATASKETCHES_CSA(h, l, a, b, c) \
93  { \
94  uint64_t u = a ^ b; \
95  uint64_t v = c; \
96  h = (a & b) | (u & v); \
97  l = u ^ v; \
98  }
99 
100 static inline uint32_t count_bits_set_in_matrix(const uint64_t* a, uint32_t length) {
101  if ((length & 0x7) != 0) throw std::invalid_argument("the length of the array must be a multiple of 8");
102  uint32_t total = 0;
103  uint64_t ones, twos, twos_a, twos_b, fours, fours_a, fours_b, eights;
104  fours = twos = ones = 0;
105 
106  for (uint32_t i = 0; i <= length - 8; i += 8) {
107  DATASKETCHES_CSA(twos_a, ones, ones, a[i+0], a[i+1]);
108  DATASKETCHES_CSA(twos_b, ones, ones, a[i+2], a[i+3]);
109  DATASKETCHES_CSA(fours_a, twos, twos, twos_a, twos_b);
110 
111  DATASKETCHES_CSA(twos_a, ones, ones, a[i+4], a[i+5]);
112  DATASKETCHES_CSA(twos_b, ones, ones, a[i+6], a[i+7]);
113  DATASKETCHES_CSA(fours_b, twos, twos, twos_a, twos_b);
114 
115  DATASKETCHES_CSA(eights, fours, fours, fours_a, fours_b);
116 
117  total += warren_bit_count(eights);
118  }
119  total = 8 * total + 4 * warren_bit_count(fours) + 2 * warren_bit_count(twos) + warren_bit_count(ones);
120 
121  // Because I still don't fully trust this fancy version
122  // assert(total == wegner_count_bits_set_in_matrix(A, length));
123  //if (total != wegner_count_bits_set_in_matrix(a, length)) throw std::logic_error("count_bits_set_in_matrix error");
124 
125  return total;
126 }
127 
128 #undef DATASKETCHES_CSA
129 
130 // Here are some timings made with quickTestMerge.c
131 // for the "5 5" case:
132 
133 // Wegner CountBitsTime 29.3
134 // Warren CountBitsTime 5.3
135 // CSA CountBitsTime 4.3
136 
137 } /* namespace datasketches */
138 
139 #endif
DataSketches namespace.
Definition: binomial_bounds.hpp:38