]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #pragma once | |
19 | ||
20 | #if defined(_MSC_VER) | |
21 | #include <intrin.h> // IWYU pragma: keep | |
22 | #include <nmmintrin.h> | |
23 | #pragma intrinsic(_BitScanReverse) | |
24 | #pragma intrinsic(_BitScanForward) | |
25 | #define ARROW_POPCOUNT64 __popcnt64 | |
26 | #define ARROW_POPCOUNT32 __popcnt | |
27 | #else | |
28 | #define ARROW_POPCOUNT64 __builtin_popcountll | |
29 | #define ARROW_POPCOUNT32 __builtin_popcount | |
30 | #endif | |
31 | ||
32 | #include <cstdint> | |
33 | #include <type_traits> | |
34 | ||
35 | #include "arrow/util/macros.h" | |
36 | #include "arrow/util/visibility.h" | |
37 | ||
38 | namespace arrow { | |
39 | namespace detail { | |
40 | ||
41 | template <typename Integer> | |
42 | typename std::make_unsigned<Integer>::type as_unsigned(Integer x) { | |
43 | return static_cast<typename std::make_unsigned<Integer>::type>(x); | |
44 | } | |
45 | ||
46 | } // namespace detail | |
47 | ||
48 | namespace BitUtil { | |
49 | ||
50 | // The number of set bits in a given unsigned byte value, pre-computed | |
51 | // | |
52 | // Generated with the following Python code | |
53 | // output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};' | |
54 | // popcounts = [str(bin(i).count('1')) for i in range(0, 256)] | |
55 | // print(output.format(', '.join(popcounts))) | |
56 | static constexpr uint8_t kBytePopcount[] = { | |
57 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, | |
58 | 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, | |
59 | 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, | |
60 | 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, | |
61 | 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, | |
62 | 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, | |
63 | 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, | |
64 | 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, | |
65 | 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; | |
66 | ||
67 | static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); } | |
68 | static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); } | |
69 | ||
70 | // | |
71 | // Bit-related computations on integer values | |
72 | // | |
73 | ||
74 | // Returns the ceil of value/divisor | |
75 | constexpr int64_t CeilDiv(int64_t value, int64_t divisor) { | |
76 | return (value == 0) ? 0 : 1 + (value - 1) / divisor; | |
77 | } | |
78 | ||
79 | // Return the number of bytes needed to fit the given number of bits | |
80 | constexpr int64_t BytesForBits(int64_t bits) { | |
81 | // This formula avoids integer overflow on very large `bits` | |
82 | return (bits >> 3) + ((bits & 7) != 0); | |
83 | } | |
84 | ||
85 | constexpr bool IsPowerOf2(int64_t value) { | |
86 | return value > 0 && (value & (value - 1)) == 0; | |
87 | } | |
88 | ||
89 | constexpr bool IsPowerOf2(uint64_t value) { | |
90 | return value > 0 && (value & (value - 1)) == 0; | |
91 | } | |
92 | ||
93 | // Returns the smallest power of two that contains v. If v is already a | |
94 | // power of two, it is returned as is. | |
95 | static inline int64_t NextPower2(int64_t n) { | |
96 | // Taken from | |
97 | // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 | |
98 | n--; | |
99 | n |= n >> 1; | |
100 | n |= n >> 2; | |
101 | n |= n >> 4; | |
102 | n |= n >> 8; | |
103 | n |= n >> 16; | |
104 | n |= n >> 32; | |
105 | n++; | |
106 | return n; | |
107 | } | |
108 | ||
109 | constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; } | |
110 | ||
111 | constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; } | |
112 | ||
113 | // Returns a mask for the bit_index lower order bits. | |
114 | // Only valid for bit_index in the range [0, 64). | |
115 | constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) { | |
116 | return (static_cast<uint64_t>(1) << bit_index) - 1; | |
117 | } | |
118 | ||
119 | // Returns 'value' rounded up to the nearest multiple of 'factor' | |
120 | constexpr int64_t RoundUp(int64_t value, int64_t factor) { | |
121 | return CeilDiv(value, factor) * factor; | |
122 | } | |
123 | ||
124 | // Returns 'value' rounded down to the nearest multiple of 'factor' | |
125 | constexpr int64_t RoundDown(int64_t value, int64_t factor) { | |
126 | return (value / factor) * factor; | |
127 | } | |
128 | ||
129 | // Returns 'value' rounded up to the nearest multiple of 'factor' when factor | |
130 | // is a power of two. | |
131 | // The result is undefined on overflow, i.e. if `value > 2**64 - factor`, | |
132 | // since we cannot return the correct result which would be 2**64. | |
133 | constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) { | |
134 | // DCHECK(value >= 0); | |
135 | // DCHECK(IsPowerOf2(factor)); | |
136 | return (value + (factor - 1)) & ~(factor - 1); | |
137 | } | |
138 | ||
139 | constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) { | |
140 | // DCHECK(IsPowerOf2(factor)); | |
141 | return (value + (factor - 1)) & ~(factor - 1); | |
142 | } | |
143 | ||
144 | constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); } | |
145 | ||
146 | constexpr int64_t RoundUpToMultipleOf64(int64_t num) { | |
147 | return RoundUpToPowerOf2(num, 64); | |
148 | } | |
149 | ||
150 | // Returns the number of bytes covering a sliced bitmap. Find the length | |
151 | // rounded to cover full bytes on both extremities. | |
152 | // | |
153 | // The following example represents a slice (offset=10, length=9) | |
154 | // | |
155 | // 0 8 16 24 | |
156 | // |-------|-------|------| | |
157 | // [ ] (slice) | |
158 | // [ ] (same slice aligned to bytes bounds, length=16) | |
159 | // | |
160 | // The covering bytes is the length (in bytes) of this new aligned slice. | |
161 | constexpr int64_t CoveringBytes(int64_t offset, int64_t length) { | |
162 | return (BitUtil::RoundUp(length + offset, 8) - BitUtil::RoundDown(offset, 8)) / 8; | |
163 | } | |
164 | ||
165 | // Returns the 'num_bits' least-significant bits of 'v'. | |
166 | static inline uint64_t TrailingBits(uint64_t v, int num_bits) { | |
167 | if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0; | |
168 | if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v; | |
169 | int n = 64 - num_bits; | |
170 | return (v << n) >> n; | |
171 | } | |
172 | ||
173 | /// \brief Count the number of leading zeros in an unsigned integer. | |
174 | static inline int CountLeadingZeros(uint32_t value) { | |
175 | #if defined(__clang__) || defined(__GNUC__) | |
176 | if (value == 0) return 32; | |
177 | return static_cast<int>(__builtin_clz(value)); | |
178 | #elif defined(_MSC_VER) | |
179 | unsigned long index; // NOLINT | |
180 | if (_BitScanReverse(&index, static_cast<unsigned long>(value))) { // NOLINT | |
181 | return 31 - static_cast<int>(index); | |
182 | } else { | |
183 | return 32; | |
184 | } | |
185 | #else | |
186 | int bitpos = 0; | |
187 | while (value != 0) { | |
188 | value >>= 1; | |
189 | ++bitpos; | |
190 | } | |
191 | return 32 - bitpos; | |
192 | #endif | |
193 | } | |
194 | ||
195 | static inline int CountLeadingZeros(uint64_t value) { | |
196 | #if defined(__clang__) || defined(__GNUC__) | |
197 | if (value == 0) return 64; | |
198 | return static_cast<int>(__builtin_clzll(value)); | |
199 | #elif defined(_MSC_VER) | |
200 | unsigned long index; // NOLINT | |
201 | if (_BitScanReverse64(&index, value)) { // NOLINT | |
202 | return 63 - static_cast<int>(index); | |
203 | } else { | |
204 | return 64; | |
205 | } | |
206 | #else | |
207 | int bitpos = 0; | |
208 | while (value != 0) { | |
209 | value >>= 1; | |
210 | ++bitpos; | |
211 | } | |
212 | return 64 - bitpos; | |
213 | #endif | |
214 | } | |
215 | ||
216 | static inline int CountTrailingZeros(uint32_t value) { | |
217 | #if defined(__clang__) || defined(__GNUC__) | |
218 | if (value == 0) return 32; | |
219 | return static_cast<int>(__builtin_ctzl(value)); | |
220 | #elif defined(_MSC_VER) | |
221 | unsigned long index; // NOLINT | |
222 | if (_BitScanForward(&index, value)) { | |
223 | return static_cast<int>(index); | |
224 | } else { | |
225 | return 32; | |
226 | } | |
227 | #else | |
228 | int bitpos = 0; | |
229 | if (value) { | |
230 | while (value & 1 == 0) { | |
231 | value >>= 1; | |
232 | ++bitpos; | |
233 | } | |
234 | } else { | |
235 | bitpos = 32; | |
236 | } | |
237 | return bitpos; | |
238 | #endif | |
239 | } | |
240 | ||
241 | static inline int CountTrailingZeros(uint64_t value) { | |
242 | #if defined(__clang__) || defined(__GNUC__) | |
243 | if (value == 0) return 64; | |
244 | return static_cast<int>(__builtin_ctzll(value)); | |
245 | #elif defined(_MSC_VER) | |
246 | unsigned long index; // NOLINT | |
247 | if (_BitScanForward64(&index, value)) { | |
248 | return static_cast<int>(index); | |
249 | } else { | |
250 | return 64; | |
251 | } | |
252 | #else | |
253 | int bitpos = 0; | |
254 | if (value) { | |
255 | while (value & 1 == 0) { | |
256 | value >>= 1; | |
257 | ++bitpos; | |
258 | } | |
259 | } else { | |
260 | bitpos = 64; | |
261 | } | |
262 | return bitpos; | |
263 | #endif | |
264 | } | |
265 | ||
266 | // Returns the minimum number of bits needed to represent an unsigned value | |
267 | static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); } | |
268 | ||
269 | // Returns ceil(log2(x)). | |
270 | static inline int Log2(uint64_t x) { | |
271 | // DCHECK_GT(x, 0); | |
272 | return NumRequiredBits(x - 1); | |
273 | } | |
274 | ||
275 | // | |
276 | // Utilities for reading and writing individual bits by their index | |
277 | // in a memory area. | |
278 | // | |
279 | ||
280 | // Bitmask selecting the k-th bit in a byte | |
281 | static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; | |
282 | ||
283 | // the bitwise complement version of kBitmask | |
284 | static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; | |
285 | ||
286 | // Bitmask selecting the (k - 1) preceding bits in a byte | |
287 | static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; | |
288 | static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127}; | |
289 | ||
290 | // the bitwise complement version of kPrecedingBitmask | |
291 | static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; | |
292 | ||
293 | static constexpr bool GetBit(const uint8_t* bits, uint64_t i) { | |
294 | return (bits[i >> 3] >> (i & 0x07)) & 1; | |
295 | } | |
296 | ||
297 | // Gets the i-th bit from a byte. Should only be used with i <= 7. | |
298 | static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) { | |
299 | return byte & kBitmask[i]; | |
300 | } | |
301 | ||
302 | static inline void ClearBit(uint8_t* bits, int64_t i) { | |
303 | bits[i / 8] &= kFlippedBitmask[i % 8]; | |
304 | } | |
305 | ||
306 | static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; } | |
307 | ||
308 | static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) { | |
309 | // https://graphics.stanford.edu/~seander/bithacks.html | |
310 | // "Conditionally set or clear bits without branching" | |
311 | // NOTE: this seems to confuse Valgrind as it reads from potentially | |
312 | // uninitialized memory | |
313 | bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) & | |
314 | kBitmask[i % 8]; | |
315 | } | |
316 | ||
317 | /// \brief set or clear a range of bits quickly | |
318 | ARROW_EXPORT | |
319 | void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set); | |
320 | ||
321 | /// \brief Sets all bits in the bitmap to true | |
322 | ARROW_EXPORT | |
323 | void SetBitmap(uint8_t* data, int64_t offset, int64_t length); | |
324 | ||
325 | /// \brief Clears all bits in the bitmap (set to false) | |
326 | ARROW_EXPORT | |
327 | void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); | |
328 | ||
329 | /// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be | |
330 | /// returned | |
331 | /// ex: | |
332 | /// ref: https://stackoverflow.com/a/59523400 | |
333 | template <typename Word> | |
334 | constexpr Word PrecedingWordBitmask(unsigned int const i) { | |
335 | return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1; | |
336 | } | |
337 | static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, ""); | |
338 | static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, ""); | |
339 | static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, ""); | |
340 | static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, ""); | |
341 | ||
342 | /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits | |
343 | /// from `high`. | |
344 | /// Word ret | |
345 | /// for (i = 0; i < sizeof(Word)*8; i++){ | |
346 | /// ret[i]= i < n ? low[i]: high[i]; | |
347 | /// } | |
348 | template <typename Word> | |
349 | constexpr Word SpliceWord(int n, Word low, Word high) { | |
350 | return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n)); | |
351 | } | |
352 | ||
353 | } // namespace BitUtil | |
354 | } // namespace arrow |