1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 // From Apache Impala (incubating) as of 2016-01-29
27 #include "arrow/util/bit_util.h"
28 #include "arrow/util/bpacking.h"
29 #include "arrow/util/logging.h"
30 #include "arrow/util/macros.h"
31 #include "arrow/util/ubsan.h"
36 /// Utility class to write bit/byte streams. This class can write data to either be
37 /// bit packed or byte aligned (and a single stream that has a mix of both).
38 /// This class does not allocate memory.
41 /// buffer: buffer to write bits to. Buffer should be preallocated with
42 /// 'buffer_len' bytes.
43 BitWriter(uint8_t* buffer
, int buffer_len
) : buffer_(buffer
), max_bytes_(buffer_len
) {
53 /// The number of current bytes written, including the current byte (i.e. may include a
54 /// fraction of a byte). Includes buffered values.
55 int bytes_written() const {
56 return byte_offset_
+ static_cast<int>(BitUtil::BytesForBits(bit_offset_
));
58 uint8_t* buffer() const { return buffer_
; }
59 int buffer_len() const { return max_bytes_
; }
61 /// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
62 /// packed. Returns false if there was not enough space. num_bits must be <= 32.
63 bool PutValue(uint64_t v
, int num_bits
);
65 /// Writes v to the next aligned byte using num_bytes. If T is larger than
66 /// num_bytes, the extra high-order bytes will be ignored. Returns false if
67 /// there was not enough space.
68 /// Assume the v is stored in buffer_ as a litte-endian format
70 bool PutAligned(T v
, int num_bytes
);
72 /// Write a Vlq encoded int to the buffer. Returns false if there was not enough
73 /// room. The value is written byte aligned.
74 /// For more details on vlq:
75 /// en.wikipedia.org/wiki/Variable-length_quantity
76 bool PutVlqInt(uint32_t v
);
78 // Writes an int zigzag encoded.
79 bool PutZigZagVlqInt(int32_t v
);
81 /// Write a Vlq encoded int64 to the buffer. Returns false if there was not enough
82 /// room. The value is written byte aligned.
83 /// For more details on vlq:
84 /// en.wikipedia.org/wiki/Variable-length_quantity
85 bool PutVlqInt(uint64_t v
);
87 // Writes an int64 zigzag encoded.
88 bool PutZigZagVlqInt(int64_t v
);
90 /// Get a pointer to the next aligned byte and advance the underlying buffer
92 /// Returns NULL if there was not enough space.
93 uint8_t* GetNextBytePtr(int num_bytes
= 1);
95 /// Flushes all buffered values to the buffer. Call this when done writing to
96 /// the buffer. If 'align' is true, buffered_values_ is reset and any future
97 /// writes will be written to the next byte boundary.
98 void Flush(bool align
= false);
104 /// Bit-packed values are initially written to this variable before being memcpy'd to
105 /// buffer_. This is faster than writing values byte by byte directly to buffer_.
106 uint64_t buffered_values_
;
108 int byte_offset_
; // Offset in buffer_
109 int bit_offset_
; // Offset in buffered_values_
112 /// Utility class to read bit/byte stream. This class can read bits or bytes
113 /// that are either byte aligned or not. It also has utilities to read multiple
114 /// bytes in one read (e.g. encoded int).
117 /// 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
118 BitReader(const uint8_t* buffer
, int buffer_len
)
119 : buffer_(buffer
), max_bytes_(buffer_len
), byte_offset_(0), bit_offset_(0) {
120 int num_bytes
= std::min(8, max_bytes_
- byte_offset_
);
121 memcpy(&buffered_values_
, buffer_
+ byte_offset_
, num_bytes
);
122 buffered_values_
= arrow::BitUtil::FromLittleEndian(buffered_values_
);
132 void Reset(const uint8_t* buffer
, int buffer_len
) {
134 max_bytes_
= buffer_len
;
137 int num_bytes
= std::min(8, max_bytes_
- byte_offset_
);
138 memcpy(&buffered_values_
, buffer_
+ byte_offset_
, num_bytes
);
139 buffered_values_
= arrow::BitUtil::FromLittleEndian(buffered_values_
);
142 /// Gets the next value from the buffer. Returns true if 'v' could be read or false if
143 /// there are not enough bytes left. num_bits must be <= 32.
144 template <typename T
>
145 bool GetValue(int num_bits
, T
* v
);
147 /// Get a number of values from the buffer. Return the number of values actually read.
148 template <typename T
>
149 int GetBatch(int num_bits
, T
* v
, int batch_size
);
151 /// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
152 /// needs to be a little-endian native type and big enough to store
153 /// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
154 /// be advanced to the start of the next byte before 'v' is read. Returns
155 /// false if there are not enough bytes left.
156 /// Assume the v was stored in buffer_ as a litte-endian format
157 template <typename T
>
158 bool GetAligned(int num_bytes
, T
* v
);
160 /// Reads a vlq encoded int from the stream. The encoded int must start at
161 /// the beginning of a byte. Return false if there were not enough bytes in
163 bool GetVlqInt(uint32_t* v
);
165 // Reads a zigzag encoded int `into` v.
166 bool GetZigZagVlqInt(int32_t* v
);
168 /// Reads a vlq encoded int64 from the stream. The encoded int must start at
169 /// the beginning of a byte. Return false if there were not enough bytes in
171 bool GetVlqInt(uint64_t* v
);
173 // Reads a zigzag encoded int64 `into` v.
174 bool GetZigZagVlqInt(int64_t* v
);
176 /// Returns the number of bytes left in the stream, not including the current
177 /// byte (i.e., there may be an additional fraction of a byte).
180 (byte_offset_
+ static_cast<int>(BitUtil::BytesForBits(bit_offset_
)));
183 /// Maximum byte length of a vlq encoded int
184 static constexpr int kMaxVlqByteLength
= 5;
186 /// Maximum byte length of a vlq encoded int64
187 static constexpr int kMaxVlqByteLengthForInt64
= 10;
190 const uint8_t* buffer_
;
193 /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
194 /// faster than reading values byte by byte directly from buffer_.
195 uint64_t buffered_values_
;
197 int byte_offset_
; // Offset in buffer_
198 int bit_offset_
; // Offset in buffered_values_
201 inline bool BitWriter::PutValue(uint64_t v
, int num_bits
) {
202 // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
203 DCHECK_LE(num_bits
, 32);
204 DCHECK_EQ(v
>> num_bits
, 0) << "v = " << v
<< ", num_bits = " << num_bits
;
206 if (ARROW_PREDICT_FALSE(byte_offset_
* 8 + bit_offset_
+ num_bits
> max_bytes_
* 8))
209 buffered_values_
|= v
<< bit_offset_
;
210 bit_offset_
+= num_bits
;
212 if (ARROW_PREDICT_FALSE(bit_offset_
>= 64)) {
213 // Flush buffered_values_ and write out bits of v that did not fit
214 buffered_values_
= arrow::BitUtil::ToLittleEndian(buffered_values_
);
215 memcpy(buffer_
+ byte_offset_
, &buffered_values_
, 8);
216 buffered_values_
= 0;
219 buffered_values_
= v
>> (num_bits
- bit_offset_
);
221 DCHECK_LT(bit_offset_
, 64);
225 inline void BitWriter::Flush(bool align
) {
226 int num_bytes
= static_cast<int>(BitUtil::BytesForBits(bit_offset_
));
227 DCHECK_LE(byte_offset_
+ num_bytes
, max_bytes_
);
228 auto buffered_values
= arrow::BitUtil::ToLittleEndian(buffered_values_
);
229 memcpy(buffer_
+ byte_offset_
, &buffered_values
, num_bytes
);
232 buffered_values_
= 0;
233 byte_offset_
+= num_bytes
;
238 inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes
) {
239 Flush(/* align */ true);
240 DCHECK_LE(byte_offset_
, max_bytes_
);
241 if (byte_offset_
+ num_bytes
> max_bytes_
) return NULL
;
242 uint8_t* ptr
= buffer_
+ byte_offset_
;
243 byte_offset_
+= num_bytes
;
247 template <typename T
>
248 inline bool BitWriter::PutAligned(T val
, int num_bytes
) {
249 uint8_t* ptr
= GetNextBytePtr(num_bytes
);
250 if (ptr
== NULL
) return false;
251 val
= arrow::BitUtil::ToLittleEndian(val
);
252 memcpy(ptr
, &val
, num_bytes
);
258 template <typename T
>
259 inline void GetValue_(int num_bits
, T
* v
, int max_bytes
, const uint8_t* buffer
,
260 int* bit_offset
, int* byte_offset
, uint64_t* buffered_values
) {
262 #pragma warning(push)
263 #pragma warning(disable : 4800)
265 *v
= static_cast<T
>(BitUtil::TrailingBits(*buffered_values
, *bit_offset
+ num_bits
) >>
270 *bit_offset
+= num_bits
;
271 if (*bit_offset
>= 64) {
275 int bytes_remaining
= max_bytes
- *byte_offset
;
276 if (ARROW_PREDICT_TRUE(bytes_remaining
>= 8)) {
277 memcpy(buffered_values
, buffer
+ *byte_offset
, 8);
279 memcpy(buffered_values
, buffer
+ *byte_offset
, bytes_remaining
);
281 *buffered_values
= arrow::BitUtil::FromLittleEndian(*buffered_values
);
283 #pragma warning(push)
284 #pragma warning(disable : 4800 4805)
286 // Read bits of v that crossed into new buffered_values_
287 if (ARROW_PREDICT_TRUE(num_bits
- *bit_offset
< static_cast<int>(8 * sizeof(T
)))) {
288 // if shift exponent(num_bits - *bit_offset) is not less than sizeof(T), *v will not
289 // change and the following code may cause a runtime error that the shift exponent
291 *v
= *v
| static_cast<T
>(BitUtil::TrailingBits(*buffered_values
, *bit_offset
)
292 << (num_bits
- *bit_offset
));
297 DCHECK_LE(*bit_offset
, 64);
301 } // namespace detail
303 template <typename T
>
304 inline bool BitReader::GetValue(int num_bits
, T
* v
) {
305 return GetBatch(num_bits
, v
, 1) == 1;
308 template <typename T
>
309 inline int BitReader::GetBatch(int num_bits
, T
* v
, int batch_size
) {
310 DCHECK(buffer_
!= NULL
);
311 DCHECK_LE(num_bits
, static_cast<int>(sizeof(T
) * 8));
313 int bit_offset
= bit_offset_
;
314 int byte_offset
= byte_offset_
;
315 uint64_t buffered_values
= buffered_values_
;
316 int max_bytes
= max_bytes_
;
317 const uint8_t* buffer
= buffer_
;
319 uint64_t needed_bits
= num_bits
* batch_size
;
320 constexpr uint64_t kBitsPerByte
= 8;
321 uint64_t remaining_bits
= (max_bytes
- byte_offset
) * kBitsPerByte
- bit_offset
;
322 if (remaining_bits
< needed_bits
) {
323 batch_size
= static_cast<int>(remaining_bits
) / num_bits
;
327 if (ARROW_PREDICT_FALSE(bit_offset
!= 0)) {
328 for (; i
< batch_size
&& bit_offset
!= 0; ++i
) {
329 detail::GetValue_(num_bits
, &v
[i
], max_bytes
, buffer
, &bit_offset
, &byte_offset
,
334 if (sizeof(T
) == 4) {
336 internal::unpack32(reinterpret_cast<const uint32_t*>(buffer
+ byte_offset
),
337 reinterpret_cast<uint32_t*>(v
+ i
), batch_size
- i
, num_bits
);
339 byte_offset
+= num_unpacked
* num_bits
/ 8;
340 } else if (sizeof(T
) == 8 && num_bits
> 32) {
341 // Use unpack64 only if num_bits is larger than 32
342 // TODO (ARROW-13677): improve the performance of internal::unpack64
343 // and remove the restriction of num_bits
345 internal::unpack64(buffer
+ byte_offset
, reinterpret_cast<uint64_t*>(v
+ i
),
346 batch_size
- i
, num_bits
);
348 byte_offset
+= num_unpacked
* num_bits
/ 8;
350 // TODO: revisit this limit if necessary
351 DCHECK_LE(num_bits
, 32);
352 const int buffer_size
= 1024;
353 uint32_t unpack_buffer
[buffer_size
];
354 while (i
< batch_size
) {
355 int unpack_size
= std::min(buffer_size
, batch_size
- i
);
357 internal::unpack32(reinterpret_cast<const uint32_t*>(buffer
+ byte_offset
),
358 unpack_buffer
, unpack_size
, num_bits
);
359 if (num_unpacked
== 0) {
362 for (int k
= 0; k
< num_unpacked
; ++k
) {
364 #pragma warning(push)
365 #pragma warning(disable : 4800)
367 v
[i
+ k
] = static_cast<T
>(unpack_buffer
[k
]);
373 byte_offset
+= num_unpacked
* num_bits
/ 8;
377 int bytes_remaining
= max_bytes
- byte_offset
;
378 if (bytes_remaining
>= 8) {
379 memcpy(&buffered_values
, buffer
+ byte_offset
, 8);
381 memcpy(&buffered_values
, buffer
+ byte_offset
, bytes_remaining
);
383 buffered_values
= arrow::BitUtil::FromLittleEndian(buffered_values
);
385 for (; i
< batch_size
; ++i
) {
386 detail::GetValue_(num_bits
, &v
[i
], max_bytes
, buffer
, &bit_offset
, &byte_offset
,
390 bit_offset_
= bit_offset
;
391 byte_offset_
= byte_offset
;
392 buffered_values_
= buffered_values
;
397 template <typename T
>
398 inline bool BitReader::GetAligned(int num_bytes
, T
* v
) {
399 if (ARROW_PREDICT_FALSE(num_bytes
> static_cast<int>(sizeof(T
)))) {
403 int bytes_read
= static_cast<int>(BitUtil::BytesForBits(bit_offset_
));
404 if (ARROW_PREDICT_FALSE(byte_offset_
+ bytes_read
+ num_bytes
> max_bytes_
)) {
408 // Advance byte_offset to next unread byte and read num_bytes
409 byte_offset_
+= bytes_read
;
410 memcpy(v
, buffer_
+ byte_offset_
, num_bytes
);
411 *v
= arrow::BitUtil::FromLittleEndian(*v
);
412 byte_offset_
+= num_bytes
;
414 // Reset buffered_values_
416 int bytes_remaining
= max_bytes_
- byte_offset_
;
417 if (ARROW_PREDICT_TRUE(bytes_remaining
>= 8)) {
418 memcpy(&buffered_values_
, buffer_
+ byte_offset_
, 8);
420 memcpy(&buffered_values_
, buffer_
+ byte_offset_
, bytes_remaining
);
422 buffered_values_
= arrow::BitUtil::FromLittleEndian(buffered_values_
);
426 inline bool BitWriter::PutVlqInt(uint32_t v
) {
428 while ((v
& 0xFFFFFF80UL
) != 0UL) {
429 result
&= PutAligned
<uint8_t>(static_cast<uint8_t>((v
& 0x7F) | 0x80), 1);
432 result
&= PutAligned
<uint8_t>(static_cast<uint8_t>(v
& 0x7F), 1);
436 inline bool BitReader::GetVlqInt(uint32_t* v
) {
439 for (int i
= 0; i
< kMaxVlqByteLength
; i
++) {
441 if (ARROW_PREDICT_FALSE(!GetAligned
<uint8_t>(1, &byte
))) {
444 tmp
|= static_cast<uint32_t>(byte
& 0x7F) << (7 * i
);
446 if ((byte
& 0x80) == 0) {
455 inline bool BitWriter::PutZigZagVlqInt(int32_t v
) {
456 uint32_t u_v
= ::arrow::util::SafeCopy
<uint32_t>(v
);
457 u_v
= (u_v
<< 1) ^ static_cast<uint32_t>(v
>> 31);
458 return PutVlqInt(u_v
);
461 inline bool BitReader::GetZigZagVlqInt(int32_t* v
) {
463 if (!GetVlqInt(&u
)) return false;
464 u
= (u
>> 1) ^ (~(u
& 1) + 1);
465 *v
= ::arrow::util::SafeCopy
<int32_t>(u
);
469 inline bool BitWriter::PutVlqInt(uint64_t v
) {
471 while ((v
& 0xFFFFFFFFFFFFFF80ULL
) != 0ULL) {
472 result
&= PutAligned
<uint8_t>(static_cast<uint8_t>((v
& 0x7F) | 0x80), 1);
475 result
&= PutAligned
<uint8_t>(static_cast<uint8_t>(v
& 0x7F), 1);
479 inline bool BitReader::GetVlqInt(uint64_t* v
) {
482 for (int i
= 0; i
< kMaxVlqByteLengthForInt64
; i
++) {
484 if (ARROW_PREDICT_FALSE(!GetAligned
<uint8_t>(1, &byte
))) {
487 tmp
|= static_cast<uint64_t>(byte
& 0x7F) << (7 * i
);
489 if ((byte
& 0x80) == 0) {
498 inline bool BitWriter::PutZigZagVlqInt(int64_t v
) {
499 uint64_t u_v
= ::arrow::util::SafeCopy
<uint64_t>(v
);
500 u_v
= (u_v
<< 1) ^ static_cast<uint64_t>(v
>> 63);
501 return PutVlqInt(u_v
);
504 inline bool BitReader::GetZigZagVlqInt(int64_t* v
) {
506 if (!GetVlqInt(&u
)) return false;
507 u
= (u
>> 1) ^ (~(u
& 1) + 1);
508 *v
= ::arrow::util::SafeCopy
<int64_t>(u
);
512 } // namespace BitUtil