ceph/src/arrow/cpp/src/arrow/util/bit_stream_utils.h

   1 // Licensed to the Apache Software Foundation (ASF) under one
   2 // or more contributor license agreements.  See the NOTICE file
   3 // distributed with this work for additional information
   4 // regarding copyright ownership.  The ASF licenses this file
   5 // to you under the Apache License, Version 2.0 (the
   6 // "License"); you may not use this file except in compliance
   7 // with the License.  You may obtain a copy of the License at
   8 //
   9 //   http://www.apache.org/licenses/LICENSE-2.0
  10 //
  11 // Unless required by applicable law or agreed to in writing,
  12 // software distributed under the License is distributed on an
  13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14 // KIND, either express or implied.  See the License for the
  15 // specific language governing permissions and limitations
  16 // under the License.
  17
  18 // From Apache Impala (incubating) as of 2016-01-29
  19
  20 #pragma once
  21
  22 #include <string.h>
  23
  24 #include <algorithm>
  25 #include <cstdint>
  26
  27 #include "arrow/util/bit_util.h"
  28 #include "arrow/util/bpacking.h"
  29 #include "arrow/util/logging.h"
  30 #include "arrow/util/macros.h"
  31 #include "arrow/util/ubsan.h"
  32
  33 namespace arrow {
  34 namespace BitUtil {
  35
  36 /// Utility class to write bit/byte streams.  This class can write data to either be
  37 /// bit packed or byte aligned (and a single stream that has a mix of both).
  38 /// This class does not allocate memory.
  39 class BitWriter {
  40  public:
  41   /// buffer: buffer to write bits to.  Buffer should be preallocated with
  42   /// 'buffer_len' bytes.
  43   BitWriter(uint8_t* buffer, int buffer_len) : buffer_(buffer), max_bytes_(buffer_len) {
  44     Clear();
  45   }
  46
  47   void Clear() {
  48     buffered_values_ = 0;
  49     byte_offset_ = 0;
  50     bit_offset_ = 0;
  51   }
  52
  53   /// The number of current bytes written, including the current byte (i.e. may include a
  54   /// fraction of a byte). Includes buffered values.
  55   int bytes_written() const {
  56     return byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_));
  57   }
  58   uint8_t* buffer() const { return buffer_; }
  59   int buffer_len() const { return max_bytes_; }
  60
  61   /// Writes a value to buffered_values_, flushing to buffer_ if necessary.  This is bit
  62   /// packed.  Returns false if there was not enough space. num_bits must be <= 32.
  63   bool PutValue(uint64_t v, int num_bits);
  64
  65   /// Writes v to the next aligned byte using num_bytes. If T is larger than
  66   /// num_bytes, the extra high-order bytes will be ignored. Returns false if
  67   /// there was not enough space.
  68   /// Assume the v is stored in buffer_ as a litte-endian format
  69   template <typename T>
  70   bool PutAligned(T v, int num_bytes);
  71
  72   /// Write a Vlq encoded int to the buffer.  Returns false if there was not enough
  73   /// room.  The value is written byte aligned.
  74   /// For more details on vlq:
  75   /// en.wikipedia.org/wiki/Variable-length_quantity
  76   bool PutVlqInt(uint32_t v);
  77
  78   // Writes an int zigzag encoded.
  79   bool PutZigZagVlqInt(int32_t v);
  80
  81   /// Write a Vlq encoded int64 to the buffer.  Returns false if there was not enough
  82   /// room.  The value is written byte aligned.
  83   /// For more details on vlq:
  84   /// en.wikipedia.org/wiki/Variable-length_quantity
  85   bool PutVlqInt(uint64_t v);
  86
  87   // Writes an int64 zigzag encoded.
  88   bool PutZigZagVlqInt(int64_t v);
  89
  90   /// Get a pointer to the next aligned byte and advance the underlying buffer
  91   /// by num_bytes.
  92   /// Returns NULL if there was not enough space.
  93   uint8_t* GetNextBytePtr(int num_bytes = 1);
  94
  95   /// Flushes all buffered values to the buffer. Call this when done writing to
  96   /// the buffer.  If 'align' is true, buffered_values_ is reset and any future
  97   /// writes will be written to the next byte boundary.
  98   void Flush(bool align = false);
  99
 100  private:
 101   uint8_t* buffer_;
 102   int max_bytes_;
 103
 104   /// Bit-packed values are initially written to this variable before being memcpy'd to
 105   /// buffer_. This is faster than writing values byte by byte directly to buffer_.
 106   uint64_t buffered_values_;
 107
 108   int byte_offset_;  // Offset in buffer_
 109   int bit_offset_;   // Offset in buffered_values_
 110 };
 111
 112 /// Utility class to read bit/byte stream.  This class can read bits or bytes
 113 /// that are either byte aligned or not.  It also has utilities to read multiple
 114 /// bytes in one read (e.g. encoded int).
 115 class BitReader {
 116  public:
 117   /// 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
 118   BitReader(const uint8_t* buffer, int buffer_len)
 119       : buffer_(buffer), max_bytes_(buffer_len), byte_offset_(0), bit_offset_(0) {
 120     int num_bytes = std::min(8, max_bytes_ - byte_offset_);
 121     memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
 122     buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
 123   }
 124
 125   BitReader()
 126       : buffer_(NULL),
 127         max_bytes_(0),
 128         buffered_values_(0),
 129         byte_offset_(0),
 130         bit_offset_(0) {}
 131
 132   void Reset(const uint8_t* buffer, int buffer_len) {
 133     buffer_ = buffer;
 134     max_bytes_ = buffer_len;
 135     byte_offset_ = 0;
 136     bit_offset_ = 0;
 137     int num_bytes = std::min(8, max_bytes_ - byte_offset_);
 138     memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
 139     buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
 140   }
 141
 142   /// Gets the next value from the buffer.  Returns true if 'v' could be read or false if
 143   /// there are not enough bytes left. num_bits must be <= 32.
 144   template <typename T>
 145   bool GetValue(int num_bits, T* v);
 146
 147   /// Get a number of values from the buffer. Return the number of values actually read.
 148   template <typename T>
 149   int GetBatch(int num_bits, T* v, int batch_size);
 150
 151   /// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
 152   /// needs to be a little-endian native type and big enough to store
 153   /// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
 154   /// be advanced to the start of the next byte before 'v' is read. Returns
 155   /// false if there are not enough bytes left.
 156   /// Assume the v was stored in buffer_ as a litte-endian format
 157   template <typename T>
 158   bool GetAligned(int num_bytes, T* v);
 159
 160   /// Reads a vlq encoded int from the stream.  The encoded int must start at
 161   /// the beginning of a byte. Return false if there were not enough bytes in
 162   /// the buffer.
 163   bool GetVlqInt(uint32_t* v);
 164
 165   // Reads a zigzag encoded int `into` v.
 166   bool GetZigZagVlqInt(int32_t* v);
 167
 168   /// Reads a vlq encoded int64 from the stream.  The encoded int must start at
 169   /// the beginning of a byte. Return false if there were not enough bytes in
 170   /// the buffer.
 171   bool GetVlqInt(uint64_t* v);
 172
 173   // Reads a zigzag encoded int64 `into` v.
 174   bool GetZigZagVlqInt(int64_t* v);
 175
 176   /// Returns the number of bytes left in the stream, not including the current
 177   /// byte (i.e., there may be an additional fraction of a byte).
 178   int bytes_left() {
 179     return max_bytes_ -
 180            (byte_offset_ + static_cast<int>(BitUtil::BytesForBits(bit_offset_)));
 181   }
 182
 183   /// Maximum byte length of a vlq encoded int
 184   static constexpr int kMaxVlqByteLength = 5;
 185
 186   /// Maximum byte length of a vlq encoded int64
 187   static constexpr int kMaxVlqByteLengthForInt64 = 10;
 188
 189  private:
 190   const uint8_t* buffer_;
 191   int max_bytes_;
 192
 193   /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
 194   /// faster than reading values byte by byte directly from buffer_.
 195   uint64_t buffered_values_;
 196
 197   int byte_offset_;  // Offset in buffer_
 198   int bit_offset_;   // Offset in buffered_values_
 199 };
 200
 201 inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
 202   // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases)
 203   DCHECK_LE(num_bits, 32);
 204   DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
 205
 206   if (ARROW_PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8))
 207     return false;
 208
 209   buffered_values_ |= v << bit_offset_;
 210   bit_offset_ += num_bits;
 211
 212   if (ARROW_PREDICT_FALSE(bit_offset_ >= 64)) {
 213     // Flush buffered_values_ and write out bits of v that did not fit
 214     buffered_values_ = arrow::BitUtil::ToLittleEndian(buffered_values_);
 215     memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
 216     buffered_values_ = 0;
 217     byte_offset_ += 8;
 218     bit_offset_ -= 64;
 219     buffered_values_ = v >> (num_bits - bit_offset_);
 220   }
 221   DCHECK_LT(bit_offset_, 64);
 222   return true;
 223 }
 224
 225 inline void BitWriter::Flush(bool align) {
 226   int num_bytes = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
 227   DCHECK_LE(byte_offset_ + num_bytes, max_bytes_);
 228   auto buffered_values = arrow::BitUtil::ToLittleEndian(buffered_values_);
 229   memcpy(buffer_ + byte_offset_, &buffered_values, num_bytes);
 230
 231   if (align) {
 232     buffered_values_ = 0;
 233     byte_offset_ += num_bytes;
 234     bit_offset_ = 0;
 235   }
 236 }
 237
 238 inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
 239   Flush(/* align */ true);
 240   DCHECK_LE(byte_offset_, max_bytes_);
 241   if (byte_offset_ + num_bytes > max_bytes_) return NULL;
 242   uint8_t* ptr = buffer_ + byte_offset_;
 243   byte_offset_ += num_bytes;
 244   return ptr;
 245 }
 246
 247 template <typename T>
 248 inline bool BitWriter::PutAligned(T val, int num_bytes) {
 249   uint8_t* ptr = GetNextBytePtr(num_bytes);
 250   if (ptr == NULL) return false;
 251   val = arrow::BitUtil::ToLittleEndian(val);
 252   memcpy(ptr, &val, num_bytes);
 253   return true;
 254 }
 255
 256 namespace detail {
 257
 258 template <typename T>
 259 inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
 260                       int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
 261 #ifdef _MSC_VER
 262 #pragma warning(push)
 263 #pragma warning(disable : 4800)
 264 #endif
 265   *v = static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >>
 266                       *bit_offset);
 267 #ifdef _MSC_VER
 268 #pragma warning(pop)
 269 #endif
 270   *bit_offset += num_bits;
 271   if (*bit_offset >= 64) {
 272     *byte_offset += 8;
 273     *bit_offset -= 64;
 274
 275     int bytes_remaining = max_bytes - *byte_offset;
 276     if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
 277       memcpy(buffered_values, buffer + *byte_offset, 8);
 278     } else {
 279       memcpy(buffered_values, buffer + *byte_offset, bytes_remaining);
 280     }
 281     *buffered_values = arrow::BitUtil::FromLittleEndian(*buffered_values);
 282 #ifdef _MSC_VER
 283 #pragma warning(push)
 284 #pragma warning(disable : 4800 4805)
 285 #endif
 286     // Read bits of v that crossed into new buffered_values_
 287     if (ARROW_PREDICT_TRUE(num_bits - *bit_offset < static_cast<int>(8 * sizeof(T)))) {
 288       // if shift exponent(num_bits - *bit_offset) is not less than sizeof(T), *v will not
 289       // change and the following code may cause a runtime error that the shift exponent
 290       // is too large
 291       *v = *v | static_cast<T>(BitUtil::TrailingBits(*buffered_values, *bit_offset)
 292                                << (num_bits - *bit_offset));
 293     }
 294 #ifdef _MSC_VER
 295 #pragma warning(pop)
 296 #endif
 297     DCHECK_LE(*bit_offset, 64);
 298   }
 299 }
 300
 301 }  // namespace detail
 302
 303 template <typename T>
 304 inline bool BitReader::GetValue(int num_bits, T* v) {
 305   return GetBatch(num_bits, v, 1) == 1;
 306 }
 307
 308 template <typename T>
 309 inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
 310   DCHECK(buffer_ != NULL);
 311   DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
 312
 313   int bit_offset = bit_offset_;
 314   int byte_offset = byte_offset_;
 315   uint64_t buffered_values = buffered_values_;
 316   int max_bytes = max_bytes_;
 317   const uint8_t* buffer = buffer_;
 318
 319   uint64_t needed_bits = num_bits * batch_size;
 320   constexpr uint64_t kBitsPerByte = 8;
 321   uint64_t remaining_bits = (max_bytes - byte_offset) * kBitsPerByte - bit_offset;
 322   if (remaining_bits < needed_bits) {
 323     batch_size = static_cast<int>(remaining_bits) / num_bits;
 324   }
 325
 326   int i = 0;
 327   if (ARROW_PREDICT_FALSE(bit_offset != 0)) {
 328     for (; i < batch_size && bit_offset != 0; ++i) {
 329       detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
 330                         &buffered_values);
 331     }
 332   }
 333
 334   if (sizeof(T) == 4) {
 335     int num_unpacked =
 336         internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
 337                            reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits);
 338     i += num_unpacked;
 339     byte_offset += num_unpacked * num_bits / 8;
 340   } else if (sizeof(T) == 8 && num_bits > 32) {
 341     // Use unpack64 only if num_bits is larger than 32
 342     // TODO (ARROW-13677): improve the performance of internal::unpack64
 343     // and remove the restriction of num_bits
 344     int num_unpacked =
 345         internal::unpack64(buffer + byte_offset, reinterpret_cast<uint64_t*>(v + i),
 346                            batch_size - i, num_bits);
 347     i += num_unpacked;
 348     byte_offset += num_unpacked * num_bits / 8;
 349   } else {
 350     // TODO: revisit this limit if necessary
 351     DCHECK_LE(num_bits, 32);
 352     const int buffer_size = 1024;
 353     uint32_t unpack_buffer[buffer_size];
 354     while (i < batch_size) {
 355       int unpack_size = std::min(buffer_size, batch_size - i);
 356       int num_unpacked =
 357           internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
 358                              unpack_buffer, unpack_size, num_bits);
 359       if (num_unpacked == 0) {
 360         break;
 361       }
 362       for (int k = 0; k < num_unpacked; ++k) {
 363 #ifdef _MSC_VER
 364 #pragma warning(push)
 365 #pragma warning(disable : 4800)
 366 #endif
 367         v[i + k] = static_cast<T>(unpack_buffer[k]);
 368 #ifdef _MSC_VER
 369 #pragma warning(pop)
 370 #endif
 371       }
 372       i += num_unpacked;
 373       byte_offset += num_unpacked * num_bits / 8;
 374     }
 375   }
 376
 377   int bytes_remaining = max_bytes - byte_offset;
 378   if (bytes_remaining >= 8) {
 379     memcpy(&buffered_values, buffer + byte_offset, 8);
 380   } else {
 381     memcpy(&buffered_values, buffer + byte_offset, bytes_remaining);
 382   }
 383   buffered_values = arrow::BitUtil::FromLittleEndian(buffered_values);
 384
 385   for (; i < batch_size; ++i) {
 386     detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
 387                       &buffered_values);
 388   }
 389
 390   bit_offset_ = bit_offset;
 391   byte_offset_ = byte_offset;
 392   buffered_values_ = buffered_values;
 393
 394   return batch_size;
 395 }
 396
 397 template <typename T>
 398 inline bool BitReader::GetAligned(int num_bytes, T* v) {
 399   if (ARROW_PREDICT_FALSE(num_bytes > static_cast<int>(sizeof(T)))) {
 400     return false;
 401   }
 402
 403   int bytes_read = static_cast<int>(BitUtil::BytesForBits(bit_offset_));
 404   if (ARROW_PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) {
 405     return false;
 406   }
 407
 408   // Advance byte_offset to next unread byte and read num_bytes
 409   byte_offset_ += bytes_read;
 410   memcpy(v, buffer_ + byte_offset_, num_bytes);
 411   *v = arrow::BitUtil::FromLittleEndian(*v);
 412   byte_offset_ += num_bytes;
 413
 414   // Reset buffered_values_
 415   bit_offset_ = 0;
 416   int bytes_remaining = max_bytes_ - byte_offset_;
 417   if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
 418     memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
 419   } else {
 420     memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
 421   }
 422   buffered_values_ = arrow::BitUtil::FromLittleEndian(buffered_values_);
 423   return true;
 424 }
 425
 426 inline bool BitWriter::PutVlqInt(uint32_t v) {
 427   bool result = true;
 428   while ((v & 0xFFFFFF80UL) != 0UL) {
 429     result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
 430     v >>= 7;
 431   }
 432   result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
 433   return result;
 434 }
 435
 436 inline bool BitReader::GetVlqInt(uint32_t* v) {
 437   uint32_t tmp = 0;
 438
 439   for (int i = 0; i < kMaxVlqByteLength; i++) {
 440     uint8_t byte = 0;
 441     if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
 442       return false;
 443     }
 444     tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * i);
 445
 446     if ((byte & 0x80) == 0) {
 447       *v = tmp;
 448       return true;
 449     }
 450   }
 451
 452   return false;
 453 }
 454
 455 inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
 456   uint32_t u_v = ::arrow::util::SafeCopy<uint32_t>(v);
 457   u_v = (u_v << 1) ^ static_cast<uint32_t>(v >> 31);
 458   return PutVlqInt(u_v);
 459 }
 460
 461 inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
 462   uint32_t u;
 463   if (!GetVlqInt(&u)) return false;
 464   u = (u >> 1) ^ (~(u & 1) + 1);
 465   *v = ::arrow::util::SafeCopy<int32_t>(u);
 466   return true;
 467 }
 468
 469 inline bool BitWriter::PutVlqInt(uint64_t v) {
 470   bool result = true;
 471   while ((v & 0xFFFFFFFFFFFFFF80ULL) != 0ULL) {
 472     result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
 473     v >>= 7;
 474   }
 475   result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
 476   return result;
 477 }
 478
 479 inline bool BitReader::GetVlqInt(uint64_t* v) {
 480   uint64_t tmp = 0;
 481
 482   for (int i = 0; i < kMaxVlqByteLengthForInt64; i++) {
 483     uint8_t byte = 0;
 484     if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
 485       return false;
 486     }
 487     tmp |= static_cast<uint64_t>(byte & 0x7F) << (7 * i);
 488
 489     if ((byte & 0x80) == 0) {
 490       *v = tmp;
 491       return true;
 492     }
 493   }
 494
 495   return false;
 496 }
 497
 498 inline bool BitWriter::PutZigZagVlqInt(int64_t v) {
 499   uint64_t u_v = ::arrow::util::SafeCopy<uint64_t>(v);
 500   u_v = (u_v << 1) ^ static_cast<uint64_t>(v >> 63);
 501   return PutVlqInt(u_v);
 502 }
 503
 504 inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
 505   uint64_t u;
 506   if (!GetVlqInt(&u)) return false;
 507   u = (u >> 1) ^ (~(u & 1) + 1);
 508   *v = ::arrow::util::SafeCopy<int64_t>(u);
 509   return true;
 510 }
 511
 512 }  // namespace BitUtil
 513 }  // namespace arrow