--- /dev/null
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/endian.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/ubsan.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+namespace detail {
+
+inline uint64_t LoadWord(const uint8_t* bytes) {
+ return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
+}
+
+inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
+ if (shift == 0) {
+ return current;
+ }
+ return (current >> shift) | (next << (64 - shift));
+}
+
+// These templates are here to help with unit tests
+
+template <typename T>
+struct BitBlockAnd {
+ static T Call(T left, T right) { return left & right; }
+};
+
+template <>
+struct BitBlockAnd<bool> {
+ static bool Call(bool left, bool right) { return left && right; }
+};
+
+template <typename T>
+struct BitBlockAndNot {
+ static T Call(T left, T right) { return left & ~right; }
+};
+
+template <>
+struct BitBlockAndNot<bool> {
+ static bool Call(bool left, bool right) { return left && !right; }
+};
+
+template <typename T>
+struct BitBlockOr {
+ static T Call(T left, T right) { return left | right; }
+};
+
+template <>
+struct BitBlockOr<bool> {
+ static bool Call(bool left, bool right) { return left || right; }
+};
+
+template <typename T>
+struct BitBlockOrNot {
+ static T Call(T left, T right) { return left | ~right; }
+};
+
+template <>
+struct BitBlockOrNot<bool> {
+ static bool Call(bool left, bool right) { return left || !right; }
+};
+
+} // namespace detail
+
+/// \brief Return value from bit block counters: the total number of bits and
+/// the number of set bits.
+struct BitBlockCount {
+ int16_t length;
+ int16_t popcount;
+
+ bool NoneSet() const { return this->popcount == 0; }
+ bool AllSet() const { return this->length == this->popcount; }
+};
+
+/// \brief A class that scans through a true/false bitmap to compute popcounts
+/// 64 or 256 bits at a time. This is used to accelerate processing of
+/// mostly-not-null array data.
+class ARROW_EXPORT BitBlockCounter {
+ public:
+ BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
+ : bitmap_(util::MakeNonNull(bitmap) + start_offset / 8),
+ bits_remaining_(length),
+ offset_(start_offset % 8) {}
+
+ /// \brief The bit size of each word run
+ static constexpr int64_t kWordBits = 64;
+
+ /// \brief The bit size of four words run
+ static constexpr int64_t kFourWordsBits = kWordBits * 4;
+
+ /// \brief Return the next run of available bits, usually 256. The returned
+ /// pair contains the size of run and the number of true values. The last
+ /// block will have a length less than 256 if the bitmap length is not a
+ /// multiple of 256, and will return 0-length blocks in subsequent
+ /// invocations.
+ BitBlockCount NextFourWords() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ int64_t total_popcount = 0;
+ if (offset_ == 0) {
+ if (bits_remaining_ < kFourWordsBits) {
+ return GetBlockSlow(kFourWordsBits);
+ }
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16));
+ total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24));
+ } else {
+ // When the offset is > 0, we need there to be a word beyond the last
+ // aligned word in the bitmap for the bit shifting logic.
+ if (bits_remaining_ < 5 * kFourWordsBits - offset_) {
+ return GetBlockSlow(kFourWordsBits);
+ }
+ auto current = LoadWord(bitmap_);
+ auto next = LoadWord(bitmap_ + 8);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 16);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 24);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ current = next;
+ next = LoadWord(bitmap_ + 32);
+ total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
+ }
+ bitmap_ += BitUtil::BytesForBits(kFourWordsBits);
+ bits_remaining_ -= kFourWordsBits;
+ return {256, static_cast<int16_t>(total_popcount)};
+ }
+
+ /// \brief Return the next run of available bits, usually 64. The returned
+ /// pair contains the size of run and the number of true values. The last
+ /// block will have a length less than 64 if the bitmap length is not a
+ /// multiple of 64, and will return 0-length blocks in subsequent
+ /// invocations.
+ BitBlockCount NextWord() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ int64_t popcount = 0;
+ if (offset_ == 0) {
+ if (bits_remaining_ < kWordBits) {
+ return GetBlockSlow(kWordBits);
+ }
+ popcount = BitUtil::PopCount(LoadWord(bitmap_));
+ } else {
+ // When the offset is > 0, we need there to be a word beyond the last
+ // aligned word in the bitmap for the bit shifting logic.
+ if (bits_remaining_ < 2 * kWordBits - offset_) {
+ return GetBlockSlow(kWordBits);
+ }
+ popcount =
+ BitUtil::PopCount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_));
+ }
+ bitmap_ += kWordBits / 8;
+ bits_remaining_ -= kWordBits;
+ return {64, static_cast<int16_t>(popcount)};
+ }
+
+ private:
+ /// \brief Return block with the requested size when doing word-wise
+ /// computation is not possible due to inadequate bits remaining.
+ BitBlockCount GetBlockSlow(int64_t block_size) noexcept;
+
+ const uint8_t* bitmap_;
+ int64_t bits_remaining_;
+ int64_t offset_;
+};
+
+/// \brief A tool to iterate through a possibly non-existent validity bitmap,
+/// to allow us to write one code path for both the with-nulls and no-nulls
+/// cases without giving up a lot of performance.
+class ARROW_EXPORT OptionalBitBlockCounter {
+ public:
+ // validity_bitmap may be NULLPTR
+ OptionalBitBlockCounter(const uint8_t* validity_bitmap, int64_t offset, int64_t length);
+
+ // validity_bitmap may be null
+ OptionalBitBlockCounter(const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset,
+ int64_t length);
+
+ /// Return block count for next word when the bitmap is available otherwise
+ /// return a block with length up to INT16_MAX when there is no validity
+ /// bitmap (so all the referenced values are not null).
+ BitBlockCount NextBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ if (has_bitmap_) {
+ BitBlockCount block = counter_.NextWord();
+ position_ += block.length;
+ return block;
+ } else {
+ int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+
+ // Like NextBlock, but returns a word-sized block even when there is no
+ // validity bitmap
+ BitBlockCount NextWord() {
+ static constexpr int64_t kWordSize = 64;
+ if (has_bitmap_) {
+ BitBlockCount block = counter_.NextWord();
+ position_ += block.length;
+ return block;
+ } else {
+ int16_t block_size = static_cast<int16_t>(std::min(kWordSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+
+ private:
+ const bool has_bitmap_;
+ int64_t position_;
+ int64_t length_;
+ BitBlockCounter counter_;
+};
+
+/// \brief A class that computes popcounts on the result of bitwise operations
+/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each
+/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two
+/// words.
+class ARROW_EXPORT BinaryBitBlockCounter {
+ public:
+ BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+ const uint8_t* right_bitmap, int64_t right_offset, int64_t length)
+ : left_bitmap_(util::MakeNonNull(left_bitmap) + left_offset / 8),
+ left_offset_(left_offset % 8),
+ right_bitmap_(util::MakeNonNull(right_bitmap) + right_offset / 8),
+ right_offset_(right_offset % 8),
+ bits_remaining_(length) {}
+
+ /// \brief Return the popcount of the bitwise-and of the next run of
+ /// available bits, up to 64. The returned pair contains the size of run and
+ /// the number of true values. The last block will have a length less than 64
+ /// if the bitmap length is not a multiple of 64, and will return 0-length
+ /// blocks in subsequent invocations.
+ BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
+
+ /// \brief Computes "x & ~y" block for each available run of bits.
+ BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
+
+ /// \brief Computes "x | y" block for each available run of bits.
+ BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
+
+ /// \brief Computes "x | ~y" block for each available run of bits.
+ BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); }
+
+ private:
+ template <template <typename T> class Op>
+ BitBlockCount NextWord() {
+ using detail::LoadWord;
+ using detail::ShiftWord;
+
+ if (!bits_remaining_) {
+ return {0, 0};
+ }
+ // When the offset is > 0, we need there to be a word beyond the last aligned
+ // word in the bitmap for the bit shifting logic.
+ constexpr int64_t kWordBits = BitBlockCounter::kWordBits;
+ const int64_t bits_required_to_use_words =
+ std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_),
+ right_offset_ == 0 ? 64 : 64 + (64 - right_offset_));
+ if (bits_remaining_ < bits_required_to_use_words) {
+ const int16_t run_length =
+ static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
+ int16_t popcount = 0;
+ for (int64_t i = 0; i < run_length; ++i) {
+ if (Op<bool>::Call(BitUtil::GetBit(left_bitmap_, left_offset_ + i),
+ BitUtil::GetBit(right_bitmap_, right_offset_ + i))) {
+ ++popcount;
+ }
+ }
+ // This code path should trigger _at most_ 2 times. In the "two times"
+ // case, the first time the run length will be a multiple of 8.
+ left_bitmap_ += run_length / 8;
+ right_bitmap_ += run_length / 8;
+ bits_remaining_ -= run_length;
+ return {run_length, popcount};
+ }
+
+ int64_t popcount = 0;
+ if (left_offset_ == 0 && right_offset_ == 0) {
+ popcount = BitUtil::PopCount(
+ Op<uint64_t>::Call(LoadWord(left_bitmap_), LoadWord(right_bitmap_)));
+ } else {
+ auto left_word =
+ ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_);
+ auto right_word =
+ ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_);
+ popcount = BitUtil::PopCount(Op<uint64_t>::Call(left_word, right_word));
+ }
+ left_bitmap_ += kWordBits / 8;
+ right_bitmap_ += kWordBits / 8;
+ bits_remaining_ -= kWordBits;
+ return {64, static_cast<int16_t>(popcount)};
+ }
+
+ const uint8_t* left_bitmap_;
+ int64_t left_offset_;
+ const uint8_t* right_bitmap_;
+ int64_t right_offset_;
+ int64_t bits_remaining_;
+};
+
+class ARROW_EXPORT OptionalBinaryBitBlockCounter {
+ public:
+ // Any bitmap may be NULLPTR
+ OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+ const uint8_t* right_bitmap, int64_t right_offset,
+ int64_t length);
+
+ // Any bitmap may be null
+ OptionalBinaryBitBlockCounter(const std::shared_ptr<Buffer>& left_bitmap,
+ int64_t left_offset,
+ const std::shared_ptr<Buffer>& right_bitmap,
+ int64_t right_offset, int64_t length);
+
+ BitBlockCount NextAndBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ switch (has_bitmap_) {
+ case HasBitmap::BOTH: {
+ BitBlockCount block = binary_counter_.NextAndWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::ONE: {
+ BitBlockCount block = unary_counter_.NextWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::NONE:
+ default: {
+ const int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+ }
+
+ BitBlockCount NextOrNotBlock() {
+ static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
+ switch (has_bitmap_) {
+ case HasBitmap::BOTH: {
+ BitBlockCount block = binary_counter_.NextOrNotWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::ONE: {
+ BitBlockCount block = unary_counter_.NextWord();
+ position_ += block.length;
+ return block;
+ }
+ case HasBitmap::NONE:
+ default: {
+ const int16_t block_size =
+ static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
+ position_ += block_size;
+ // All values are non-null
+ return {block_size, block_size};
+ }
+ }
+ }
+
+ private:
+ enum class HasBitmap : int { BOTH, ONE, NONE };
+
+ const HasBitmap has_bitmap_;
+ int64_t position_;
+ int64_t length_;
+ BitBlockCounter unary_counter_;
+ BinaryBitBlockCounter binary_counter_;
+
+ static HasBitmap HasBitmapFromBitmaps(bool has_left, bool has_right) {
+ switch (static_cast<int>(has_left) + static_cast<int>(has_right)) {
+ case 0:
+ return HasBitmap::NONE;
+ case 1:
+ return HasBitmap::ONE;
+ default: // 2
+ return HasBitmap::BOTH;
+ }
+ }
+};
+
+// Functional-style bit block visitors.
+
+template <typename VisitNotNull, typename VisitNull>
+static Status VisitBitBlocks(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
+ int64_t length, VisitNotNull&& visit_not_null,
+ VisitNull&& visit_null) {
+ const uint8_t* bitmap = NULLPTR;
+ if (bitmap_buf != NULLPTR) {
+ bitmap = bitmap_buf->data();
+ }
+ internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
+ int64_t position = 0;
+ while (position < length) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ ARROW_RETURN_NOT_OK(visit_not_null(position));
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ ARROW_RETURN_NOT_OK(visit_null());
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(bitmap, offset + position)) {
+ ARROW_RETURN_NOT_OK(visit_not_null(position));
+ } else {
+ ARROW_RETURN_NOT_OK(visit_null());
+ }
+ }
+ }
+ }
+ return Status::OK();
+}
+
+template <typename VisitNotNull, typename VisitNull>
+static void VisitBitBlocksVoid(const std::shared_ptr<Buffer>& bitmap_buf, int64_t offset,
+ int64_t length, VisitNotNull&& visit_not_null,
+ VisitNull&& visit_null) {
+ const uint8_t* bitmap = NULLPTR;
+ if (bitmap_buf != NULLPTR) {
+ bitmap = bitmap_buf->data();
+ }
+ internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
+ int64_t position = 0;
+ while (position < length) {
+ internal::BitBlockCount block = bit_counter.NextBlock();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_not_null(position);
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_null();
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(bitmap, offset + position)) {
+ visit_not_null(position);
+ } else {
+ visit_null();
+ }
+ }
+ }
+ }
+}
+
+template <typename VisitNotNull, typename VisitNull>
+static void VisitTwoBitBlocksVoid(const std::shared_ptr<Buffer>& left_bitmap_buf,
+ int64_t left_offset,
+ const std::shared_ptr<Buffer>& right_bitmap_buf,
+ int64_t right_offset, int64_t length,
+ VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
+ if (left_bitmap_buf == NULLPTR || right_bitmap_buf == NULLPTR) {
+ // At most one bitmap is present
+ if (left_bitmap_buf == NULLPTR) {
+ return VisitBitBlocksVoid(right_bitmap_buf, right_offset, length,
+ std::forward<VisitNotNull>(visit_not_null),
+ std::forward<VisitNull>(visit_null));
+ } else {
+ return VisitBitBlocksVoid(left_bitmap_buf, left_offset, length,
+ std::forward<VisitNotNull>(visit_not_null),
+ std::forward<VisitNull>(visit_null));
+ }
+ }
+ // Both bitmaps are present
+ const uint8_t* left_bitmap = left_bitmap_buf->data();
+ const uint8_t* right_bitmap = right_bitmap_buf->data();
+ BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset,
+ length);
+ int64_t position = 0;
+ while (position < length) {
+ BitBlockCount block = bit_counter.NextAndWord();
+ if (block.AllSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_not_null(position);
+ }
+ } else if (block.NoneSet()) {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ visit_null();
+ }
+ } else {
+ for (int64_t i = 0; i < block.length; ++i, ++position) {
+ if (BitUtil::GetBit(left_bitmap, left_offset + position) &&
+ BitUtil::GetBit(right_bitmap, right_offset + position)) {
+ visit_not_null(position);
+ } else {
+ visit_null();
+ }
+ }
+ }
+ }
+}
+
+} // namespace internal
+} // namespace arrow