]>
git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/util/bitmap_writer.h
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
23 #include "arrow/util/bit_util.h"
24 #include "arrow/util/endian.h"
25 #include "arrow/util/macros.h"
31 // A sequential bitwise writer that preserves surrounding bit values.
34 BitmapWriter(uint8_t* bitmap
, int64_t start_offset
, int64_t length
)
35 : bitmap_(bitmap
), position_(0), length_(length
) {
36 byte_offset_
= start_offset
/ 8;
37 bit_mask_
= BitUtil::kBitmask
[start_offset
% 8];
39 current_byte_
= bitmap
[byte_offset_
];
45 void Set() { current_byte_
|= bit_mask_
; }
47 void Clear() { current_byte_
&= bit_mask_
^ 0xFF; }
50 bit_mask_
= static_cast<uint8_t>(bit_mask_
<< 1);
53 // Finished this byte, need advancing
55 bitmap_
[byte_offset_
++] = current_byte_
;
56 if (ARROW_PREDICT_TRUE(position_
< length_
)) {
57 current_byte_
= bitmap_
[byte_offset_
];
63 // Store current byte if we didn't went past bitmap storage
64 if (length_
> 0 && (bit_mask_
!= 0x01 || position_
< length_
)) {
65 bitmap_
[byte_offset_
] = current_byte_
;
69 int64_t position() const { return position_
; }
76 uint8_t current_byte_
;
81 class FirstTimeBitmapWriter
{
82 // Like BitmapWriter, but any bit values *following* the bits written
83 // might be clobbered. It is hence faster than BitmapWriter, and can
84 // also avoid false positives with Valgrind.
87 FirstTimeBitmapWriter(uint8_t* bitmap
, int64_t start_offset
, int64_t length
)
88 : bitmap_(bitmap
), position_(0), length_(length
) {
90 byte_offset_
= start_offset
/ 8;
91 bit_mask_
= BitUtil::kBitmask
[start_offset
% 8];
93 current_byte_
= bitmap
[byte_offset_
] & BitUtil::kPrecedingBitmask
[start_offset
% 8];
99 /// Appends number_of_bits from word to valid_bits and valid_bits_offset.
101 /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
102 /// to be unset (i.e. 0).
103 /// \param[in] number_of_bits The number of bits to append from word.
104 void AppendWord(uint64_t word
, int64_t number_of_bits
) {
105 if (ARROW_PREDICT_FALSE(number_of_bits
== 0)) {
109 // Location that the first byte needs to be written to.
110 uint8_t* append_position
= bitmap_
+ byte_offset_
;
112 // Update state variables except for current_byte_ here.
113 position_
+= number_of_bits
;
114 int64_t bit_offset
= BitUtil::CountTrailingZeros(static_cast<uint32_t>(bit_mask_
));
115 bit_mask_
= BitUtil::kBitmask
[(bit_offset
+ number_of_bits
) % 8];
116 byte_offset_
+= (bit_offset
+ number_of_bits
) / 8;
118 if (bit_offset
!= 0) {
119 // We are in the middle of the byte. This code updates the byte and shifts
120 // bits appropriately within word so it can be memcpy'd below.
121 int64_t bits_to_carry
= 8 - bit_offset
;
122 // Carry over bits from word to current_byte_. We assume any extra bits in word
123 // unset so no additional accounting is needed for when number_of_bits <
125 current_byte_
|= (word
& BitUtil::kPrecedingBitmask
[bits_to_carry
]) << bit_offset
;
126 // Check if everything is transfered into current_byte_.
127 if (ARROW_PREDICT_FALSE(number_of_bits
< bits_to_carry
)) {
130 *append_position
= current_byte_
;
132 // Move the carry bits off of word.
133 word
= word
>> bits_to_carry
;
134 number_of_bits
-= bits_to_carry
;
136 word
= BitUtil::ToLittleEndian(word
);
137 int64_t bytes_for_word
= ::arrow::BitUtil::BytesForBits(number_of_bits
);
138 std::memcpy(append_position
, &word
, bytes_for_word
);
139 // At this point, the previous current_byte_ has been written to bitmap_.
140 // The new current_byte_ is either the last relevant byte in 'word'
141 // or cleared if the new position is byte aligned (i.e. a fresh byte).
142 if (bit_mask_
== 0x1) {
145 current_byte_
= *(append_position
+ bytes_for_word
- 1);
149 void Set() { current_byte_
|= bit_mask_
; }
154 bit_mask_
= static_cast<uint8_t>(bit_mask_
<< 1);
156 if (bit_mask_
== 0) {
157 // Finished this byte, need advancing
159 bitmap_
[byte_offset_
++] = current_byte_
;
165 // Store current byte if we didn't went go bitmap storage
166 if (length_
> 0 && (bit_mask_
!= 0x01 || position_
< length_
)) {
167 bitmap_
[byte_offset_
] = current_byte_
;
171 int64_t position() const { return position_
; }
178 uint8_t current_byte_
;
180 int64_t byte_offset_
;
183 template <typename Word
, bool may_have_byte_offset
= true>
184 class BitmapWordWriter
{
186 BitmapWordWriter() = default;
187 BitmapWordWriter(uint8_t* bitmap
, int64_t offset
, int64_t length
)
188 : offset_(static_cast<int64_t>(may_have_byte_offset
) * (offset
% 8)),
189 bitmap_(bitmap
+ offset
/ 8),
190 bitmap_end_(bitmap_
+ BitUtil::BytesForBits(offset_
+ length
)),
191 mask_((1U << offset_
) - 1) {
193 if (length
>= static_cast<int>(sizeof(Word
) * 8)) {
194 current_data
.word_
= load
<Word
>(bitmap_
);
195 } else if (length
> 0) {
196 current_data
.epi
.byte_
= load
<uint8_t>(bitmap_
);
201 void PutNextWord(Word word
) {
202 if (may_have_byte_offset
&& offset_
) {
203 // split one word into two adjacent words, don't touch unused bits
204 // |<------ word ----->|
205 // +-----+-------------+
207 // +-----+-------------+
210 // +-------------+-----+-------------+-----+
211 // | --- | A | B | --- |
212 // +-------------+-----+-------------+-----+
213 // |<------ next ----->|<---- current ---->|
214 word
= (word
<< offset_
) | (word
>> (sizeof(Word
) * 8 - offset_
));
215 Word next_word
= load
<Word
>(bitmap_
+ sizeof(Word
));
216 current_data
.word_
= (current_data
.word_
& mask_
) | (word
& ~mask_
);
217 next_word
= (next_word
& ~mask_
) | (word
& mask_
);
218 store
<Word
>(bitmap_
, current_data
.word_
);
219 store
<Word
>(bitmap_
+ sizeof(Word
), next_word
);
220 current_data
.word_
= next_word
;
222 store
<Word
>(bitmap_
, word
);
224 bitmap_
+= sizeof(Word
);
227 void PutNextTrailingByte(uint8_t byte
, int valid_bits
) {
228 if (valid_bits
== 8) {
229 if (may_have_byte_offset
&& offset_
) {
230 byte
= (byte
<< offset_
) | (byte
>> (8 - offset_
));
231 uint8_t next_byte
= load
<uint8_t>(bitmap_
+ 1);
232 current_data
.epi
.byte_
= (current_data
.epi
.byte_
& mask_
) | (byte
& ~mask_
);
233 next_byte
= (next_byte
& ~mask_
) | (byte
& mask_
);
234 store
<uint8_t>(bitmap_
, current_data
.epi
.byte_
);
235 store
<uint8_t>(bitmap_
+ 1, next_byte
);
236 current_data
.epi
.byte_
= next_byte
;
238 store
<uint8_t>(bitmap_
, byte
);
242 assert(valid_bits
> 0);
243 assert(valid_bits
< 8);
244 assert(bitmap_
+ BitUtil::BytesForBits(offset_
+ valid_bits
) <= bitmap_end_
);
245 internal::BitmapWriter
writer(bitmap_
, offset_
, valid_bits
);
246 for (int i
= 0; i
< valid_bits
; ++i
) {
247 (byte
& 0x01) ? writer
.Set() : writer
.Clear();
259 const uint8_t* bitmap_end_
;
264 #if ARROW_LITTLE_ENDIAN == 0
265 uint8_t padding_bytes_
[sizeof(Word
) - 1];
271 template <typename DType
>
272 DType
load(const uint8_t* bitmap
) {
273 assert(bitmap
+ sizeof(DType
) <= bitmap_end_
);
274 return BitUtil::ToLittleEndian(util::SafeLoadAs
<DType
>(bitmap
));
277 template <typename DType
>
278 void store(uint8_t* bitmap
, DType data
) {
279 assert(bitmap
+ sizeof(DType
) <= bitmap_end_
);
280 util::SafeStore(bitmap
, BitUtil::FromLittleEndian(data
));
284 } // namespace internal