2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org
.apache
.arrow
.vector
;
20 import static io
.netty
.util
.internal
.PlatformDependent
.getByte
;
21 import static io
.netty
.util
.internal
.PlatformDependent
.getInt
;
22 import static io
.netty
.util
.internal
.PlatformDependent
.getLong
;
23 import static org
.apache
.arrow
.memory
.util
.LargeMemoryUtil
.checkedCastToInt
;
25 import org
.apache
.arrow
.memory
.ArrowBuf
;
26 import org
.apache
.arrow
.memory
.BoundsChecking
;
27 import org
.apache
.arrow
.memory
.BufferAllocator
;
28 import org
.apache
.arrow
.vector
.ipc
.message
.ArrowFieldNode
;
29 import org
.apache
.arrow
.vector
.util
.DataSizeRoundingUtil
;
31 import io
.netty
.util
.internal
.PlatformDependent
;
34 * Helper class for performing generic operations on a bit vector buffer.
35 * External use of this class is not recommended.
37 public class BitVectorHelper
{
39 private BitVectorHelper() {}
42 * Get the index of byte corresponding to bit index in validity buffer.
44 public static long byteIndex(long absoluteBitIndex
) {
45 return absoluteBitIndex
>> 3;
49 * Get the relative index of bit within the byte in validity buffer.
51 public static int bitIndex(long absoluteBitIndex
) {
52 return checkedCastToInt(absoluteBitIndex
& 7);
56 * Get the index of byte corresponding to bit index in validity buffer.
58 public static int byteIndex(int absoluteBitIndex
) {
59 return absoluteBitIndex
>> 3;
63 * Get the relative index of bit within the byte in validity buffer.
65 public static int bitIndex(int absoluteBitIndex
) {
66 return absoluteBitIndex
& 7;
70 * Set the bit at provided index to 1.
72 * @param validityBuffer validity buffer of the vector
73 * @param index index to be set
75 public static void setBit(ArrowBuf validityBuffer
, long index
) {
76 // it can be observed that some logic is duplicate of the logic in setValidityBit.
77 // this is because JIT cannot always remove the if branch in setValidityBit,
78 // so we give a dedicated implementation for setting bits.
79 final long byteIndex
= byteIndex(index
);
80 final int bitIndex
= bitIndex(index
);
82 // the byte is promoted to an int, because according to Java specification,
83 // bytes will be promoted to ints automatically, upon expression evaluation.
84 // by promoting it manually, we avoid the unnecessary conversions.
85 int currentByte
= validityBuffer
.getByte(byteIndex
);
86 final int bitMask
= 1 << bitIndex
;
87 currentByte
|= bitMask
;
88 validityBuffer
.setByte(byteIndex
, currentByte
);
92 * Set the bit at provided index to 0.
94 * @param validityBuffer validity buffer of the vector
95 * @param index index to be set
97 public static void unsetBit(ArrowBuf validityBuffer
, int index
) {
98 // it can be observed that some logic is duplicate of the logic in setValidityBit.
99 // this is because JIT cannot always remove the if branch in setValidityBit,
100 // so we give a dedicated implementation for unsetting bits.
101 final int byteIndex
= byteIndex(index
);
102 final int bitIndex
= bitIndex(index
);
104 // the byte is promoted to an int, because according to Java specification,
105 // bytes will be promoted to ints automatically, upon expression evaluation.
106 // by promoting it manually, we avoid the unnecessary conversions.
107 int currentByte
= validityBuffer
.getByte(byteIndex
);
108 final int bitMask
= 1 << bitIndex
;
109 currentByte
&= ~bitMask
;
110 validityBuffer
.setByte(byteIndex
, currentByte
);
114 * Set the bit at a given index to provided value (1 or 0).
116 * @param validityBuffer validity buffer of the vector
117 * @param index index to be set
118 * @param value value to set
120 public static void setValidityBit(ArrowBuf validityBuffer
, int index
, int value
) {
121 final int byteIndex
= byteIndex(index
);
122 final int bitIndex
= bitIndex(index
);
124 // the byte is promoted to an int, because according to Java specification,
125 // bytes will be promoted to ints automatically, upon expression evaluation.
126 // by promoting it manually, we avoid the unnecessary conversions.
127 int currentByte
= validityBuffer
.getByte(byteIndex
);
128 final int bitMask
= 1 << bitIndex
;
130 currentByte
|= bitMask
;
132 currentByte
&= ~bitMask
;
134 validityBuffer
.setByte(byteIndex
, currentByte
);
138 * Set the bit at a given index to provided value (1 or 0). Internally
139 * takes care of allocating the buffer if the caller didn't do so.
141 * @param validityBuffer validity buffer of the vector
142 * @param allocator allocator for the buffer
143 * @param valueCount number of values to allocate/set
144 * @param index index to be set
145 * @param value value to set
148 public static ArrowBuf
setValidityBit(ArrowBuf validityBuffer
, BufferAllocator allocator
,
149 int valueCount
, int index
, int value
) {
150 if (validityBuffer
== null) {
151 validityBuffer
= allocator
.buffer(getValidityBufferSize(valueCount
));
153 setValidityBit(validityBuffer
, index
, value
);
154 if (index
== (valueCount
- 1)) {
155 validityBuffer
.writerIndex(getValidityBufferSize(valueCount
));
158 return validityBuffer
;
162 * Check if a bit at a given index is set or not.
164 * @param buffer buffer to check
165 * @param index index of the buffer
166 * @return 1 if bit is set, 0 otherwise.
168 public static int get(final ArrowBuf buffer
, int index
) {
169 final int byteIndex
= index
>> 3;
170 final byte b
= buffer
.getByte(byteIndex
);
171 final int bitIndex
= index
& 7;
172 return (b
>> bitIndex
) & 0x01;
176 * Compute the size of validity buffer required to manage a given number
177 * of elements in a vector.
179 * @param valueCount number of elements in the vector
180 * @return buffer size
182 public static int getValidityBufferSize(int valueCount
) {
183 return DataSizeRoundingUtil
.divideBy8Ceil(valueCount
);
187 * Given a validity buffer, find the number of bits that are not set.
188 * This is used to compute the number of null elements in a nullable vector.
190 * @param validityBuffer validity buffer of the vector
191 * @param valueCount number of values in the vector
192 * @return number of bits not set.
194 public static int getNullCount(final ArrowBuf validityBuffer
, final int valueCount
) {
195 if (valueCount
== 0) {
199 final int sizeInBytes
= getValidityBufferSize(valueCount
);
200 // If value count is not a multiple of 8, then calculate number of used bits in the last byte
201 final int remainder
= valueCount
% 8;
202 final int fullBytesCount
= remainder
== 0 ? sizeInBytes
: sizeInBytes
- 1;
205 while (index
+ 8 <= fullBytesCount
) {
206 long longValue
= validityBuffer
.getLong(index
);
207 count
+= Long
.bitCount(longValue
);
211 if (index
+ 4 <= fullBytesCount
) {
212 int intValue
= validityBuffer
.getInt(index
);
213 count
+= Integer
.bitCount(intValue
);
217 while (index
< fullBytesCount
) {
218 byte byteValue
= validityBuffer
.getByte(index
);
219 count
+= Integer
.bitCount(byteValue
& 0xFF);
223 // handling with the last bits
224 if (remainder
!= 0) {
225 byte byteValue
= validityBuffer
.getByte(sizeInBytes
- 1);
227 // making the remaining bits all 1s if it is not fully filled
228 byte mask
= (byte) (0xFF << remainder
);
229 byteValue
= (byte) (byteValue
| mask
);
230 count
+= Integer
.bitCount(byteValue
& 0xFF);
233 return 8 * sizeInBytes
- count
;
237 * Tests if all bits in a validity buffer are equal 0 or 1, according to the specified parameter.
238 * @param validityBuffer the validity buffer.
239 * @param valueCount the bit count.
240 * @param checkOneBits if set to true, the method checks if all bits are equal to 1;
241 * otherwise, it checks if all bits are equal to 0.
242 * @return true if all bits are 0 or 1 according to the parameter, and false otherwise.
244 public static boolean checkAllBitsEqualTo(
245 final ArrowBuf validityBuffer
, final int valueCount
, final boolean checkOneBits
) {
246 if (valueCount
== 0) {
249 final int sizeInBytes
= getValidityBufferSize(valueCount
);
252 validityBuffer
.checkBytes(0, sizeInBytes
);
254 // If value count is not a multiple of 8, then calculate number of used bits in the last byte
255 final int remainder
= valueCount
% 8;
256 final int fullBytesCount
= remainder
== 0 ? sizeInBytes
: sizeInBytes
- 1;
258 // the integer number to compare against
259 final int intToCompare
= checkOneBits ?
-1 : 0;
262 while (index
+ 8 <= fullBytesCount
) {
263 long longValue
= getLong(validityBuffer
.memoryAddress() + index
);
264 if (longValue
!= (long) intToCompare
) {
270 if (index
+ 4 <= fullBytesCount
) {
271 int intValue
= getInt(validityBuffer
.memoryAddress() + index
);
272 if (intValue
!= intToCompare
) {
278 while (index
< fullBytesCount
) {
279 byte byteValue
= getByte(validityBuffer
.memoryAddress() + index
);
280 if (byteValue
!= (byte) intToCompare
) {
286 // handling with the last bits
287 if (remainder
!= 0) {
288 byte byteValue
= getByte(validityBuffer
.memoryAddress() + sizeInBytes
- 1);
289 byte mask
= (byte) ((1 << remainder
) - 1);
290 byteValue
= (byte) (byteValue
& mask
);
292 if ((mask
& byteValue
) != mask
) {
296 if (byteValue
!= (byte) 0) {
304 /** Returns the byte at index from data right-shifted by offset. */
305 public static byte getBitsFromCurrentByte(final ArrowBuf data
, final int index
, final int offset
) {
306 return (byte) ((data
.getByte(index
) & 0xFF) >>> offset
);
310 * Returns the byte at <code>index</code> from left-shifted by (8 - <code>offset</code>).
312 public static byte getBitsFromNextByte(ArrowBuf data
, int index
, int offset
) {
313 return (byte) ((data
.getByte(index
) << (8 - offset
)));
317 * Returns a new buffer if the source validity buffer is either all null or all
318 * not-null, otherwise returns a buffer pointing to the same memory as source.
320 * @param fieldNode The fieldNode containing the null count
321 * @param sourceValidityBuffer The source validity buffer that will have its
322 * position copied if there is a mix of null and non-null values
323 * @param allocator The allocator to use for creating a new buffer if necessary.
324 * @return A new buffer that is either allocated or points to the same memory as sourceValidityBuffer.
326 public static ArrowBuf
loadValidityBuffer(final ArrowFieldNode fieldNode
,
327 final ArrowBuf sourceValidityBuffer
,
328 final BufferAllocator allocator
) {
329 final int valueCount
= fieldNode
.getLength();
330 ArrowBuf newBuffer
= null;
331 /* either all NULLs or all non-NULLs */
332 if (fieldNode
.getNullCount() == 0 || fieldNode
.getNullCount() == valueCount
) {
333 newBuffer
= allocator
.buffer(getValidityBufferSize(valueCount
));
334 newBuffer
.setZero(0, newBuffer
.capacity());
335 if (fieldNode
.getNullCount() != 0) {
340 int fullBytesCount
= valueCount
/ 8;
341 newBuffer
.setOne(0, fullBytesCount
);
342 int remainder
= valueCount
% 8;
344 byte bitMask
= (byte) (0xFFL
>>> ((8 - remainder
) & 7));
345 newBuffer
.setByte(fullBytesCount
, bitMask
);
348 /* mixed byte pattern -- create another ArrowBuf associated with the
351 newBuffer
= sourceValidityBuffer
.getReferenceManager().retain(sourceValidityBuffer
, allocator
);
358 * Set the byte of the given index in the data buffer by applying a bit mask to
359 * the current byte at that index.
361 * @param data buffer to set
362 * @param byteIndex byteIndex within the buffer
363 * @param bitMask bit mask to be set
365 static void setBitMaskedByte(ArrowBuf data
, int byteIndex
, byte bitMask
) {
366 byte currentByte
= data
.getByte(byteIndex
);
367 currentByte
|= bitMask
;
368 data
.setByte(byteIndex
, currentByte
);
372 * Concat two validity buffers.
373 * @param input1 the first validity buffer.
374 * @param numBits1 the number of bits in the first validity buffer.
375 * @param input2 the second validity buffer.
376 * @param numBits2 the number of bits in the second validity buffer.
377 * @param output the output validity buffer. It can be the same one as the first input.
378 * The caller must make sure the output buffer has enough capacity.
380 public static void concatBits(ArrowBuf input1
, int numBits1
, ArrowBuf input2
, int numBits2
, ArrowBuf output
) {
381 int numBytes1
= DataSizeRoundingUtil
.divideBy8Ceil(numBits1
);
382 int numBytes2
= DataSizeRoundingUtil
.divideBy8Ceil(numBits2
);
383 int numBytesOut
= DataSizeRoundingUtil
.divideBy8Ceil(numBits1
+ numBits2
);
385 if (BoundsChecking
.BOUNDS_CHECKING_ENABLED
) {
386 output
.checkBytes(0, numBytesOut
);
389 // copy the first bit set
390 if (input1
!= output
) {
391 PlatformDependent
.copyMemory(input1
.memoryAddress(), output
.memoryAddress(), numBytes1
);
394 if (bitIndex(numBits1
) == 0) {
395 // The number of bits for the first bit set is a multiple of 8, so the boundary is at byte boundary.
396 // For this case, we have a shortcut to copy all bytes from the second set after the byte boundary.
397 PlatformDependent
.copyMemory(input2
.memoryAddress(), output
.memoryAddress() + numBytes1
, numBytes2
);
401 // the number of bits to fill a full byte after the first input is processed
402 int numBitsToFill
= 8 - bitIndex(numBits1
);
404 // mask to clear high bits
405 int mask
= (1 << (8 - numBitsToFill
)) - 1;
407 int numFullBytes
= numBits2
/ 8;
409 int prevByte
= output
.getByte(numBytes1
- 1) & mask
;
410 for (int i
= 0; i
< numFullBytes
; i
++) {
411 int curByte
= input2
.getByte(i
) & 0xff;
413 // first fill the bits to a full byte
414 int byteToFill
= (curByte
<< (8 - numBitsToFill
)) & 0xff;
415 output
.setByte(numBytes1
+ i
- 1, byteToFill
| prevByte
);
417 // fill remaining bits in the current byte
418 // note that it is also the previous byte for the next iteration
419 prevByte
= curByte
>>> numBitsToFill
;
422 int lastOutputByte
= prevByte
;
424 // the number of extra bits for the second input, relative to full bytes
425 int numTrailingBits
= bitIndex(numBits2
);
427 if (numTrailingBits
== 0) {
428 output
.setByte(numBytes1
+ numFullBytes
- 1, lastOutputByte
);
432 // process remaining bits from input2
433 int remByte
= input2
.getByte(numBytes2
- 1) & 0xff;
435 int byteToFill
= remByte
<< (8 - numBitsToFill
);
436 lastOutputByte
|= byteToFill
;
438 output
.setByte(numBytes1
+ numFullBytes
- 1, lastOutputByte
);
440 if (numTrailingBits
> numBitsToFill
) {
441 // clear all bits for the last byte before writing
442 output
.setByte(numBytes1
+ numFullBytes
, 0);
444 // some remaining bits cannot be filled in the previous byte
445 int leftByte
= remByte
>>> numBitsToFill
;
446 output
.setByte(numBytes1
+ numFullBytes
, leftByte
);