/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org
.apache
.arrow
.vector
.complex
;
20 import static java
.util
.Collections
.singletonList
;
21 import static org
.apache
.arrow
.memory
.util
.LargeMemoryUtil
.capAtMaxInt
;
22 import static org
.apache
.arrow
.memory
.util
.LargeMemoryUtil
.checkedCastToInt
;
23 import static org
.apache
.arrow
.util
.Preconditions
.checkNotNull
;
25 import java
.util
.ArrayList
;
26 import java
.util
.Arrays
;
27 import java
.util
.Collections
;
28 import java
.util
.List
;
30 import org
.apache
.arrow
.memory
.ArrowBuf
;
31 import org
.apache
.arrow
.memory
.BufferAllocator
;
32 import org
.apache
.arrow
.memory
.OutOfMemoryException
;
33 import org
.apache
.arrow
.memory
.util
.ArrowBufPointer
;
34 import org
.apache
.arrow
.memory
.util
.ByteFunctionHelpers
;
35 import org
.apache
.arrow
.memory
.util
.CommonUtil
;
36 import org
.apache
.arrow
.memory
.util
.hash
.ArrowBufHasher
;
37 import org
.apache
.arrow
.util
.Preconditions
;
38 import org
.apache
.arrow
.vector
.AddOrGetResult
;
39 import org
.apache
.arrow
.vector
.BitVectorHelper
;
40 import org
.apache
.arrow
.vector
.BufferBacked
;
41 import org
.apache
.arrow
.vector
.FieldVector
;
42 import org
.apache
.arrow
.vector
.ValueVector
;
43 import org
.apache
.arrow
.vector
.ZeroVector
;
44 import org
.apache
.arrow
.vector
.compare
.VectorVisitor
;
45 import org
.apache
.arrow
.vector
.complex
.impl
.ComplexCopier
;
46 import org
.apache
.arrow
.vector
.complex
.impl
.UnionListReader
;
47 import org
.apache
.arrow
.vector
.complex
.impl
.UnionListWriter
;
48 import org
.apache
.arrow
.vector
.complex
.reader
.FieldReader
;
49 import org
.apache
.arrow
.vector
.complex
.writer
.FieldWriter
;
50 import org
.apache
.arrow
.vector
.ipc
.message
.ArrowFieldNode
;
51 import org
.apache
.arrow
.vector
.types
.Types
.MinorType
;
52 import org
.apache
.arrow
.vector
.types
.pojo
.ArrowType
;
53 import org
.apache
.arrow
.vector
.types
.pojo
.Field
;
54 import org
.apache
.arrow
.vector
.types
.pojo
.FieldType
;
55 import org
.apache
.arrow
.vector
.util
.CallBack
;
56 import org
.apache
.arrow
.vector
.util
.JsonStringArrayList
;
57 import org
.apache
.arrow
.vector
.util
.OversizedAllocationException
;
58 import org
.apache
.arrow
.vector
.util
.TransferPair
;
/**
 * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
 * <ol>
 * <li>A validity buffer.</li>
 * <li>An offset buffer, that denotes lists boundaries.</li>
 * <li>A child data vector that contains the elements of lists.</li>
 * </ol>
 * The latter two are managed by its superclass.
 */
69 public class ListVector
extends BaseRepeatedValueVector
implements PromotableVector
{
71 public static ListVector
empty(String name
, BufferAllocator allocator
) {
72 return new ListVector(name
, allocator
, FieldType
.nullable(ArrowType
.List
.INSTANCE
), null);
75 protected ArrowBuf validityBuffer
;
76 protected UnionListReader reader
;
77 private CallBack callBack
;
78 private final FieldType fieldType
;
79 private int validityAllocationSizeInBytes
;
82 * The maximum index that is actually set.
87 * Constructs a new instance.
89 * @param name The name of the instance.
90 * @param allocator The allocator to use for allocating/reallocating buffers.
91 * @param fieldType The type of this list.
92 * @param callBack A schema change callback.
94 public ListVector(String name
, BufferAllocator allocator
, FieldType fieldType
, CallBack callBack
) {
95 super(name
, allocator
, callBack
);
96 this.validityBuffer
= allocator
.getEmpty();
97 this.fieldType
= checkNotNull(fieldType
);
98 this.callBack
= callBack
;
99 this.validityAllocationSizeInBytes
= getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION
);
104 public void initializeChildrenFromFields(List
<Field
> children
) {
105 if (children
.size() != 1) {
106 throw new IllegalArgumentException("Lists have only one child. Found: " + children
);
108 Field field
= children
.get(0);
109 AddOrGetResult
<FieldVector
> addOrGetVector
= addOrGetVector(field
.getFieldType());
110 if (!addOrGetVector
.isCreated()) {
111 throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector
.getVector());
114 addOrGetVector
.getVector().initializeChildrenFromFields(field
.getChildren());
118 public void setInitialCapacity(int numRecords
) {
119 validityAllocationSizeInBytes
= getValidityBufferSizeFromCount(numRecords
);
120 super.setInitialCapacity(numRecords
);
124 * Specialized version of setInitialCapacity() for ListVector. This is
125 * used by some callers when they want to explicitly control and be
126 * conservative about memory allocated for inner data vector. This is
127 * very useful when we are working with memory constraints for a query
128 * and have a fixed amount of memory reserved for the record batch. In
129 * such cases, we are likely to face OOM or related problems when
130 * we reserve memory for a record batch with value count x and
131 * do setInitialCapacity(x) such that each vector allocates only
132 * what is necessary and not the default amount but the multiplier
133 * forces the memory requirement to go beyond what was needed.
135 * @param numRecords value count
136 * @param density density of ListVector. Density is the average size of
137 * list per position in the List vector. For example, a
138 * density value of 10 implies each position in the list
139 * vector has a list of 10 values.
140 * A density value of 0.1 implies out of 10 positions in
141 * the list vector, 1 position has a list of size 1 and
142 * remaining positions are null (no lists) or empty lists.
143 * This helps in tightly controlling the memory we provision
144 * for inner data vector.
147 public void setInitialCapacity(int numRecords
, double density
) {
148 validityAllocationSizeInBytes
= getValidityBufferSizeFromCount(numRecords
);
149 super.setInitialCapacity(numRecords
, density
);
153 * Get the density of this ListVector.
156 public double getDensity() {
157 if (valueCount
== 0) {
160 final int startOffset
= offsetBuffer
.getInt(0);
161 final int endOffset
= offsetBuffer
.getInt(valueCount
* OFFSET_WIDTH
);
162 final double totalListSize
= endOffset
- startOffset
;
163 return totalListSize
/ valueCount
;
167 public List
<FieldVector
> getChildrenFromFields() {
168 return singletonList(getDataVector());
172 * Load the buffers of this vector with provided source buffers.
173 * The caller manages the source buffers and populates them before invoking
175 * @param fieldNode the fieldNode indicating the value count
176 * @param ownBuffers the buffers for this Field (own buffers only, children not included)
179 public void loadFieldBuffers(ArrowFieldNode fieldNode
, List
<ArrowBuf
> ownBuffers
) {
180 if (ownBuffers
.size() != 2) {
181 throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers
.size());
184 ArrowBuf bitBuffer
= ownBuffers
.get(0);
185 ArrowBuf offBuffer
= ownBuffers
.get(1);
187 validityBuffer
.getReferenceManager().release();
188 validityBuffer
= BitVectorHelper
.loadValidityBuffer(fieldNode
, bitBuffer
, allocator
);
189 offsetBuffer
.getReferenceManager().release();
190 offsetBuffer
= offBuffer
.getReferenceManager().retain(offBuffer
, allocator
);
192 validityAllocationSizeInBytes
= checkedCastToInt(validityBuffer
.capacity());
193 offsetAllocationSizeInBytes
= offsetBuffer
.capacity();
195 lastSet
= fieldNode
.getLength() - 1;
196 valueCount
= fieldNode
.getLength();
200 * Get the buffers belonging to this vector.
201 * @return the inner buffers.
204 public List
<ArrowBuf
> getFieldBuffers() {
205 List
<ArrowBuf
> result
= new ArrayList
<>(2);
206 setReaderAndWriterIndex();
207 result
.add(validityBuffer
);
208 result
.add(offsetBuffer
);
214 * Set the reader and writer indexes for the inner buffers.
216 private void setReaderAndWriterIndex() {
217 validityBuffer
.readerIndex(0);
218 offsetBuffer
.readerIndex(0);
219 if (valueCount
== 0) {
220 validityBuffer
.writerIndex(0);
221 offsetBuffer
.writerIndex(0);
223 validityBuffer
.writerIndex(getValidityBufferSizeFromCount(valueCount
));
224 offsetBuffer
.writerIndex((valueCount
+ 1) * OFFSET_WIDTH
);
229 * Get the inner vectors.
231 * @deprecated This API will be removed as the current implementations no longer support inner vectors.
233 * @return the inner vectors for this field as defined by the TypeLayout
237 public List
<BufferBacked
> getFieldInnerVectors() {
238 throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
241 public UnionListWriter
getWriter() {
242 return new UnionListWriter(this);
246 * Same as {@link #allocateNewSafe()}.
249 public void allocateNew() throws OutOfMemoryException
{
250 if (!allocateNewSafe()) {
251 throw new OutOfMemoryException("Failure while allocating memory");
/**
 * Allocate memory for the vector. We internally use a default value count
 * of 4096 to allocate memory for at least these many elements in the
 * vector.
 *
 * @return false if memory allocation fails, true otherwise.
 */
262 public boolean allocateNewSafe() {
263 boolean success
= false;
265 /* we are doing a new allocation -- release the current buffers */
267 /* allocate validity buffer */
268 allocateValidityBuffer(validityAllocationSizeInBytes
);
269 /* allocate offset and data buffer */
270 success
= super.allocateNewSafe();
280 private void allocateValidityBuffer(final long size
) {
281 final int curSize
= (int) size
;
282 validityBuffer
= allocator
.buffer(curSize
);
283 validityBuffer
.readerIndex(0);
284 validityAllocationSizeInBytes
= curSize
;
285 validityBuffer
.setZero(0, validityBuffer
.capacity());
289 * Resize the vector to increase the capacity. The internal behavior is to
290 * double the current value capacity.
293 public void reAlloc() {
294 /* reallocate the validity buffer */
295 reallocValidityBuffer();
296 /* reallocate the offset and data */
300 private void reallocValidityAndOffsetBuffers() {
301 reallocOffsetBuffer();
302 reallocValidityBuffer();
305 private void reallocValidityBuffer() {
306 final int currentBufferCapacity
= checkedCastToInt(validityBuffer
.capacity());
307 long newAllocationSize
= currentBufferCapacity
* 2;
308 if (newAllocationSize
== 0) {
309 if (validityAllocationSizeInBytes
> 0) {
310 newAllocationSize
= validityAllocationSizeInBytes
;
312 newAllocationSize
= getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION
) * 2;
315 newAllocationSize
= CommonUtil
.nextPowerOfTwo(newAllocationSize
);
316 assert newAllocationSize
>= 1;
318 if (newAllocationSize
> MAX_ALLOCATION_SIZE
) {
319 throw new OversizedAllocationException("Unable to expand the buffer");
322 final ArrowBuf newBuf
= allocator
.buffer((int) newAllocationSize
);
323 newBuf
.setBytes(0, validityBuffer
, 0, currentBufferCapacity
);
324 newBuf
.setZero(currentBufferCapacity
, newBuf
.capacity() - currentBufferCapacity
);
325 validityBuffer
.getReferenceManager().release(1);
326 validityBuffer
= newBuf
;
327 validityAllocationSizeInBytes
= (int) newAllocationSize
;
331 * Same as {@link #copyFrom(int, int, ValueVector)} except that
332 * it handles the case when the capacity of the vector needs to be expanded
334 * @param inIndex position to copy from in source vector
335 * @param outIndex position to copy to in this vector
336 * @param from source vector
339 public void copyFromSafe(int inIndex
, int outIndex
, ValueVector from
) {
340 copyFrom(inIndex
, outIndex
, from
);
344 * Copy a cell value from a particular index in source vector to a particular
345 * position in this vector.
346 * @param inIndex position to copy from in source vector
347 * @param outIndex position to copy to in this vector
348 * @param from source vector
351 public void copyFrom(int inIndex
, int outIndex
, ValueVector from
) {
352 Preconditions
.checkArgument(this.getMinorType() == from
.getMinorType());
353 FieldReader in
= from
.getReader();
354 in
.setPosition(inIndex
);
355 FieldWriter out
= getWriter();
356 out
.setPosition(outIndex
);
357 ComplexCopier
.copy(in
, out
);
361 * Get the inner data vector for this list vector.
362 * @return data vector
365 public FieldVector
getDataVector() {
370 public TransferPair
getTransferPair(String ref
, BufferAllocator allocator
) {
371 return getTransferPair(ref
, allocator
, null);
375 public TransferPair
getTransferPair(String ref
, BufferAllocator allocator
, CallBack callBack
) {
376 return new TransferImpl(ref
, allocator
, callBack
);
380 public TransferPair
makeTransferPair(ValueVector target
) {
381 return new TransferImpl((ListVector
) target
);
385 public long getValidityBufferAddress() {
386 return (validityBuffer
.memoryAddress());
390 public long getDataBufferAddress() {
391 throw new UnsupportedOperationException();
395 public long getOffsetBufferAddress() {
396 return (offsetBuffer
.memoryAddress());
400 public ArrowBuf
getValidityBuffer() {
401 return validityBuffer
;
405 public ArrowBuf
getDataBuffer() {
406 throw new UnsupportedOperationException();
410 public ArrowBuf
getOffsetBuffer() {
415 public int hashCode(int index
) {
416 return hashCode(index
, null);
420 public int hashCode(int index
, ArrowBufHasher hasher
) {
421 if (isSet(index
) == 0) {
422 return ArrowBufPointer
.NULL_HASH_CODE
;
425 final int start
= offsetBuffer
.getInt(index
* OFFSET_WIDTH
);
426 final int end
= offsetBuffer
.getInt((index
+ 1) * OFFSET_WIDTH
);
427 for (int i
= start
; i
< end
; i
++) {
428 hash
= ByteFunctionHelpers
.combineHash(hash
, vector
.hashCode(i
, hasher
));
434 public <OUT
, IN
> OUT
accept(VectorVisitor
<OUT
, IN
> visitor
, IN value
) {
435 return visitor
.visit(this, value
);
438 private class TransferImpl
implements TransferPair
{
441 TransferPair dataTransferPair
;
/**
 * Builds a transfer pair whose target is a new {@code ListVector} created with the given
 * name, allocator and callback and with the enclosing vector's field type.
 *
 * @param name name of the target vector
 * @param allocator allocator for the target vector
 * @param callBack schema-change callback for the target vector
 */
public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
  // Delegation must be the first statement, so the target is built inline.
  this(new ListVector(name, allocator, fieldType, callBack));
}
447 public TransferImpl(ListVector to
) {
449 to
.addOrGetVector(vector
.getField().getFieldType());
450 if (to
.getDataVector() instanceof ZeroVector
) {
451 to
.addOrGetVector(vector
.getField().getFieldType());
453 dataTransferPair
= getDataVector().makeTransferPair(to
.getDataVector());
/**
 * Transfer this vector's data to another vector. The memory associated
 * with this vector is transferred to the allocator of target vector
 * for accounting and management purposes.
 */
462 public void transfer() {
464 dataTransferPair
.transfer();
465 to
.validityBuffer
= transferBuffer(validityBuffer
, to
.allocator
);
466 to
.offsetBuffer
= transferBuffer(offsetBuffer
, to
.allocator
);
467 to
.lastSet
= lastSet
;
468 if (valueCount
> 0) {
469 to
.setValueCount(valueCount
);
475 * Slice this vector at desired index and length and transfer the
476 * corresponding data to the target vector.
477 * @param startIndex start position of the split in source vector.
478 * @param length length of the split.
481 public void splitAndTransfer(int startIndex
, int length
) {
482 Preconditions
.checkArgument(startIndex
>= 0 && length
>= 0 && startIndex
+ length
<= valueCount
,
483 "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex
, length
, valueCount
);
484 final int startPoint
= offsetBuffer
.getInt(startIndex
* OFFSET_WIDTH
);
485 final int sliceLength
= offsetBuffer
.getInt((startIndex
+ length
) * OFFSET_WIDTH
) - startPoint
;
487 to
.allocateOffsetBuffer((length
+ 1) * OFFSET_WIDTH
);
488 /* splitAndTransfer offset buffer */
489 for (int i
= 0; i
< length
+ 1; i
++) {
490 final int relativeOffset
= offsetBuffer
.getInt((startIndex
+ i
) * OFFSET_WIDTH
) - startPoint
;
491 to
.offsetBuffer
.setInt(i
* OFFSET_WIDTH
, relativeOffset
);
493 /* splitAndTransfer validity buffer */
494 splitAndTransferValidityBuffer(startIndex
, length
, to
);
495 /* splitAndTransfer data buffer */
496 dataTransferPair
.splitAndTransfer(startPoint
, sliceLength
);
497 to
.lastSet
= length
- 1;
498 to
.setValueCount(length
);
502 * transfer the validity.
504 private void splitAndTransferValidityBuffer(int startIndex
, int length
, ListVector target
) {
505 int firstByteSource
= BitVectorHelper
.byteIndex(startIndex
);
506 int lastByteSource
= BitVectorHelper
.byteIndex(valueCount
- 1);
507 int byteSizeTarget
= getValidityBufferSizeFromCount(length
);
508 int offset
= startIndex
% 8;
513 if (target
.validityBuffer
!= null) {
514 target
.validityBuffer
.getReferenceManager().release();
516 target
.validityBuffer
= validityBuffer
.slice(firstByteSource
, byteSizeTarget
);
517 target
.validityBuffer
.getReferenceManager().retain(1);
520 * When the first bit starts from the middle of a byte (offset != 0),
521 * copy data from src BitVector.
522 * Each byte in the target is composed by a part in i-th byte,
523 * another part in (i+1)-th byte.
525 target
.allocateValidityBuffer(byteSizeTarget
);
527 for (int i
= 0; i
< byteSizeTarget
- 1; i
++) {
528 byte b1
= BitVectorHelper
.getBitsFromCurrentByte(validityBuffer
, firstByteSource
+ i
, offset
);
529 byte b2
= BitVectorHelper
.getBitsFromNextByte(validityBuffer
, firstByteSource
+ i
+ 1, offset
);
531 target
.validityBuffer
.setByte(i
, (b1
+ b2
));
534 /* Copying the last piece is done in the following manner:
535 * if the source vector has 1 or more bytes remaining, we copy
536 * the last piece as a byte formed by shifting data
537 * from the current byte and the next byte.
539 * if the source vector has no more bytes remaining
540 * (we are at the last byte), we copy the last piece as a byte
541 * by shifting data from the current byte.
543 if ((firstByteSource
+ byteSizeTarget
- 1) < lastByteSource
) {
544 byte b1
= BitVectorHelper
.getBitsFromCurrentByte(validityBuffer
,
545 firstByteSource
+ byteSizeTarget
- 1, offset
);
546 byte b2
= BitVectorHelper
.getBitsFromNextByte(validityBuffer
,
547 firstByteSource
+ byteSizeTarget
, offset
);
549 target
.validityBuffer
.setByte(byteSizeTarget
- 1, b1
+ b2
);
551 byte b1
= BitVectorHelper
.getBitsFromCurrentByte(validityBuffer
,
552 firstByteSource
+ byteSizeTarget
- 1, offset
);
553 target
.validityBuffer
.setByte(byteSizeTarget
- 1, b1
);
560 public ValueVector
getTo() {
565 public void copyValueSafe(int from
, int to
) {
566 this.to
.copyFrom(from
, to
, ListVector
.this);
571 public UnionListReader
getReader() {
572 if (reader
== null) {
573 reader
= new UnionListReader(this);
578 /** Initialize the child data vector to field type. */
579 public <T
extends ValueVector
> AddOrGetResult
<T
> addOrGetVector(FieldType fieldType
) {
580 AddOrGetResult
<T
> result
= super.addOrGetVector(fieldType
);
586 * Get the size (number of bytes) of underlying buffers used by this
588 * @return size of underlying buffers.
591 public int getBufferSize() {
592 if (valueCount
== 0) {
595 final int offsetBufferSize
= (valueCount
+ 1) * OFFSET_WIDTH
;
596 final int validityBufferSize
= getValidityBufferSizeFromCount(valueCount
);
597 return offsetBufferSize
+ validityBufferSize
+ vector
.getBufferSize();
601 public int getBufferSizeFor(int valueCount
) {
602 if (valueCount
== 0) {
605 final int validityBufferSize
= getValidityBufferSizeFromCount(valueCount
);
607 return super.getBufferSizeFor(valueCount
) + validityBufferSize
;
611 public Field
getField() {
612 return new Field(getName(), fieldType
, Collections
.singletonList(getDataVector().getField()));
616 public MinorType
getMinorType() {
617 return MinorType
.LIST
;
621 public void clear() {
623 validityBuffer
= releaseBuffer(validityBuffer
);
628 public void reset() {
630 validityBuffer
.setZero(0, validityBuffer
.capacity());
635 * Return the underlying buffers associated with this vector. Note that this doesn't
636 * impact the reference counts for this buffer so it only should be used for in-context
637 * access. Also note that this buffer changes regularly thus
638 * external classes shouldn't hold a reference to it (unless they change it).
640 * @param clear Whether to clear vector before returning; the buffers will still be refcounted
641 * but the returned array will be the only reference to them
642 * @return The underlying {@link ArrowBuf buffers} that is used by this
646 public ArrowBuf
[] getBuffers(boolean clear
) {
647 setReaderAndWriterIndex();
648 final ArrowBuf
[] buffers
;
649 if (getBufferSize() == 0) {
650 buffers
= new ArrowBuf
[0];
652 List
<ArrowBuf
> list
= new ArrayList
<>();
653 list
.add(offsetBuffer
);
654 list
.add(validityBuffer
);
655 list
.addAll(Arrays
.asList(vector
.getBuffers(false)));
656 buffers
= list
.toArray(new ArrowBuf
[list
.size()]);
659 for (ArrowBuf buffer
: buffers
) {
660 buffer
.getReferenceManager().retain();
668 public UnionVector
promoteToUnion() {
669 UnionVector vector
= new UnionVector("$data$", allocator
, /* field type*/ null, callBack
);
670 replaceDataVector(vector
);
672 if (callBack
!= null) {
678 protected void invalidateReader() {
683 * Get the element in the list vector at a particular index.
684 * @param index position of the element
685 * @return Object at given position
688 public List
<?
> getObject(int index
) {
689 if (isSet(index
) == 0) {
692 final List
<Object
> vals
= new JsonStringArrayList
<>();
693 final int start
= offsetBuffer
.getInt(index
* OFFSET_WIDTH
);
694 final int end
= offsetBuffer
.getInt((index
+ 1) * OFFSET_WIDTH
);
695 final ValueVector vv
= getDataVector();
696 for (int i
= start
; i
< end
; i
++) {
697 vals
.add(vv
.getObject(i
));
704 * Check if element at given index is null.
706 * @param index position of element
707 * @return true if element at given index is null, false otherwise
710 public boolean isNull(int index
) {
711 return (isSet(index
) == 0);
715 * Check if element at given index is empty list.
716 * @param index position of element
717 * @return true if element at given index is empty list or NULL, false otherwise
720 public boolean isEmpty(int index
) {
724 final int start
= offsetBuffer
.getInt(index
* OFFSET_WIDTH
);
725 final int end
= offsetBuffer
.getInt((index
+ 1) * OFFSET_WIDTH
);
731 * Same as {@link #isNull(int)}.
733 * @param index position of element
734 * @return 1 if element at given index is not null, 0 otherwise
736 public int isSet(int index
) {
737 final int byteIndex
= index
>> 3;
738 final byte b
= validityBuffer
.getByte(byteIndex
);
739 final int bitIndex
= index
& 7;
740 return (b
>> bitIndex
) & 0x01;
744 * Get the number of elements that are null in the vector.
746 * @return the number of null elements.
749 public int getNullCount() {
750 return BitVectorHelper
.getNullCount(validityBuffer
, valueCount
);
754 * Get the current value capacity for the vector.
755 * @return number of elements that vector can hold.
758 public int getValueCapacity() {
759 return getValidityAndOffsetValueCapacity();
762 private int getValidityAndOffsetValueCapacity() {
763 final int offsetValueCapacity
= Math
.max(getOffsetBufferValueCapacity() - 1, 0);
764 return Math
.min(offsetValueCapacity
, getValidityBufferValueCapacity());
767 private int getValidityBufferValueCapacity() {
768 return capAtMaxInt(validityBuffer
.capacity() * 8);
772 * Sets the list at index to be not-null. Reallocates validity buffer if index
773 * is larger than current capacity.
775 public void setNotNull(int index
) {
776 while (index
>= getValidityAndOffsetValueCapacity()) {
777 reallocValidityAndOffsetBuffers();
779 BitVectorHelper
.setBit(validityBuffer
, index
);
784 * Sets list at index to be null.
785 * @param index position in vector
787 public void setNull(int index
) {
788 while (index
>= getValidityAndOffsetValueCapacity()) {
789 reallocValidityAndOffsetBuffers();
791 if (lastSet
>= index
) {
794 for (int i
= lastSet
+ 1; i
<= index
; i
++) {
795 final int currentOffset
= offsetBuffer
.getInt(i
* OFFSET_WIDTH
);
796 offsetBuffer
.setInt((i
+ 1) * OFFSET_WIDTH
, currentOffset
);
798 BitVectorHelper
.unsetBit(validityBuffer
, index
);
802 * Start a new value in the list vector.
804 * @param index index of the value to start
807 public int startNewValue(int index
) {
808 while (index
>= getValidityAndOffsetValueCapacity()) {
809 reallocValidityAndOffsetBuffers();
811 if (lastSet
>= index
) {
814 for (int i
= lastSet
+ 1; i
<= index
; i
++) {
815 final int currentOffset
= offsetBuffer
.getInt(i
* OFFSET_WIDTH
);
816 offsetBuffer
.setInt((i
+ 1) * OFFSET_WIDTH
, currentOffset
);
818 BitVectorHelper
.setBit(validityBuffer
, index
);
820 return offsetBuffer
.getInt((lastSet
+ 1) * OFFSET_WIDTH
);
824 * End the current value.
826 * @param index index of the value to end
827 * @param size number of elements in the list that was written
829 public void endValue(int index
, int size
) {
830 final int currentOffset
= offsetBuffer
.getInt((index
+ 1) * OFFSET_WIDTH
);
831 offsetBuffer
.setInt((index
+ 1) * OFFSET_WIDTH
, currentOffset
+ size
);
835 * Sets the value count for the vector.
837 * @param valueCount value count
840 public void setValueCount(int valueCount
) {
841 this.valueCount
= valueCount
;
842 if (valueCount
> 0) {
843 while (valueCount
> getValidityAndOffsetValueCapacity()) {
844 /* check if validity and offset buffers need to be re-allocated */
845 reallocValidityAndOffsetBuffers();
847 for (int i
= lastSet
+ 1; i
< valueCount
; i
++) {
848 /* fill the holes with offsets */
849 final int currentOffset
= offsetBuffer
.getInt(i
* OFFSET_WIDTH
);
850 offsetBuffer
.setInt((i
+ 1) * OFFSET_WIDTH
, currentOffset
);
853 /* valueCount for the data vector is the current end offset */
854 final int childValueCount
= (valueCount
== 0) ?
0 :
855 offsetBuffer
.getInt((lastSet
+ 1) * OFFSET_WIDTH
);
856 /* set the value count of data vector and this will take care of
857 * checking whether data buffer needs to be reallocated.
859 vector
.setValueCount(childValueCount
);
862 public void setLastSet(int value
) {
866 public int getLastSet() {
871 public int getElementStartIndex(int index
) {
872 return offsetBuffer
.getInt(index
* OFFSET_WIDTH
);
876 public int getElementEndIndex(int index
) {
877 return offsetBuffer
.getInt((index
+ 1) * OFFSET_WIDTH
);