]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / main / java / org / apache / arrow / vector / complex / ListVector.java
CommitLineData
1d09f67e
TL
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18package org.apache.arrow.vector.complex;
19
20import static java.util.Collections.singletonList;
21import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
22import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
23import static org.apache.arrow.util.Preconditions.checkNotNull;
24
25import java.util.ArrayList;
26import java.util.Arrays;
27import java.util.Collections;
28import java.util.List;
29
30import org.apache.arrow.memory.ArrowBuf;
31import org.apache.arrow.memory.BufferAllocator;
32import org.apache.arrow.memory.OutOfMemoryException;
33import org.apache.arrow.memory.util.ArrowBufPointer;
34import org.apache.arrow.memory.util.ByteFunctionHelpers;
35import org.apache.arrow.memory.util.CommonUtil;
36import org.apache.arrow.memory.util.hash.ArrowBufHasher;
37import org.apache.arrow.util.Preconditions;
38import org.apache.arrow.vector.AddOrGetResult;
39import org.apache.arrow.vector.BitVectorHelper;
40import org.apache.arrow.vector.BufferBacked;
41import org.apache.arrow.vector.FieldVector;
42import org.apache.arrow.vector.ValueVector;
43import org.apache.arrow.vector.ZeroVector;
44import org.apache.arrow.vector.compare.VectorVisitor;
45import org.apache.arrow.vector.complex.impl.ComplexCopier;
46import org.apache.arrow.vector.complex.impl.UnionListReader;
47import org.apache.arrow.vector.complex.impl.UnionListWriter;
48import org.apache.arrow.vector.complex.reader.FieldReader;
49import org.apache.arrow.vector.complex.writer.FieldWriter;
50import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
51import org.apache.arrow.vector.types.Types.MinorType;
52import org.apache.arrow.vector.types.pojo.ArrowType;
53import org.apache.arrow.vector.types.pojo.Field;
54import org.apache.arrow.vector.types.pojo.FieldType;
55import org.apache.arrow.vector.util.CallBack;
56import org.apache.arrow.vector.util.JsonStringArrayList;
57import org.apache.arrow.vector.util.OversizedAllocationException;
58import org.apache.arrow.vector.util.TransferPair;
59
60/**
61 * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
62 * <ol>
63 * <li>A validity buffer.</li>
64 * <li> An offset buffer, that denotes lists boundaries. </li>
65 * <li> A child data vector that contains the elements of lists. </li>
66 * </ol>
67 * The latter two are managed by its superclass.
68 */
69public class ListVector extends BaseRepeatedValueVector implements PromotableVector {
70
71 public static ListVector empty(String name, BufferAllocator allocator) {
72 return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null);
73 }
74
75 protected ArrowBuf validityBuffer;
76 protected UnionListReader reader;
77 private CallBack callBack;
78 private final FieldType fieldType;
79 private int validityAllocationSizeInBytes;
80
81 /**
82 * The maximum index that is actually set.
83 */
84 private int lastSet;
85
86 /**
87 * Constructs a new instance.
88 *
89 * @param name The name of the instance.
90 * @param allocator The allocator to use for allocating/reallocating buffers.
91 * @param fieldType The type of this list.
92 * @param callBack A schema change callback.
93 */
94 public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
95 super(name, allocator, callBack);
96 this.validityBuffer = allocator.getEmpty();
97 this.fieldType = checkNotNull(fieldType);
98 this.callBack = callBack;
99 this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
100 this.lastSet = -1;
101 }
102
103 @Override
104 public void initializeChildrenFromFields(List<Field> children) {
105 if (children.size() != 1) {
106 throw new IllegalArgumentException("Lists have only one child. Found: " + children);
107 }
108 Field field = children.get(0);
109 AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
110 if (!addOrGetVector.isCreated()) {
111 throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
112 }
113
114 addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
115 }
116
117 @Override
118 public void setInitialCapacity(int numRecords) {
119 validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
120 super.setInitialCapacity(numRecords);
121 }
122
123 /**
124 * Specialized version of setInitialCapacity() for ListVector. This is
125 * used by some callers when they want to explicitly control and be
126 * conservative about memory allocated for inner data vector. This is
127 * very useful when we are working with memory constraints for a query
128 * and have a fixed amount of memory reserved for the record batch. In
129 * such cases, we are likely to face OOM or related problems when
130 * we reserve memory for a record batch with value count x and
131 * do setInitialCapacity(x) such that each vector allocates only
132 * what is necessary and not the default amount but the multiplier
133 * forces the memory requirement to go beyond what was needed.
134 *
135 * @param numRecords value count
136 * @param density density of ListVector. Density is the average size of
137 * list per position in the List vector. For example, a
138 * density value of 10 implies each position in the list
139 * vector has a list of 10 values.
140 * A density value of 0.1 implies out of 10 positions in
141 * the list vector, 1 position has a list of size 1 and
142 * remaining positions are null (no lists) or empty lists.
143 * This helps in tightly controlling the memory we provision
144 * for inner data vector.
145 */
146 @Override
147 public void setInitialCapacity(int numRecords, double density) {
148 validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
149 super.setInitialCapacity(numRecords, density);
150 }
151
152 /**
153 * Get the density of this ListVector.
154 * @return density
155 */
156 public double getDensity() {
157 if (valueCount == 0) {
158 return 0.0D;
159 }
160 final int startOffset = offsetBuffer.getInt(0);
161 final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
162 final double totalListSize = endOffset - startOffset;
163 return totalListSize / valueCount;
164 }
165
166 @Override
167 public List<FieldVector> getChildrenFromFields() {
168 return singletonList(getDataVector());
169 }
170
171 /**
172 * Load the buffers of this vector with provided source buffers.
173 * The caller manages the source buffers and populates them before invoking
174 * this method.
175 * @param fieldNode the fieldNode indicating the value count
176 * @param ownBuffers the buffers for this Field (own buffers only, children not included)
177 */
178 @Override
179 public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
180 if (ownBuffers.size() != 2) {
181 throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
182 }
183
184 ArrowBuf bitBuffer = ownBuffers.get(0);
185 ArrowBuf offBuffer = ownBuffers.get(1);
186
187 validityBuffer.getReferenceManager().release();
188 validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
189 offsetBuffer.getReferenceManager().release();
190 offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
191
192 validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
193 offsetAllocationSizeInBytes = offsetBuffer.capacity();
194
195 lastSet = fieldNode.getLength() - 1;
196 valueCount = fieldNode.getLength();
197 }
198
199 /**
200 * Get the buffers belonging to this vector.
201 * @return the inner buffers.
202 */
203 @Override
204 public List<ArrowBuf> getFieldBuffers() {
205 List<ArrowBuf> result = new ArrayList<>(2);
206 setReaderAndWriterIndex();
207 result.add(validityBuffer);
208 result.add(offsetBuffer);
209
210 return result;
211 }
212
213 /**
214 * Set the reader and writer indexes for the inner buffers.
215 */
216 private void setReaderAndWriterIndex() {
217 validityBuffer.readerIndex(0);
218 offsetBuffer.readerIndex(0);
219 if (valueCount == 0) {
220 validityBuffer.writerIndex(0);
221 offsetBuffer.writerIndex(0);
222 } else {
223 validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
224 offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
225 }
226 }
227
228 /**
229 * Get the inner vectors.
230 *
231 * @deprecated This API will be removed as the current implementations no longer support inner vectors.
232 *
233 * @return the inner vectors for this field as defined by the TypeLayout
234 */
235 @Deprecated
236 @Override
237 public List<BufferBacked> getFieldInnerVectors() {
238 throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
239 }
240
241 public UnionListWriter getWriter() {
242 return new UnionListWriter(this);
243 }
244
245 /**
246 * Same as {@link #allocateNewSafe()}.
247 */
248 @Override
249 public void allocateNew() throws OutOfMemoryException {
250 if (!allocateNewSafe()) {
251 throw new OutOfMemoryException("Failure while allocating memory");
252 }
253 }
254
255 /**
256 * Allocate memory for the vector. We internally use a default value count
257 * of 4096 to allocate memory for at least these many elements in the
258 * vector.
259 *
260 * @return false if memory allocation fails, true otherwise.
261 */
262 public boolean allocateNewSafe() {
263 boolean success = false;
264 try {
265 /* we are doing a new allocation -- release the current buffers */
266 clear();
267 /* allocate validity buffer */
268 allocateValidityBuffer(validityAllocationSizeInBytes);
269 /* allocate offset and data buffer */
270 success = super.allocateNewSafe();
271 } finally {
272 if (!success) {
273 clear();
274 return false;
275 }
276 }
277 return true;
278 }
279
280 private void allocateValidityBuffer(final long size) {
281 final int curSize = (int) size;
282 validityBuffer = allocator.buffer(curSize);
283 validityBuffer.readerIndex(0);
284 validityAllocationSizeInBytes = curSize;
285 validityBuffer.setZero(0, validityBuffer.capacity());
286 }
287
288 /**
289 * Resize the vector to increase the capacity. The internal behavior is to
290 * double the current value capacity.
291 */
292 @Override
293 public void reAlloc() {
294 /* reallocate the validity buffer */
295 reallocValidityBuffer();
296 /* reallocate the offset and data */
297 super.reAlloc();
298 }
299
300 private void reallocValidityAndOffsetBuffers() {
301 reallocOffsetBuffer();
302 reallocValidityBuffer();
303 }
304
305 private void reallocValidityBuffer() {
306 final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
307 long newAllocationSize = currentBufferCapacity * 2;
308 if (newAllocationSize == 0) {
309 if (validityAllocationSizeInBytes > 0) {
310 newAllocationSize = validityAllocationSizeInBytes;
311 } else {
312 newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
313 }
314 }
315 newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
316 assert newAllocationSize >= 1;
317
318 if (newAllocationSize > MAX_ALLOCATION_SIZE) {
319 throw new OversizedAllocationException("Unable to expand the buffer");
320 }
321
322 final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
323 newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
324 newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
325 validityBuffer.getReferenceManager().release(1);
326 validityBuffer = newBuf;
327 validityAllocationSizeInBytes = (int) newAllocationSize;
328 }
329
330 /**
331 * Same as {@link #copyFrom(int, int, ValueVector)} except that
332 * it handles the case when the capacity of the vector needs to be expanded
333 * before copy.
334 * @param inIndex position to copy from in source vector
335 * @param outIndex position to copy to in this vector
336 * @param from source vector
337 */
338 @Override
339 public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
340 copyFrom(inIndex, outIndex, from);
341 }
342
343 /**
344 * Copy a cell value from a particular index in source vector to a particular
345 * position in this vector.
346 * @param inIndex position to copy from in source vector
347 * @param outIndex position to copy to in this vector
348 * @param from source vector
349 */
350 @Override
351 public void copyFrom(int inIndex, int outIndex, ValueVector from) {
352 Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
353 FieldReader in = from.getReader();
354 in.setPosition(inIndex);
355 FieldWriter out = getWriter();
356 out.setPosition(outIndex);
357 ComplexCopier.copy(in, out);
358 }
359
360 /**
361 * Get the inner data vector for this list vector.
362 * @return data vector
363 */
364 @Override
365 public FieldVector getDataVector() {
366 return vector;
367 }
368
369 @Override
370 public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
371 return getTransferPair(ref, allocator, null);
372 }
373
374 @Override
375 public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
376 return new TransferImpl(ref, allocator, callBack);
377 }
378
379 @Override
380 public TransferPair makeTransferPair(ValueVector target) {
381 return new TransferImpl((ListVector) target);
382 }
383
384 @Override
385 public long getValidityBufferAddress() {
386 return (validityBuffer.memoryAddress());
387 }
388
389 @Override
390 public long getDataBufferAddress() {
391 throw new UnsupportedOperationException();
392 }
393
394 @Override
395 public long getOffsetBufferAddress() {
396 return (offsetBuffer.memoryAddress());
397 }
398
399 @Override
400 public ArrowBuf getValidityBuffer() {
401 return validityBuffer;
402 }
403
404 @Override
405 public ArrowBuf getDataBuffer() {
406 throw new UnsupportedOperationException();
407 }
408
409 @Override
410 public ArrowBuf getOffsetBuffer() {
411 return offsetBuffer;
412 }
413
414 @Override
415 public int hashCode(int index) {
416 return hashCode(index, null);
417 }
418
419 @Override
420 public int hashCode(int index, ArrowBufHasher hasher) {
421 if (isSet(index) == 0) {
422 return ArrowBufPointer.NULL_HASH_CODE;
423 }
424 int hash = 0;
425 final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
426 final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
427 for (int i = start; i < end; i++) {
428 hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
429 }
430 return hash;
431 }
432
433 @Override
434 public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
435 return visitor.visit(this, value);
436 }
437
438 private class TransferImpl implements TransferPair {
439
440 ListVector to;
441 TransferPair dataTransferPair;
442
443 public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
444 this(new ListVector(name, allocator, fieldType, callBack));
445 }
446
447 public TransferImpl(ListVector to) {
448 this.to = to;
449 to.addOrGetVector(vector.getField().getFieldType());
450 if (to.getDataVector() instanceof ZeroVector) {
451 to.addOrGetVector(vector.getField().getFieldType());
452 }
453 dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
454 }
455
456 /**
457 * Transfer this vector'data to another vector. The memory associated
458 * with this vector is transferred to the allocator of target vector
459 * for accounting and management purposes.
460 */
461 @Override
462 public void transfer() {
463 to.clear();
464 dataTransferPair.transfer();
465 to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
466 to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
467 to.lastSet = lastSet;
468 if (valueCount > 0) {
469 to.setValueCount(valueCount);
470 }
471 clear();
472 }
473
474 /**
475 * Slice this vector at desired index and length and transfer the
476 * corresponding data to the target vector.
477 * @param startIndex start position of the split in source vector.
478 * @param length length of the split.
479 */
480 @Override
481 public void splitAndTransfer(int startIndex, int length) {
482 Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
483 "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
484 final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
485 final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
486 to.clear();
487 to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
488 /* splitAndTransfer offset buffer */
489 for (int i = 0; i < length + 1; i++) {
490 final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
491 to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
492 }
493 /* splitAndTransfer validity buffer */
494 splitAndTransferValidityBuffer(startIndex, length, to);
495 /* splitAndTransfer data buffer */
496 dataTransferPair.splitAndTransfer(startPoint, sliceLength);
497 to.lastSet = length - 1;
498 to.setValueCount(length);
499 }
500
501 /*
502 * transfer the validity.
503 */
504 private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) {
505 int firstByteSource = BitVectorHelper.byteIndex(startIndex);
506 int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
507 int byteSizeTarget = getValidityBufferSizeFromCount(length);
508 int offset = startIndex % 8;
509
510 if (length > 0) {
511 if (offset == 0) {
512 // slice
513 if (target.validityBuffer != null) {
514 target.validityBuffer.getReferenceManager().release();
515 }
516 target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
517 target.validityBuffer.getReferenceManager().retain(1);
518 } else {
519 /* Copy data
520 * When the first bit starts from the middle of a byte (offset != 0),
521 * copy data from src BitVector.
522 * Each byte in the target is composed by a part in i-th byte,
523 * another part in (i+1)-th byte.
524 */
525 target.allocateValidityBuffer(byteSizeTarget);
526
527 for (int i = 0; i < byteSizeTarget - 1; i++) {
528 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
529 byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
530
531 target.validityBuffer.setByte(i, (b1 + b2));
532 }
533
534 /* Copying the last piece is done in the following manner:
535 * if the source vector has 1 or more bytes remaining, we copy
536 * the last piece as a byte formed by shifting data
537 * from the current byte and the next byte.
538 *
539 * if the source vector has no more bytes remaining
540 * (we are at the last byte), we copy the last piece as a byte
541 * by shifting data from the current byte.
542 */
543 if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
544 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
545 firstByteSource + byteSizeTarget - 1, offset);
546 byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
547 firstByteSource + byteSizeTarget, offset);
548
549 target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
550 } else {
551 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
552 firstByteSource + byteSizeTarget - 1, offset);
553 target.validityBuffer.setByte(byteSizeTarget - 1, b1);
554 }
555 }
556 }
557 }
558
559 @Override
560 public ValueVector getTo() {
561 return to;
562 }
563
564 @Override
565 public void copyValueSafe(int from, int to) {
566 this.to.copyFrom(from, to, ListVector.this);
567 }
568 }
569
570 @Override
571 public UnionListReader getReader() {
572 if (reader == null) {
573 reader = new UnionListReader(this);
574 }
575 return reader;
576 }
577
578 /** Initialize the child data vector to field type. */
579 public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
580 AddOrGetResult<T> result = super.addOrGetVector(fieldType);
581 invalidateReader();
582 return result;
583 }
584
585 /**
586 * Get the size (number of bytes) of underlying buffers used by this
587 * vector.
588 * @return size of underlying buffers.
589 */
590 @Override
591 public int getBufferSize() {
592 if (valueCount == 0) {
593 return 0;
594 }
595 final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
596 final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
597 return offsetBufferSize + validityBufferSize + vector.getBufferSize();
598 }
599
600 @Override
601 public int getBufferSizeFor(int valueCount) {
602 if (valueCount == 0) {
603 return 0;
604 }
605 final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
606
607 return super.getBufferSizeFor(valueCount) + validityBufferSize;
608 }
609
610 @Override
611 public Field getField() {
612 return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField()));
613 }
614
615 @Override
616 public MinorType getMinorType() {
617 return MinorType.LIST;
618 }
619
620 @Override
621 public void clear() {
622 super.clear();
623 validityBuffer = releaseBuffer(validityBuffer);
624 lastSet = -1;
625 }
626
627 @Override
628 public void reset() {
629 super.reset();
630 validityBuffer.setZero(0, validityBuffer.capacity());
631 lastSet = -1;
632 }
633
634 /**
635 * Return the underlying buffers associated with this vector. Note that this doesn't
636 * impact the reference counts for this buffer so it only should be used for in-context
637 * access. Also note that this buffer changes regularly thus
638 * external classes shouldn't hold a reference to it (unless they change it).
639 *
640 * @param clear Whether to clear vector before returning; the buffers will still be refcounted
641 * but the returned array will be the only reference to them
642 * @return The underlying {@link ArrowBuf buffers} that is used by this
643 * vector instance.
644 */
645 @Override
646 public ArrowBuf[] getBuffers(boolean clear) {
647 setReaderAndWriterIndex();
648 final ArrowBuf[] buffers;
649 if (getBufferSize() == 0) {
650 buffers = new ArrowBuf[0];
651 } else {
652 List<ArrowBuf> list = new ArrayList<>();
653 list.add(offsetBuffer);
654 list.add(validityBuffer);
655 list.addAll(Arrays.asList(vector.getBuffers(false)));
656 buffers = list.toArray(new ArrowBuf[list.size()]);
657 }
658 if (clear) {
659 for (ArrowBuf buffer : buffers) {
660 buffer.getReferenceManager().retain();
661 }
662 clear();
663 }
664 return buffers;
665 }
666
667 @Override
668 public UnionVector promoteToUnion() {
669 UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack);
670 replaceDataVector(vector);
671 invalidateReader();
672 if (callBack != null) {
673 callBack.doWork();
674 }
675 return vector;
676 }
677
678 protected void invalidateReader() {
679 reader = null;
680 }
681
682 /**
683 * Get the element in the list vector at a particular index.
684 * @param index position of the element
685 * @return Object at given position
686 */
687 @Override
688 public List<?> getObject(int index) {
689 if (isSet(index) == 0) {
690 return null;
691 }
692 final List<Object> vals = new JsonStringArrayList<>();
693 final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
694 final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
695 final ValueVector vv = getDataVector();
696 for (int i = start; i < end; i++) {
697 vals.add(vv.getObject(i));
698 }
699
700 return vals;
701 }
702
703 /**
704 * Check if element at given index is null.
705 *
706 * @param index position of element
707 * @return true if element at given index is null, false otherwise
708 */
709 @Override
710 public boolean isNull(int index) {
711 return (isSet(index) == 0);
712 }
713
714 /**
715 * Check if element at given index is empty list.
716 * @param index position of element
717 * @return true if element at given index is empty list or NULL, false otherwise
718 */
719 @Override
720 public boolean isEmpty(int index) {
721 if (isNull(index)) {
722 return true;
723 } else {
724 final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
725 final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
726 return start == end;
727 }
728 }
729
730 /**
731 * Same as {@link #isNull(int)}.
732 *
733 * @param index position of element
734 * @return 1 if element at given index is not null, 0 otherwise
735 */
736 public int isSet(int index) {
737 final int byteIndex = index >> 3;
738 final byte b = validityBuffer.getByte(byteIndex);
739 final int bitIndex = index & 7;
740 return (b >> bitIndex) & 0x01;
741 }
742
743 /**
744 * Get the number of elements that are null in the vector.
745 *
746 * @return the number of null elements.
747 */
748 @Override
749 public int getNullCount() {
750 return BitVectorHelper.getNullCount(validityBuffer, valueCount);
751 }
752
753 /**
754 * Get the current value capacity for the vector.
755 * @return number of elements that vector can hold.
756 */
757 @Override
758 public int getValueCapacity() {
759 return getValidityAndOffsetValueCapacity();
760 }
761
762 private int getValidityAndOffsetValueCapacity() {
763 final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
764 return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
765 }
766
767 private int getValidityBufferValueCapacity() {
768 return capAtMaxInt(validityBuffer.capacity() * 8);
769 }
770
771 /**
772 * Sets the list at index to be not-null. Reallocates validity buffer if index
773 * is larger than current capacity.
774 */
775 public void setNotNull(int index) {
776 while (index >= getValidityAndOffsetValueCapacity()) {
777 reallocValidityAndOffsetBuffers();
778 }
779 BitVectorHelper.setBit(validityBuffer, index);
780 lastSet = index;
781 }
782
783 /**
784 * Sets list at index to be null.
785 * @param index position in vector
786 */
787 public void setNull(int index) {
788 while (index >= getValidityAndOffsetValueCapacity()) {
789 reallocValidityAndOffsetBuffers();
790 }
791 if (lastSet >= index) {
792 lastSet = index - 1;
793 }
794 for (int i = lastSet + 1; i <= index; i++) {
795 final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
796 offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
797 }
798 BitVectorHelper.unsetBit(validityBuffer, index);
799 }
800
801 /**
802 * Start a new value in the list vector.
803 *
804 * @param index index of the value to start
805 */
806 @Override
807 public int startNewValue(int index) {
808 while (index >= getValidityAndOffsetValueCapacity()) {
809 reallocValidityAndOffsetBuffers();
810 }
811 if (lastSet >= index) {
812 lastSet = index - 1;
813 }
814 for (int i = lastSet + 1; i <= index; i++) {
815 final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
816 offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
817 }
818 BitVectorHelper.setBit(validityBuffer, index);
819 lastSet = index;
820 return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
821 }
822
823 /**
824 * End the current value.
825 *
826 * @param index index of the value to end
827 * @param size number of elements in the list that was written
828 */
829 public void endValue(int index, int size) {
830 final int currentOffset = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
831 offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, currentOffset + size);
832 }
833
834 /**
835 * Sets the value count for the vector.
836 *
837 * @param valueCount value count
838 */
839 @Override
840 public void setValueCount(int valueCount) {
841 this.valueCount = valueCount;
842 if (valueCount > 0) {
843 while (valueCount > getValidityAndOffsetValueCapacity()) {
844 /* check if validity and offset buffers need to be re-allocated */
845 reallocValidityAndOffsetBuffers();
846 }
847 for (int i = lastSet + 1; i < valueCount; i++) {
848 /* fill the holes with offsets */
849 final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
850 offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
851 }
852 }
853 /* valueCount for the data vector is the current end offset */
854 final int childValueCount = (valueCount == 0) ? 0 :
855 offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
856 /* set the value count of data vector and this will take care of
857 * checking whether data buffer needs to be reallocated.
858 */
859 vector.setValueCount(childValueCount);
860 }
861
862 public void setLastSet(int value) {
863 lastSet = value;
864 }
865
866 public int getLastSet() {
867 return lastSet;
868 }
869
870 @Override
871 public int getElementStartIndex(int index) {
872 return offsetBuffer.getInt(index * OFFSET_WIDTH);
873 }
874
875 @Override
876 public int getElementEndIndex(int index) {
877 return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
878 }
879}