]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.vector.complex; | |
19 | ||
20 | import static java.util.Collections.singletonList; | |
21 | import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; | |
22 | import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; | |
23 | import static org.apache.arrow.util.Preconditions.checkNotNull; | |
24 | ||
25 | import java.util.ArrayList; | |
26 | import java.util.Arrays; | |
27 | import java.util.Collections; | |
28 | import java.util.List; | |
29 | ||
30 | import org.apache.arrow.memory.ArrowBuf; | |
31 | import org.apache.arrow.memory.BufferAllocator; | |
32 | import org.apache.arrow.memory.OutOfMemoryException; | |
33 | import org.apache.arrow.memory.util.ArrowBufPointer; | |
34 | import org.apache.arrow.memory.util.ByteFunctionHelpers; | |
35 | import org.apache.arrow.memory.util.CommonUtil; | |
36 | import org.apache.arrow.memory.util.hash.ArrowBufHasher; | |
37 | import org.apache.arrow.util.Preconditions; | |
38 | import org.apache.arrow.vector.AddOrGetResult; | |
39 | import org.apache.arrow.vector.BitVectorHelper; | |
40 | import org.apache.arrow.vector.BufferBacked; | |
41 | import org.apache.arrow.vector.FieldVector; | |
42 | import org.apache.arrow.vector.ValueVector; | |
43 | import org.apache.arrow.vector.ZeroVector; | |
44 | import org.apache.arrow.vector.compare.VectorVisitor; | |
45 | import org.apache.arrow.vector.complex.impl.ComplexCopier; | |
46 | import org.apache.arrow.vector.complex.impl.UnionListReader; | |
47 | import org.apache.arrow.vector.complex.impl.UnionListWriter; | |
48 | import org.apache.arrow.vector.complex.reader.FieldReader; | |
49 | import org.apache.arrow.vector.complex.writer.FieldWriter; | |
50 | import org.apache.arrow.vector.ipc.message.ArrowFieldNode; | |
51 | import org.apache.arrow.vector.types.Types.MinorType; | |
52 | import org.apache.arrow.vector.types.pojo.ArrowType; | |
53 | import org.apache.arrow.vector.types.pojo.Field; | |
54 | import org.apache.arrow.vector.types.pojo.FieldType; | |
55 | import org.apache.arrow.vector.util.CallBack; | |
56 | import org.apache.arrow.vector.util.JsonStringArrayList; | |
57 | import org.apache.arrow.vector.util.OversizedAllocationException; | |
58 | import org.apache.arrow.vector.util.TransferPair; | |
59 | ||
60 | /** | |
61 | * A list vector contains lists of a specific type of elements. Its structure contains 3 elements. | |
62 | * <ol> | |
63 | * <li>A validity buffer.</li> | |
64 | * <li> An offset buffer, that denotes lists boundaries. </li> | |
65 | * <li> A child data vector that contains the elements of lists. </li> | |
66 | * </ol> | |
67 | * The latter two are managed by its superclass. | |
68 | */ | |
69 | public class ListVector extends BaseRepeatedValueVector implements PromotableVector { | |
70 | ||
71 | public static ListVector empty(String name, BufferAllocator allocator) { | |
72 | return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); | |
73 | } | |
74 | ||
/** Validity bitmap; {@code isSet(int)} reads one bit per list position from it. */
protected ArrowBuf validityBuffer;
/** Cached reader, created on demand by {@code getReader()} and dropped by {@code invalidateReader()}. */
protected UnionListReader reader;
/** Schema-change callback supplied at construction; invoked by {@code promoteToUnion()} when non-null. */
private CallBack callBack;
/** Field type of this list, as passed to the constructor; used when building {@code getField()}. */
private final FieldType fieldType;
/** Size in bytes used for the validity buffer by the next {@code allocateNewSafe()}. */
private int validityAllocationSizeInBytes;

/**
 * The maximum index that is actually set.
 */
private int lastSet;
85 | ||
/**
 * Builds a list vector that starts with an empty validity buffer and no value set.
 *
 * @param name The name of the instance.
 * @param allocator The allocator to use for allocating/reallocating buffers.
 * @param fieldType The type of this list; must not be null.
 * @param callBack A schema change callback.
 */
public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
  super(name, allocator, callBack);
  this.validityBuffer = allocator.getEmpty();
  this.fieldType = checkNotNull(fieldType);
  this.callBack = callBack;
  // -1 marks "nothing written yet"
  this.lastSet = -1;
  this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
}
102 | ||
103 | @Override | |
104 | public void initializeChildrenFromFields(List<Field> children) { | |
105 | if (children.size() != 1) { | |
106 | throw new IllegalArgumentException("Lists have only one child. Found: " + children); | |
107 | } | |
108 | Field field = children.get(0); | |
109 | AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType()); | |
110 | if (!addOrGetVector.isCreated()) { | |
111 | throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector()); | |
112 | } | |
113 | ||
114 | addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); | |
115 | } | |
116 | ||
117 | @Override | |
118 | public void setInitialCapacity(int numRecords) { | |
119 | validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); | |
120 | super.setInitialCapacity(numRecords); | |
121 | } | |
122 | ||
123 | /** | |
124 | * Specialized version of setInitialCapacity() for ListVector. This is | |
125 | * used by some callers when they want to explicitly control and be | |
126 | * conservative about memory allocated for inner data vector. This is | |
127 | * very useful when we are working with memory constraints for a query | |
128 | * and have a fixed amount of memory reserved for the record batch. In | |
129 | * such cases, we are likely to face OOM or related problems when | |
130 | * we reserve memory for a record batch with value count x and | |
131 | * do setInitialCapacity(x) such that each vector allocates only | |
132 | * what is necessary and not the default amount but the multiplier | |
133 | * forces the memory requirement to go beyond what was needed. | |
134 | * | |
135 | * @param numRecords value count | |
136 | * @param density density of ListVector. Density is the average size of | |
137 | * list per position in the List vector. For example, a | |
138 | * density value of 10 implies each position in the list | |
139 | * vector has a list of 10 values. | |
140 | * A density value of 0.1 implies out of 10 positions in | |
141 | * the list vector, 1 position has a list of size 1 and | |
142 | * remaining positions are null (no lists) or empty lists. | |
143 | * This helps in tightly controlling the memory we provision | |
144 | * for inner data vector. | |
145 | */ | |
146 | @Override | |
147 | public void setInitialCapacity(int numRecords, double density) { | |
148 | validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); | |
149 | super.setInitialCapacity(numRecords, density); | |
150 | } | |
151 | ||
152 | /** | |
153 | * Get the density of this ListVector. | |
154 | * @return density | |
155 | */ | |
156 | public double getDensity() { | |
157 | if (valueCount == 0) { | |
158 | return 0.0D; | |
159 | } | |
160 | final int startOffset = offsetBuffer.getInt(0); | |
161 | final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); | |
162 | final double totalListSize = endOffset - startOffset; | |
163 | return totalListSize / valueCount; | |
164 | } | |
165 | ||
166 | @Override | |
167 | public List<FieldVector> getChildrenFromFields() { | |
168 | return singletonList(getDataVector()); | |
169 | } | |
170 | ||
171 | /** | |
172 | * Load the buffers of this vector with provided source buffers. | |
173 | * The caller manages the source buffers and populates them before invoking | |
174 | * this method. | |
175 | * @param fieldNode the fieldNode indicating the value count | |
176 | * @param ownBuffers the buffers for this Field (own buffers only, children not included) | |
177 | */ | |
178 | @Override | |
179 | public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) { | |
180 | if (ownBuffers.size() != 2) { | |
181 | throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size()); | |
182 | } | |
183 | ||
184 | ArrowBuf bitBuffer = ownBuffers.get(0); | |
185 | ArrowBuf offBuffer = ownBuffers.get(1); | |
186 | ||
187 | validityBuffer.getReferenceManager().release(); | |
188 | validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); | |
189 | offsetBuffer.getReferenceManager().release(); | |
190 | offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); | |
191 | ||
192 | validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); | |
193 | offsetAllocationSizeInBytes = offsetBuffer.capacity(); | |
194 | ||
195 | lastSet = fieldNode.getLength() - 1; | |
196 | valueCount = fieldNode.getLength(); | |
197 | } | |
198 | ||
199 | /** | |
200 | * Get the buffers belonging to this vector. | |
201 | * @return the inner buffers. | |
202 | */ | |
203 | @Override | |
204 | public List<ArrowBuf> getFieldBuffers() { | |
205 | List<ArrowBuf> result = new ArrayList<>(2); | |
206 | setReaderAndWriterIndex(); | |
207 | result.add(validityBuffer); | |
208 | result.add(offsetBuffer); | |
209 | ||
210 | return result; | |
211 | } | |
212 | ||
213 | /** | |
214 | * Set the reader and writer indexes for the inner buffers. | |
215 | */ | |
216 | private void setReaderAndWriterIndex() { | |
217 | validityBuffer.readerIndex(0); | |
218 | offsetBuffer.readerIndex(0); | |
219 | if (valueCount == 0) { | |
220 | validityBuffer.writerIndex(0); | |
221 | offsetBuffer.writerIndex(0); | |
222 | } else { | |
223 | validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); | |
224 | offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); | |
225 | } | |
226 | } | |
227 | ||
228 | /** | |
229 | * Get the inner vectors. | |
230 | * | |
231 | * @deprecated This API will be removed as the current implementations no longer support inner vectors. | |
232 | * | |
233 | * @return the inner vectors for this field as defined by the TypeLayout | |
234 | */ | |
235 | @Deprecated | |
236 | @Override | |
237 | public List<BufferBacked> getFieldInnerVectors() { | |
238 | throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); | |
239 | } | |
240 | ||
241 | public UnionListWriter getWriter() { | |
242 | return new UnionListWriter(this); | |
243 | } | |
244 | ||
245 | /** | |
246 | * Same as {@link #allocateNewSafe()}. | |
247 | */ | |
248 | @Override | |
249 | public void allocateNew() throws OutOfMemoryException { | |
250 | if (!allocateNewSafe()) { | |
251 | throw new OutOfMemoryException("Failure while allocating memory"); | |
252 | } | |
253 | } | |
254 | ||
/**
 * Allocate memory for the vector: releases the current buffers, allocates a validity
 * buffer of {@code validityAllocationSizeInBytes} bytes, then delegates offset and data
 * allocation to the superclass.
 *
 * <p>NOTE(review): the {@code return false} inside the {@code finally} block means any
 * exception thrown while allocating is discarded and reported as {@code false}, not
 * propagated to the caller.
 *
 * @return false if memory allocation fails, true otherwise.
 */
public boolean allocateNewSafe() {
  boolean success = false;
  try {
    /* we are doing a new allocation -- release the current buffers */
    clear();
    /* allocate validity buffer */
    allocateValidityBuffer(validityAllocationSizeInBytes);
    /* allocate offset and data buffer */
    success = super.allocateNewSafe();
  } finally {
    if (!success) {
      /* roll back any partial allocation; returning here also suppresses a pending exception */
      clear();
      return false;
    }
  }
  return true;
}
279 | ||
280 | private void allocateValidityBuffer(final long size) { | |
281 | final int curSize = (int) size; | |
282 | validityBuffer = allocator.buffer(curSize); | |
283 | validityBuffer.readerIndex(0); | |
284 | validityAllocationSizeInBytes = curSize; | |
285 | validityBuffer.setZero(0, validityBuffer.capacity()); | |
286 | } | |
287 | ||
288 | /** | |
289 | * Resize the vector to increase the capacity. The internal behavior is to | |
290 | * double the current value capacity. | |
291 | */ | |
292 | @Override | |
293 | public void reAlloc() { | |
294 | /* reallocate the validity buffer */ | |
295 | reallocValidityBuffer(); | |
296 | /* reallocate the offset and data */ | |
297 | super.reAlloc(); | |
298 | } | |
299 | ||
300 | private void reallocValidityAndOffsetBuffers() { | |
301 | reallocOffsetBuffer(); | |
302 | reallocValidityBuffer(); | |
303 | } | |
304 | ||
305 | private void reallocValidityBuffer() { | |
306 | final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); | |
307 | long newAllocationSize = currentBufferCapacity * 2; | |
308 | if (newAllocationSize == 0) { | |
309 | if (validityAllocationSizeInBytes > 0) { | |
310 | newAllocationSize = validityAllocationSizeInBytes; | |
311 | } else { | |
312 | newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2; | |
313 | } | |
314 | } | |
315 | newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); | |
316 | assert newAllocationSize >= 1; | |
317 | ||
318 | if (newAllocationSize > MAX_ALLOCATION_SIZE) { | |
319 | throw new OversizedAllocationException("Unable to expand the buffer"); | |
320 | } | |
321 | ||
322 | final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); | |
323 | newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); | |
324 | newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); | |
325 | validityBuffer.getReferenceManager().release(1); | |
326 | validityBuffer = newBuf; | |
327 | validityAllocationSizeInBytes = (int) newAllocationSize; | |
328 | } | |
329 | ||
330 | /** | |
331 | * Same as {@link #copyFrom(int, int, ValueVector)} except that | |
332 | * it handles the case when the capacity of the vector needs to be expanded | |
333 | * before copy. | |
334 | * @param inIndex position to copy from in source vector | |
335 | * @param outIndex position to copy to in this vector | |
336 | * @param from source vector | |
337 | */ | |
338 | @Override | |
339 | public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { | |
340 | copyFrom(inIndex, outIndex, from); | |
341 | } | |
342 | ||
343 | /** | |
344 | * Copy a cell value from a particular index in source vector to a particular | |
345 | * position in this vector. | |
346 | * @param inIndex position to copy from in source vector | |
347 | * @param outIndex position to copy to in this vector | |
348 | * @param from source vector | |
349 | */ | |
350 | @Override | |
351 | public void copyFrom(int inIndex, int outIndex, ValueVector from) { | |
352 | Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); | |
353 | FieldReader in = from.getReader(); | |
354 | in.setPosition(inIndex); | |
355 | FieldWriter out = getWriter(); | |
356 | out.setPosition(outIndex); | |
357 | ComplexCopier.copy(in, out); | |
358 | } | |
359 | ||
360 | /** | |
361 | * Get the inner data vector for this list vector. | |
362 | * @return data vector | |
363 | */ | |
364 | @Override | |
365 | public FieldVector getDataVector() { | |
366 | return vector; | |
367 | } | |
368 | ||
369 | @Override | |
370 | public TransferPair getTransferPair(String ref, BufferAllocator allocator) { | |
371 | return getTransferPair(ref, allocator, null); | |
372 | } | |
373 | ||
374 | @Override | |
375 | public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { | |
376 | return new TransferImpl(ref, allocator, callBack); | |
377 | } | |
378 | ||
379 | @Override | |
380 | public TransferPair makeTransferPair(ValueVector target) { | |
381 | return new TransferImpl((ListVector) target); | |
382 | } | |
383 | ||
384 | @Override | |
385 | public long getValidityBufferAddress() { | |
386 | return (validityBuffer.memoryAddress()); | |
387 | } | |
388 | ||
389 | @Override | |
390 | public long getDataBufferAddress() { | |
391 | throw new UnsupportedOperationException(); | |
392 | } | |
393 | ||
394 | @Override | |
395 | public long getOffsetBufferAddress() { | |
396 | return (offsetBuffer.memoryAddress()); | |
397 | } | |
398 | ||
399 | @Override | |
400 | public ArrowBuf getValidityBuffer() { | |
401 | return validityBuffer; | |
402 | } | |
403 | ||
404 | @Override | |
405 | public ArrowBuf getDataBuffer() { | |
406 | throw new UnsupportedOperationException(); | |
407 | } | |
408 | ||
409 | @Override | |
410 | public ArrowBuf getOffsetBuffer() { | |
411 | return offsetBuffer; | |
412 | } | |
413 | ||
414 | @Override | |
415 | public int hashCode(int index) { | |
416 | return hashCode(index, null); | |
417 | } | |
418 | ||
419 | @Override | |
420 | public int hashCode(int index, ArrowBufHasher hasher) { | |
421 | if (isSet(index) == 0) { | |
422 | return ArrowBufPointer.NULL_HASH_CODE; | |
423 | } | |
424 | int hash = 0; | |
425 | final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); | |
426 | final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); | |
427 | for (int i = start; i < end; i++) { | |
428 | hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); | |
429 | } | |
430 | return hash; | |
431 | } | |
432 | ||
433 | @Override | |
434 | public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { | |
435 | return visitor.visit(this, value); | |
436 | } | |
437 | ||
438 | private class TransferImpl implements TransferPair { | |
439 | ||
440 | ListVector to; | |
441 | TransferPair dataTransferPair; | |
442 | ||
443 | public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { | |
444 | this(new ListVector(name, allocator, fieldType, callBack)); | |
445 | } | |
446 | ||
447 | public TransferImpl(ListVector to) { | |
448 | this.to = to; | |
449 | to.addOrGetVector(vector.getField().getFieldType()); | |
450 | if (to.getDataVector() instanceof ZeroVector) { | |
451 | to.addOrGetVector(vector.getField().getFieldType()); | |
452 | } | |
453 | dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); | |
454 | } | |
455 | ||
456 | /** | |
457 | * Transfer this vector'data to another vector. The memory associated | |
458 | * with this vector is transferred to the allocator of target vector | |
459 | * for accounting and management purposes. | |
460 | */ | |
461 | @Override | |
462 | public void transfer() { | |
463 | to.clear(); | |
464 | dataTransferPair.transfer(); | |
465 | to.validityBuffer = transferBuffer(validityBuffer, to.allocator); | |
466 | to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); | |
467 | to.lastSet = lastSet; | |
468 | if (valueCount > 0) { | |
469 | to.setValueCount(valueCount); | |
470 | } | |
471 | clear(); | |
472 | } | |
473 | ||
474 | /** | |
475 | * Slice this vector at desired index and length and transfer the | |
476 | * corresponding data to the target vector. | |
477 | * @param startIndex start position of the split in source vector. | |
478 | * @param length length of the split. | |
479 | */ | |
480 | @Override | |
481 | public void splitAndTransfer(int startIndex, int length) { | |
482 | Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, | |
483 | "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); | |
484 | final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); | |
485 | final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; | |
486 | to.clear(); | |
487 | to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); | |
488 | /* splitAndTransfer offset buffer */ | |
489 | for (int i = 0; i < length + 1; i++) { | |
490 | final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; | |
491 | to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); | |
492 | } | |
493 | /* splitAndTransfer validity buffer */ | |
494 | splitAndTransferValidityBuffer(startIndex, length, to); | |
495 | /* splitAndTransfer data buffer */ | |
496 | dataTransferPair.splitAndTransfer(startPoint, sliceLength); | |
497 | to.lastSet = length - 1; | |
498 | to.setValueCount(length); | |
499 | } | |
500 | ||
501 | /* | |
502 | * transfer the validity. | |
503 | */ | |
504 | private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) { | |
505 | int firstByteSource = BitVectorHelper.byteIndex(startIndex); | |
506 | int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); | |
507 | int byteSizeTarget = getValidityBufferSizeFromCount(length); | |
508 | int offset = startIndex % 8; | |
509 | ||
510 | if (length > 0) { | |
511 | if (offset == 0) { | |
512 | // slice | |
513 | if (target.validityBuffer != null) { | |
514 | target.validityBuffer.getReferenceManager().release(); | |
515 | } | |
516 | target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); | |
517 | target.validityBuffer.getReferenceManager().retain(1); | |
518 | } else { | |
519 | /* Copy data | |
520 | * When the first bit starts from the middle of a byte (offset != 0), | |
521 | * copy data from src BitVector. | |
522 | * Each byte in the target is composed by a part in i-th byte, | |
523 | * another part in (i+1)-th byte. | |
524 | */ | |
525 | target.allocateValidityBuffer(byteSizeTarget); | |
526 | ||
527 | for (int i = 0; i < byteSizeTarget - 1; i++) { | |
528 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); | |
529 | byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset); | |
530 | ||
531 | target.validityBuffer.setByte(i, (b1 + b2)); | |
532 | } | |
533 | ||
534 | /* Copying the last piece is done in the following manner: | |
535 | * if the source vector has 1 or more bytes remaining, we copy | |
536 | * the last piece as a byte formed by shifting data | |
537 | * from the current byte and the next byte. | |
538 | * | |
539 | * if the source vector has no more bytes remaining | |
540 | * (we are at the last byte), we copy the last piece as a byte | |
541 | * by shifting data from the current byte. | |
542 | */ | |
543 | if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { | |
544 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, | |
545 | firstByteSource + byteSizeTarget - 1, offset); | |
546 | byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, | |
547 | firstByteSource + byteSizeTarget, offset); | |
548 | ||
549 | target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); | |
550 | } else { | |
551 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, | |
552 | firstByteSource + byteSizeTarget - 1, offset); | |
553 | target.validityBuffer.setByte(byteSizeTarget - 1, b1); | |
554 | } | |
555 | } | |
556 | } | |
557 | } | |
558 | ||
559 | @Override | |
560 | public ValueVector getTo() { | |
561 | return to; | |
562 | } | |
563 | ||
564 | @Override | |
565 | public void copyValueSafe(int from, int to) { | |
566 | this.to.copyFrom(from, to, ListVector.this); | |
567 | } | |
568 | } | |
569 | ||
570 | @Override | |
571 | public UnionListReader getReader() { | |
572 | if (reader == null) { | |
573 | reader = new UnionListReader(this); | |
574 | } | |
575 | return reader; | |
576 | } | |
577 | ||
578 | /** Initialize the child data vector to field type. */ | |
579 | public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) { | |
580 | AddOrGetResult<T> result = super.addOrGetVector(fieldType); | |
581 | invalidateReader(); | |
582 | return result; | |
583 | } | |
584 | ||
585 | /** | |
586 | * Get the size (number of bytes) of underlying buffers used by this | |
587 | * vector. | |
588 | * @return size of underlying buffers. | |
589 | */ | |
590 | @Override | |
591 | public int getBufferSize() { | |
592 | if (valueCount == 0) { | |
593 | return 0; | |
594 | } | |
595 | final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; | |
596 | final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); | |
597 | return offsetBufferSize + validityBufferSize + vector.getBufferSize(); | |
598 | } | |
599 | ||
600 | @Override | |
601 | public int getBufferSizeFor(int valueCount) { | |
602 | if (valueCount == 0) { | |
603 | return 0; | |
604 | } | |
605 | final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); | |
606 | ||
607 | return super.getBufferSizeFor(valueCount) + validityBufferSize; | |
608 | } | |
609 | ||
610 | @Override | |
611 | public Field getField() { | |
612 | return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField())); | |
613 | } | |
614 | ||
615 | @Override | |
616 | public MinorType getMinorType() { | |
617 | return MinorType.LIST; | |
618 | } | |
619 | ||
620 | @Override | |
621 | public void clear() { | |
622 | super.clear(); | |
623 | validityBuffer = releaseBuffer(validityBuffer); | |
624 | lastSet = -1; | |
625 | } | |
626 | ||
627 | @Override | |
628 | public void reset() { | |
629 | super.reset(); | |
630 | validityBuffer.setZero(0, validityBuffer.capacity()); | |
631 | lastSet = -1; | |
632 | } | |
633 | ||
634 | /** | |
635 | * Return the underlying buffers associated with this vector. Note that this doesn't | |
636 | * impact the reference counts for this buffer so it only should be used for in-context | |
637 | * access. Also note that this buffer changes regularly thus | |
638 | * external classes shouldn't hold a reference to it (unless they change it). | |
639 | * | |
640 | * @param clear Whether to clear vector before returning; the buffers will still be refcounted | |
641 | * but the returned array will be the only reference to them | |
642 | * @return The underlying {@link ArrowBuf buffers} that is used by this | |
643 | * vector instance. | |
644 | */ | |
645 | @Override | |
646 | public ArrowBuf[] getBuffers(boolean clear) { | |
647 | setReaderAndWriterIndex(); | |
648 | final ArrowBuf[] buffers; | |
649 | if (getBufferSize() == 0) { | |
650 | buffers = new ArrowBuf[0]; | |
651 | } else { | |
652 | List<ArrowBuf> list = new ArrayList<>(); | |
653 | list.add(offsetBuffer); | |
654 | list.add(validityBuffer); | |
655 | list.addAll(Arrays.asList(vector.getBuffers(false))); | |
656 | buffers = list.toArray(new ArrowBuf[list.size()]); | |
657 | } | |
658 | if (clear) { | |
659 | for (ArrowBuf buffer : buffers) { | |
660 | buffer.getReferenceManager().retain(); | |
661 | } | |
662 | clear(); | |
663 | } | |
664 | return buffers; | |
665 | } | |
666 | ||
667 | @Override | |
668 | public UnionVector promoteToUnion() { | |
669 | UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); | |
670 | replaceDataVector(vector); | |
671 | invalidateReader(); | |
672 | if (callBack != null) { | |
673 | callBack.doWork(); | |
674 | } | |
675 | return vector; | |
676 | } | |
677 | ||
678 | protected void invalidateReader() { | |
679 | reader = null; | |
680 | } | |
681 | ||
682 | /** | |
683 | * Get the element in the list vector at a particular index. | |
684 | * @param index position of the element | |
685 | * @return Object at given position | |
686 | */ | |
687 | @Override | |
688 | public List<?> getObject(int index) { | |
689 | if (isSet(index) == 0) { | |
690 | return null; | |
691 | } | |
692 | final List<Object> vals = new JsonStringArrayList<>(); | |
693 | final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); | |
694 | final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); | |
695 | final ValueVector vv = getDataVector(); | |
696 | for (int i = start; i < end; i++) { | |
697 | vals.add(vv.getObject(i)); | |
698 | } | |
699 | ||
700 | return vals; | |
701 | } | |
702 | ||
703 | /** | |
704 | * Check if element at given index is null. | |
705 | * | |
706 | * @param index position of element | |
707 | * @return true if element at given index is null, false otherwise | |
708 | */ | |
709 | @Override | |
710 | public boolean isNull(int index) { | |
711 | return (isSet(index) == 0); | |
712 | } | |
713 | ||
714 | /** | |
715 | * Check if element at given index is empty list. | |
716 | * @param index position of element | |
717 | * @return true if element at given index is empty list or NULL, false otherwise | |
718 | */ | |
719 | @Override | |
720 | public boolean isEmpty(int index) { | |
721 | if (isNull(index)) { | |
722 | return true; | |
723 | } else { | |
724 | final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); | |
725 | final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); | |
726 | return start == end; | |
727 | } | |
728 | } | |
729 | ||
730 | /** | |
731 | * Same as {@link #isNull(int)}. | |
732 | * | |
733 | * @param index position of element | |
734 | * @return 1 if element at given index is not null, 0 otherwise | |
735 | */ | |
736 | public int isSet(int index) { | |
737 | final int byteIndex = index >> 3; | |
738 | final byte b = validityBuffer.getByte(byteIndex); | |
739 | final int bitIndex = index & 7; | |
740 | return (b >> bitIndex) & 0x01; | |
741 | } | |
742 | ||
743 | /** | |
744 | * Get the number of elements that are null in the vector. | |
745 | * | |
746 | * @return the number of null elements. | |
747 | */ | |
748 | @Override | |
749 | public int getNullCount() { | |
750 | return BitVectorHelper.getNullCount(validityBuffer, valueCount); | |
751 | } | |
752 | ||
753 | /** | |
754 | * Get the current value capacity for the vector. | |
755 | * @return number of elements that vector can hold. | |
756 | */ | |
757 | @Override | |
758 | public int getValueCapacity() { | |
759 | return getValidityAndOffsetValueCapacity(); | |
760 | } | |
761 | ||
762 | private int getValidityAndOffsetValueCapacity() { | |
763 | final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); | |
764 | return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); | |
765 | } | |
766 | ||
767 | private int getValidityBufferValueCapacity() { | |
768 | return capAtMaxInt(validityBuffer.capacity() * 8); | |
769 | } | |
770 | ||
771 | /** | |
772 | * Sets the list at index to be not-null. Reallocates validity buffer if index | |
773 | * is larger than current capacity. | |
774 | */ | |
775 | public void setNotNull(int index) { | |
776 | while (index >= getValidityAndOffsetValueCapacity()) { | |
777 | reallocValidityAndOffsetBuffers(); | |
778 | } | |
779 | BitVectorHelper.setBit(validityBuffer, index); | |
780 | lastSet = index; | |
781 | } | |
782 | ||
783 | /** | |
784 | * Sets list at index to be null. | |
785 | * @param index position in vector | |
786 | */ | |
787 | public void setNull(int index) { | |
788 | while (index >= getValidityAndOffsetValueCapacity()) { | |
789 | reallocValidityAndOffsetBuffers(); | |
790 | } | |
791 | if (lastSet >= index) { | |
792 | lastSet = index - 1; | |
793 | } | |
794 | for (int i = lastSet + 1; i <= index; i++) { | |
795 | final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); | |
796 | offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); | |
797 | } | |
798 | BitVectorHelper.unsetBit(validityBuffer, index); | |
799 | } | |
800 | ||
801 | /** | |
802 | * Start a new value in the list vector. | |
803 | * | |
804 | * @param index index of the value to start | |
805 | */ | |
806 | @Override | |
807 | public int startNewValue(int index) { | |
808 | while (index >= getValidityAndOffsetValueCapacity()) { | |
809 | reallocValidityAndOffsetBuffers(); | |
810 | } | |
811 | if (lastSet >= index) { | |
812 | lastSet = index - 1; | |
813 | } | |
814 | for (int i = lastSet + 1; i <= index; i++) { | |
815 | final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); | |
816 | offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); | |
817 | } | |
818 | BitVectorHelper.setBit(validityBuffer, index); | |
819 | lastSet = index; | |
820 | return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); | |
821 | } | |
822 | ||
823 | /** | |
824 | * End the current value. | |
825 | * | |
826 | * @param index index of the value to end | |
827 | * @param size number of elements in the list that was written | |
828 | */ | |
829 | public void endValue(int index, int size) { | |
830 | final int currentOffset = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); | |
831 | offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, currentOffset + size); | |
832 | } | |
833 | ||
834 | /** | |
835 | * Sets the value count for the vector. | |
836 | * | |
837 | * @param valueCount value count | |
838 | */ | |
839 | @Override | |
840 | public void setValueCount(int valueCount) { | |
841 | this.valueCount = valueCount; | |
842 | if (valueCount > 0) { | |
843 | while (valueCount > getValidityAndOffsetValueCapacity()) { | |
844 | /* check if validity and offset buffers need to be re-allocated */ | |
845 | reallocValidityAndOffsetBuffers(); | |
846 | } | |
847 | for (int i = lastSet + 1; i < valueCount; i++) { | |
848 | /* fill the holes with offsets */ | |
849 | final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); | |
850 | offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); | |
851 | } | |
852 | } | |
853 | /* valueCount for the data vector is the current end offset */ | |
854 | final int childValueCount = (valueCount == 0) ? 0 : | |
855 | offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); | |
856 | /* set the value count of data vector and this will take care of | |
857 | * checking whether data buffer needs to be reallocated. | |
858 | */ | |
859 | vector.setValueCount(childValueCount); | |
860 | } | |
861 | ||
862 | public void setLastSet(int value) { | |
863 | lastSet = value; | |
864 | } | |
865 | ||
866 | public int getLastSet() { | |
867 | return lastSet; | |
868 | } | |
869 | ||
870 | @Override | |
871 | public int getElementStartIndex(int index) { | |
872 | return offsetBuffer.getInt(index * OFFSET_WIDTH); | |
873 | } | |
874 | ||
875 | @Override | |
876 | public int getElementEndIndex(int index) { | |
877 | return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); | |
878 | } | |
879 | } |