]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / main / java / org / apache / arrow / vector / complex / StructVector.java
CommitLineData
1d09f67e
TL
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package org.apache.arrow.vector.complex;
19
20import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
21import static org.apache.arrow.util.Preconditions.checkNotNull;
22
23import java.util.ArrayList;
24import java.util.Arrays;
25import java.util.List;
26import java.util.Map;
27
28import org.apache.arrow.memory.ArrowBuf;
29import org.apache.arrow.memory.BufferAllocator;
30import org.apache.arrow.memory.util.ArrowBufPointer;
31import org.apache.arrow.memory.util.CommonUtil;
32import org.apache.arrow.memory.util.hash.ArrowBufHasher;
33import org.apache.arrow.util.Preconditions;
34import org.apache.arrow.vector.BaseValueVector;
35import org.apache.arrow.vector.BitVectorHelper;
36import org.apache.arrow.vector.BufferBacked;
37import org.apache.arrow.vector.FieldVector;
38import org.apache.arrow.vector.ValueVector;
39import org.apache.arrow.vector.complex.impl.NullableStructReaderImpl;
40import org.apache.arrow.vector.complex.impl.NullableStructWriter;
41import org.apache.arrow.vector.holders.ComplexHolder;
42import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
43import org.apache.arrow.vector.types.pojo.ArrowType;
44import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
45import org.apache.arrow.vector.types.pojo.FieldType;
46import org.apache.arrow.vector.util.CallBack;
47import org.apache.arrow.vector.util.OversizedAllocationException;
48import org.apache.arrow.vector.util.TransferPair;
49
50/**
51 * A Struct vector consists of nullability/validity buffer and children vectors
52 * that make up the struct's fields. The children vectors are handled by the
53 * parent class.
54 */
55public class StructVector extends NonNullableStructVector implements FieldVector {
56
57 public static StructVector empty(String name, BufferAllocator allocator) {
58 FieldType fieldType = FieldType.nullable(Struct.INSTANCE);
59 return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false);
60 }
61
62 public static StructVector emptyWithDuplicates(String name, BufferAllocator allocator) {
63 FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
64 return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true);
65 }
66
67 private final NullableStructReaderImpl reader = new NullableStructReaderImpl(this);
68 private final NullableStructWriter writer = new NullableStructWriter(this);
69
70 protected ArrowBuf validityBuffer;
71 private int validityAllocationSizeInBytes;
72
73 /**
74 * Constructs a new instance.
75 *
76 * @param name The name of the instance.
77 * @param allocator The allocator to use to allocating/reallocating buffers.
78 * @param fieldType The type of this list.
79 * @param callBack A schema change callback.
80 */
81 public StructVector(String name,
82 BufferAllocator allocator,
83 FieldType fieldType,
84 CallBack callBack) {
85 super(name,
86 checkNotNull(allocator),
87 fieldType,
88 callBack);
89 this.validityBuffer = allocator.getEmpty();
90 this.validityAllocationSizeInBytes =
91 BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
92 }
93
94 /**
95 * Constructs a new instance.
96 *
97 * @param name The name of the instance.
98 * @param allocator The allocator to use to allocating/reallocating buffers.
99 * @param fieldType The type of this list.
100 * @param callBack A schema change callback.
101 * @param conflictPolicy policy to determine how duplicate names are handled.
102 * @param allowConflictPolicyChanges wether duplicate names are allowed at all.
103 */
104 public StructVector(String name,
105 BufferAllocator allocator,
106 FieldType fieldType,
107 CallBack callBack,
108 ConflictPolicy conflictPolicy,
109 boolean allowConflictPolicyChanges) {
110 super(name, checkNotNull(allocator), fieldType, callBack, conflictPolicy, allowConflictPolicyChanges);
111 this.validityBuffer = allocator.getEmpty();
112 this.validityAllocationSizeInBytes =
113 BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
114 }
115
116 @Override
117 public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
118 if (ownBuffers.size() != 1) {
119 throw new IllegalArgumentException("Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size());
120 }
121
122 ArrowBuf bitBuffer = ownBuffers.get(0);
123
124 validityBuffer.getReferenceManager().release();
125 validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
126 valueCount = fieldNode.getLength();
127 validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
128 }
129
130 @Override
131 public List<ArrowBuf> getFieldBuffers() {
132 List<ArrowBuf> result = new ArrayList<>(1);
133 setReaderAndWriterIndex();
134 result.add(validityBuffer);
135
136 return result;
137 }
138
139 private void setReaderAndWriterIndex() {
140 validityBuffer.readerIndex(0);
141 validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSize(valueCount));
142 }
143
144 /**
145 * Get the inner vectors.
146 *
147 * @deprecated This API will be removed as the current implementations no longer support inner vectors.
148 *
149 * @return the inner vectors for this field as defined by the TypeLayout
150 */
151 @Deprecated
152 @Override
153 public List<BufferBacked> getFieldInnerVectors() {
154 throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
155 }
156
157 @Override
158 public NullableStructReaderImpl getReader() {
159 return reader;
160 }
161
162 public NullableStructWriter getWriter() {
163 return writer;
164 }
165
166 @Override
167 public TransferPair getTransferPair(BufferAllocator allocator) {
168 return new NullableStructTransferPair(this, new StructVector(name,
169 allocator,
170 fieldType,
171 null,
172 getConflictPolicy(),
173 allowConflictPolicyChanges), false);
174 }
175
176 @Override
177 public TransferPair makeTransferPair(ValueVector to) {
178 return new NullableStructTransferPair(this, (StructVector) to, false);
179 }
180
181 @Override
182 public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
183 return new NullableStructTransferPair(this, new StructVector(ref,
184 allocator,
185 fieldType,
186 null,
187 getConflictPolicy(),
188 allowConflictPolicyChanges), false);
189 }
190
191 @Override
192 public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
193 return new NullableStructTransferPair(this, new StructVector(ref,
194 allocator,
195 fieldType,
196 callBack,
197 getConflictPolicy(),
198 allowConflictPolicyChanges), false);
199 }
200
201 /**
202 * {@link TransferPair} for this (nullable) {@link StructVector}.
203 */
204 protected class NullableStructTransferPair extends StructTransferPair {
205
206 private StructVector target;
207
208 protected NullableStructTransferPair(StructVector from, StructVector to, boolean allocate) {
209 super(from, to, allocate);
210 this.target = to;
211 }
212
213 @Override
214 public void transfer() {
215 target.clear();
216 target.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, target.allocator);
217 super.transfer();
218 clear();
219 }
220
221 @Override
222 public void copyValueSafe(int fromIndex, int toIndex) {
223 while (toIndex >= target.getValidityBufferValueCapacity()) {
224 target.reallocValidityBuffer();
225 }
226 BitVectorHelper.setValidityBit(target.validityBuffer, toIndex, isSet(fromIndex));
227 super.copyValueSafe(fromIndex, toIndex);
228 }
229
230 @Override
231 public void splitAndTransfer(int startIndex, int length) {
232 Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
233 "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
234 target.clear();
235 splitAndTransferValidityBuffer(startIndex, length, target);
236 super.splitAndTransfer(startIndex, length);
237 }
238 }
239
240 /*
241 * transfer the validity.
242 */
243 private void splitAndTransferValidityBuffer(int startIndex, int length, StructVector target) {
244 int firstByteSource = BitVectorHelper.byteIndex(startIndex);
245 int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
246 int byteSizeTarget = BitVectorHelper.getValidityBufferSize(length);
247 int offset = startIndex % 8;
248
249 if (length > 0) {
250 if (offset == 0) {
251 // slice
252 if (target.validityBuffer != null) {
253 target.validityBuffer.getReferenceManager().release();
254 }
255 target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
256 target.validityBuffer.getReferenceManager().retain(1);
257 } else {
258 /* Copy data
259 * When the first bit starts from the middle of a byte (offset != 0),
260 * copy data from src BitVector.
261 * Each byte in the target is composed by a part in i-th byte,
262 * another part in (i+1)-th byte.
263 */
264 target.allocateValidityBuffer(byteSizeTarget);
265
266 for (int i = 0; i < byteSizeTarget - 1; i++) {
267 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
268 byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
269
270 target.validityBuffer.setByte(i, (b1 + b2));
271 }
272
273 /* Copying the last piece is done in the following manner:
274 * if the source vector has 1 or more bytes remaining, we copy
275 * the last piece as a byte formed by shifting data
276 * from the current byte and the next byte.
277 *
278 * if the source vector has no more bytes remaining
279 * (we are at the last byte), we copy the last piece as a byte
280 * by shifting data from the current byte.
281 */
282 if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
283 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
284 firstByteSource + byteSizeTarget - 1, offset);
285 byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
286 firstByteSource + byteSizeTarget, offset);
287
288 target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
289 } else {
290 byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
291 firstByteSource + byteSizeTarget - 1, offset);
292 target.validityBuffer.setByte(byteSizeTarget - 1, b1);
293 }
294 }
295 }
296 }
297
298 /**
299 * Get the value capacity of the internal validity buffer.
300 * @return number of elements that validity buffer can hold
301 */
302 private int getValidityBufferValueCapacity() {
303 return checkedCastToInt(validityBuffer.capacity() * 8);
304 }
305
306 /**
307 * Get the current value capacity for the vector.
308 * @return number of elements that vector can hold.
309 */
310 @Override
311 public int getValueCapacity() {
312 return Math.min(getValidityBufferValueCapacity(),
313 super.getValueCapacity());
314 }
315
316 /**
317 * Return the underlying buffers associated with this vector. Note that this doesn't
318 * impact the reference counts for this buffer so it only should be used for in-context
319 * access. Also note that this buffer changes regularly thus
320 * external classes shouldn't hold a reference to it (unless they change it).
321 *
322 * @param clear Whether to clear vector before returning; the buffers will still be refcounted
323 * but the returned array will be the only reference to them
324 * @return The underlying {@link ArrowBuf buffers} that is used by this
325 * vector instance.
326 */
327 @Override
328 public ArrowBuf[] getBuffers(boolean clear) {
329 setReaderAndWriterIndex();
330 final ArrowBuf[] buffers;
331 if (getBufferSize() == 0) {
332 buffers = new ArrowBuf[0];
333 } else {
334 List<ArrowBuf> list = new ArrayList<>();
335 list.add(validityBuffer);
336 list.addAll(Arrays.asList(super.getBuffers(false)));
337 buffers = list.toArray(new ArrowBuf[list.size()]);
338 }
339 if (clear) {
340 for (ArrowBuf buffer : buffers) {
341 buffer.getReferenceManager().retain();
342 }
343 clear();
344 }
345
346 return buffers;
347 }
348
349 /**
350 * Close the vector and release the associated buffers.
351 */
352 @Override
353 public void close() {
354 clearValidityBuffer();
355 super.close();
356 }
357
358 /**
359 * Same as {@link #close()}.
360 */
361 @Override
362 public void clear() {
363 clearValidityBuffer();
364 super.clear();
365 }
366
367 /**
368 * Reset this vector to empty, does not release buffers.
369 */
370 @Override
371 public void reset() {
372 super.reset();
373 validityBuffer.setZero(0, validityBuffer.capacity());
374 }
375
376 /**
377 * Release the validity buffer.
378 */
379 private void clearValidityBuffer() {
380 validityBuffer.getReferenceManager().release();
381 validityBuffer = allocator.getEmpty();
382 }
383
384 /**
385 * Get the size (number of bytes) of underlying buffers used by this vector.
386 *
387 * @return size of underlying buffers.
388 */
389 @Override
390 public int getBufferSize() {
391 if (valueCount == 0) {
392 return 0;
393 }
394 return super.getBufferSize() +
395 BitVectorHelper.getValidityBufferSize(valueCount);
396 }
397
398 /**
399 * Get the potential buffer size for a particular number of records.
400 *
401 * @param valueCount desired number of elements in the vector
402 * @return estimated size of underlying buffers if the vector holds
403 * a given number of elements
404 */
405 @Override
406 public int getBufferSizeFor(final int valueCount) {
407 if (valueCount == 0) {
408 return 0;
409 }
410 return super.getBufferSizeFor(valueCount) +
411 BitVectorHelper.getValidityBufferSize(valueCount);
412 }
413
414 @Override
415 public void setInitialCapacity(int numRecords) {
416 validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
417 super.setInitialCapacity(numRecords);
418 }
419
420 @Override
421 public void setInitialCapacity(int numRecords, double density) {
422 validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
423 super.setInitialCapacity(numRecords, density);
424 }
425
426 @Override
427 public boolean allocateNewSafe() {
428 /* Boolean to keep track if all the memory allocations were successful
429 * Used in the case of composite vectors when we need to allocate multiple
430 * buffers for multiple vectors. If one of the allocations failed we need to
431 * clear all the memory that we allocated
432 */
433 boolean success = false;
434 try {
435 clear();
436 allocateValidityBuffer(validityAllocationSizeInBytes);
437 success = super.allocateNewSafe();
438 } finally {
439 if (!success) {
440 clear();
441 return false;
442 }
443 }
444 return true;
445 }
446
447 private void allocateValidityBuffer(final long size) {
448 final int curSize = (int) size;
449 validityBuffer = allocator.buffer(curSize);
450 validityBuffer.readerIndex(0);
451 validityAllocationSizeInBytes = curSize;
452 validityBuffer.setZero(0, validityBuffer.capacity());
453 }
454
455 @Override
456 public void reAlloc() {
457 /* reallocate the validity buffer */
458 reallocValidityBuffer();
459 super.reAlloc();
460 }
461
462 private void reallocValidityBuffer() {
463 final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
464 long newAllocationSize = currentBufferCapacity * 2;
465 if (newAllocationSize == 0) {
466 if (validityAllocationSizeInBytes > 0) {
467 newAllocationSize = validityAllocationSizeInBytes;
468 } else {
469 newAllocationSize = BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2;
470 }
471 }
472 newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
473 assert newAllocationSize >= 1;
474
475 if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
476 throw new OversizedAllocationException("Unable to expand the buffer");
477 }
478
479 final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
480 newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
481 newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
482 validityBuffer.getReferenceManager().release(1);
483 validityBuffer = newBuf;
484 validityAllocationSizeInBytes = (int) newAllocationSize;
485 }
486
487 @Override
488 public long getValidityBufferAddress() {
489 return validityBuffer.memoryAddress();
490 }
491
492 @Override
493 public long getDataBufferAddress() {
494 throw new UnsupportedOperationException();
495 }
496
497 @Override
498 public long getOffsetBufferAddress() {
499 throw new UnsupportedOperationException();
500 }
501
502 @Override
503 public ArrowBuf getValidityBuffer() {
504 return validityBuffer;
505 }
506
507 @Override
508 public ArrowBuf getDataBuffer() {
509 throw new UnsupportedOperationException();
510 }
511
512 @Override
513 public ArrowBuf getOffsetBuffer() {
514 throw new UnsupportedOperationException();
515 }
516
517 @Override
518 public Map<String, ?> getObject(int index) {
519 if (isSet(index) == 0) {
520 return null;
521 } else {
522 return super.getObject(index);
523 }
524 }
525
526 @Override
527 public int hashCode(int index) {
528 return hashCode(index, null);
529 }
530
531 @Override
532 public int hashCode(int index, ArrowBufHasher hasher) {
533 if (isSet(index) == 0) {
534 return ArrowBufPointer.NULL_HASH_CODE;
535 } else {
536 return super.hashCode(index, hasher);
537 }
538 }
539
540 @Override
541 public void get(int index, ComplexHolder holder) {
542 holder.isSet = isSet(index);
543 if (holder.isSet == 0) {
544 holder.reader = null;
545 return;
546 }
547 super.get(index, holder);
548 }
549
550 /**
551 * Return the number of null values in the vector.
552 */
553 public int getNullCount() {
554 return BitVectorHelper.getNullCount(validityBuffer, valueCount);
555 }
556
557 /**
558 * Returns true if the value at the provided index is null.
559 */
560 public boolean isNull(int index) {
561 return isSet(index) == 0;
562 }
563
564 /**
565 * Returns true the value at the given index is set (i.e. not null).
566 */
567 public int isSet(int index) {
568 final int byteIndex = index >> 3;
569 final byte b = validityBuffer.getByte(byteIndex);
570 final int bitIndex = index & 7;
571 return (b >> bitIndex) & 0x01;
572 }
573
574 /**
575 * Marks the value at index as being set. Reallocates the validity buffer
576 * if index is larger than current capacity.
577 */
578 public void setIndexDefined(int index) {
579 while (index >= getValidityBufferValueCapacity()) {
580 /* realloc the inner buffers if needed */
581 reallocValidityBuffer();
582 }
583 BitVectorHelper.setBit(validityBuffer, index);
584 }
585
586 /**
587 * Marks the value at index as null/not set.
588 */
589 public void setNull(int index) {
590 while (index >= getValidityBufferValueCapacity()) {
591 /* realloc the inner buffers if needed */
592 reallocValidityBuffer();
593 }
594 BitVectorHelper.unsetBit(validityBuffer, index);
595 }
596
597 @Override
598 public void setValueCount(int valueCount) {
599 Preconditions.checkArgument(valueCount >= 0);
600 while (valueCount > getValidityBufferValueCapacity()) {
601 /* realloc the inner buffers if needed */
602 reallocValidityBuffer();
603 }
604 super.setValueCount(valueCount);
605 this.valueCount = valueCount;
606 }
607
608}