]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.vector; | |
19 | ||
20 | import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; | |
21 | ||
22 | import java.nio.ByteBuffer; | |
23 | import java.util.ArrayList; | |
24 | import java.util.Collections; | |
25 | import java.util.List; | |
26 | ||
27 | import org.apache.arrow.memory.ArrowBuf; | |
28 | import org.apache.arrow.memory.BufferAllocator; | |
29 | import org.apache.arrow.memory.OutOfMemoryException; | |
30 | import org.apache.arrow.memory.util.ArrowBufPointer; | |
31 | import org.apache.arrow.memory.util.ByteFunctionHelpers; | |
32 | import org.apache.arrow.memory.util.CommonUtil; | |
33 | import org.apache.arrow.memory.util.hash.ArrowBufHasher; | |
34 | import org.apache.arrow.util.Preconditions; | |
35 | import org.apache.arrow.vector.compare.VectorVisitor; | |
36 | import org.apache.arrow.vector.ipc.message.ArrowFieldNode; | |
37 | import org.apache.arrow.vector.types.pojo.Field; | |
38 | import org.apache.arrow.vector.util.CallBack; | |
39 | import org.apache.arrow.vector.util.OversizedAllocationException; | |
40 | import org.apache.arrow.vector.util.TransferPair; | |
41 | ||
42 | /** | |
43 | * BaseVariableWidthVector is a base class providing functionality for strings/bytes types. | |
44 | */ | |
45 | public abstract class BaseVariableWidthVector extends BaseValueVector | |
46 | implements VariableWidthVector, FieldVector, VectorDefinitionSetter { | |
47 | private static final int DEFAULT_RECORD_BYTE_COUNT = 8; | |
48 | private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; | |
49 | private int lastValueCapacity; | |
50 | private long lastValueAllocationSizeInBytes; | |
51 | ||
52 | /* protected members */ | |
53 | public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ | |
54 | protected static final byte[] emptyByteArray = new byte[]{}; | |
55 | protected ArrowBuf validityBuffer; | |
56 | protected ArrowBuf valueBuffer; | |
57 | protected ArrowBuf offsetBuffer; | |
58 | protected int valueCount; | |
59 | protected int lastSet; | |
60 | protected final Field field; | |
61 | ||
/**
 * Creates a vector for the given field with no values set.
 *
 * <p>Each of the three buffers is initialised from {@code allocator.getEmpty()}; the
 * allocation hints are seeded from the class defaults.
 *
 * @param field the field materialized by this vector
 * @param allocator the allocator used for creating and resizing buffers
 */
public BaseVariableWidthVector(Field field, final BufferAllocator allocator) {
  super(allocator);
  this.field = field;

  this.offsetBuffer = allocator.getEmpty();
  this.validityBuffer = allocator.getEmpty();
  this.valueBuffer = allocator.getEmpty();

  this.lastSet = -1;
  this.valueCount = 0;

  // One offset slot is reserved for the trailing end offset, hence the -1.
  this.lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
  this.lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
}
80 | ||
81 | @Override | |
82 | public String getName() { | |
83 | return field.getName(); | |
84 | } | |
85 | ||
86 | /* TODO: | |
87 | * see if getNullCount() can be made faster -- O(1) | |
88 | */ | |
89 | ||
90 | /* TODO: | |
91 | * Once the entire hierarchy has been refactored, move common functions | |
92 | * like getNullCount(), splitAndTransferValidityBuffer to top level | |
93 | * base class BaseValueVector. | |
94 | * | |
95 | * Along with this, some class members (validityBuffer) can also be | |
96 | * abstracted out to top level base class. | |
97 | * | |
98 | * Right now BaseValueVector is the top level base class for other | |
99 | * vector types in ValueVector hierarchy (non-nullable) and those | |
100 | * vectors have not yet been refactored/removed so moving things to | |
101 | * the top class as of now is not a good idea. | |
102 | */ | |
103 | ||
104 | /** | |
105 | * Get buffer that manages the validity (NULL or NON-NULL nature) of | |
106 | * elements in the vector. Consider it as a buffer for internal bit vector | |
107 | * data structure. | |
108 | * @return buffer | |
109 | */ | |
110 | @Override | |
111 | public ArrowBuf getValidityBuffer() { | |
112 | return validityBuffer; | |
113 | } | |
114 | ||
115 | /** | |
116 | * Get the buffer that stores the data for elements in the vector. | |
117 | * @return buffer | |
118 | */ | |
119 | @Override | |
120 | public ArrowBuf getDataBuffer() { | |
121 | return valueBuffer; | |
122 | } | |
123 | ||
124 | /** | |
125 | * buffer that stores the offsets for elements | |
126 | * in the vector. This operation is not supported for fixed-width vectors. | |
127 | * @return buffer | |
128 | */ | |
129 | @Override | |
130 | public ArrowBuf getOffsetBuffer() { | |
131 | return offsetBuffer; | |
132 | } | |
133 | ||
134 | /** | |
135 | * Get the memory address of buffer that stores the offsets for elements | |
136 | * in the vector. | |
137 | * @return starting address of the buffer | |
138 | */ | |
139 | @Override | |
140 | public long getOffsetBufferAddress() { | |
141 | return offsetBuffer.memoryAddress(); | |
142 | } | |
143 | ||
144 | /** | |
145 | * Get the memory address of buffer that manages the validity | |
146 | * (NULL or NON-NULL nature) of elements in the vector. | |
147 | * @return starting address of the buffer | |
148 | */ | |
149 | @Override | |
150 | public long getValidityBufferAddress() { | |
151 | return validityBuffer.memoryAddress(); | |
152 | } | |
153 | ||
154 | /** | |
155 | * Get the memory address of buffer that stores the data for elements | |
156 | * in the vector. | |
157 | * @return starting address of the buffer | |
158 | */ | |
159 | @Override | |
160 | public long getDataBufferAddress() { | |
161 | return valueBuffer.memoryAddress(); | |
162 | } | |
163 | ||
164 | /** | |
165 | * Sets the desired value capacity for the vector. This function doesn't | |
166 | * allocate any memory for the vector. | |
167 | * @param valueCount desired number of elements in the vector | |
168 | */ | |
169 | @Override | |
170 | public void setInitialCapacity(int valueCount) { | |
171 | final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; | |
172 | checkDataBufferSize(size); | |
173 | computeAndCheckOffsetsBufferSize(valueCount); | |
174 | lastValueAllocationSizeInBytes = (int) size; | |
175 | lastValueCapacity = valueCount; | |
176 | } | |
177 | ||
178 | /** | |
179 | * Sets the desired value capacity for the vector. This function doesn't | |
180 | * allocate any memory for the vector. | |
181 | * @param valueCount desired number of elements in the vector | |
182 | * @param density average number of bytes per variable width element | |
183 | */ | |
184 | @Override | |
185 | public void setInitialCapacity(int valueCount, double density) { | |
186 | long size = Math.max((long) (valueCount * density), 1L); | |
187 | checkDataBufferSize(size); | |
188 | computeAndCheckOffsetsBufferSize(valueCount); | |
189 | lastValueAllocationSizeInBytes = (int) size; | |
190 | lastValueCapacity = valueCount; | |
191 | } | |
192 | ||
193 | /** | |
194 | * Get the density of this ListVector. | |
195 | * @return density | |
196 | */ | |
197 | public double getDensity() { | |
198 | if (valueCount == 0) { | |
199 | return 0.0D; | |
200 | } | |
201 | final int startOffset = offsetBuffer.getInt(0); | |
202 | final int endOffset = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); | |
203 | final double totalListSize = endOffset - startOffset; | |
204 | return totalListSize / valueCount; | |
205 | } | |
206 | ||
207 | /** | |
208 | * Get the current capacity which does not exceed either validity buffer or offset buffer. | |
209 | * Note: Here the `getValueCapacity` has no relationship with the value buffer. | |
210 | * @return number of elements that vector can hold. | |
211 | */ | |
212 | @Override | |
213 | public int getValueCapacity() { | |
214 | final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); | |
215 | return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); | |
216 | } | |
217 | ||
218 | private int getValidityBufferValueCapacity() { | |
219 | return capAtMaxInt(validityBuffer.capacity() * 8); | |
220 | } | |
221 | ||
222 | private int getOffsetBufferValueCapacity() { | |
223 | return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); | |
224 | } | |
225 | ||
226 | /** | |
227 | * zero out the vector and the data in associated buffers. | |
228 | */ | |
229 | public void zeroVector() { | |
230 | initValidityBuffer(); | |
231 | initOffsetBuffer(); | |
232 | valueBuffer.setZero(0, valueBuffer.capacity()); | |
233 | } | |
234 | ||
235 | /* zero out the validity buffer */ | |
236 | private void initValidityBuffer() { | |
237 | validityBuffer.setZero(0, validityBuffer.capacity()); | |
238 | } | |
239 | ||
240 | /* zero out the offset buffer */ | |
241 | private void initOffsetBuffer() { | |
242 | offsetBuffer.setZero(0, offsetBuffer.capacity()); | |
243 | } | |
244 | ||
245 | /** | |
246 | * Reset the vector to initial state. Same as {@link #zeroVector()}. | |
247 | * Note that this method doesn't release any memory. | |
248 | */ | |
249 | public void reset() { | |
250 | zeroVector(); | |
251 | lastSet = -1; | |
252 | valueCount = 0; | |
253 | } | |
254 | ||
255 | /** | |
256 | * Close the vector and release the associated buffers. | |
257 | */ | |
258 | @Override | |
259 | public void close() { | |
260 | clear(); | |
261 | } | |
262 | ||
263 | /** | |
264 | * Same as {@link #close()}. | |
265 | */ | |
266 | @Override | |
267 | public void clear() { | |
268 | validityBuffer = releaseBuffer(validityBuffer); | |
269 | valueBuffer = releaseBuffer(valueBuffer); | |
270 | offsetBuffer = releaseBuffer(offsetBuffer); | |
271 | lastSet = -1; | |
272 | valueCount = 0; | |
273 | } | |
274 | ||
275 | /** | |
276 | * Get the inner vectors. | |
277 | * | |
278 | * @deprecated This API will be removed as the current implementations no longer support inner vectors. | |
279 | * | |
280 | * @return the inner vectors for this field as defined by the TypeLayout | |
281 | */ | |
282 | @Deprecated | |
283 | @Override | |
284 | public List<BufferBacked> getFieldInnerVectors() { | |
285 | throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); | |
286 | } | |
287 | ||
288 | /** | |
289 | * Initialize the children in schema for this Field. This operation is a | |
290 | * NO-OP for scalar types since they don't have any children. | |
291 | * @param children the schema | |
292 | * @throws IllegalArgumentException if children is a non-empty list for scalar types. | |
293 | */ | |
294 | @Override | |
295 | public void initializeChildrenFromFields(List<Field> children) { | |
296 | if (!children.isEmpty()) { | |
297 | throw new IllegalArgumentException("primitive type vector can not have children"); | |
298 | } | |
299 | } | |
300 | ||
301 | /** | |
302 | * Get the inner child vectors. | |
303 | * @return list of child vectors for complex types, empty list for scalar vector types | |
304 | */ | |
305 | @Override | |
306 | public List<FieldVector> getChildrenFromFields() { | |
307 | return Collections.emptyList(); | |
308 | } | |
309 | ||
310 | ||
311 | /** | |
312 | * Load the buffers of this vector with provided source buffers. | |
313 | * The caller manages the source buffers and populates them before invoking | |
314 | * this method. | |
315 | * @param fieldNode the fieldNode indicating the value count | |
316 | * @param ownBuffers the buffers for this Field (own buffers only, children not included) | |
317 | */ | |
318 | @Override | |
319 | public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) { | |
320 | ArrowBuf bitBuffer = ownBuffers.get(0); | |
321 | ArrowBuf offBuffer = ownBuffers.get(1); | |
322 | ArrowBuf dataBuffer = ownBuffers.get(2); | |
323 | ||
324 | validityBuffer.getReferenceManager().release(); | |
325 | validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); | |
326 | offsetBuffer.getReferenceManager().release(); | |
327 | offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); | |
328 | valueBuffer.getReferenceManager().release(); | |
329 | valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator); | |
330 | ||
331 | lastSet = fieldNode.getLength() - 1; | |
332 | valueCount = fieldNode.getLength(); | |
333 | } | |
334 | ||
335 | /** | |
336 | * Get the buffers belonging to this vector. | |
337 | * @return the inner buffers. | |
338 | */ | |
339 | public List<ArrowBuf> getFieldBuffers() { | |
340 | // before flight/IPC, we must bring the vector to a consistent state. | |
341 | // this is because, it is possible that the offset buffers of some trailing values | |
342 | // are not updated. this may cause some data in the data buffer being lost. | |
343 | // for details, please see TestValueVector#testUnloadVariableWidthVector. | |
344 | fillHoles(valueCount); | |
345 | ||
346 | List<ArrowBuf> result = new ArrayList<>(3); | |
347 | setReaderAndWriterIndex(); | |
348 | result.add(validityBuffer); | |
349 | result.add(offsetBuffer); | |
350 | result.add(valueBuffer); | |
351 | ||
352 | return result; | |
353 | } | |
354 | ||
355 | /** | |
356 | * Set the reader and writer indexes for the inner buffers. | |
357 | */ | |
358 | private void setReaderAndWriterIndex() { | |
359 | validityBuffer.readerIndex(0); | |
360 | offsetBuffer.readerIndex(0); | |
361 | valueBuffer.readerIndex(0); | |
362 | if (valueCount == 0) { | |
363 | validityBuffer.writerIndex(0); | |
364 | offsetBuffer.writerIndex(0); | |
365 | valueBuffer.writerIndex(0); | |
366 | } else { | |
367 | final int lastDataOffset = getStartOffset(valueCount); | |
368 | validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); | |
369 | offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); | |
370 | valueBuffer.writerIndex(lastDataOffset); | |
371 | } | |
372 | } | |
373 | ||
374 | /** | |
375 | * Same as {@link #allocateNewSafe()}. | |
376 | */ | |
377 | @Override | |
378 | public void allocateNew() { | |
379 | allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); | |
380 | } | |
381 | ||
382 | /** | |
383 | * Allocate memory for the vector. We internally use a default value count | |
384 | * of 4096 to allocate memory for at least these many elements in the | |
385 | * vector. See {@link #allocateNew(long, int)} for allocating memory for specific | |
386 | * number of elements in the vector. | |
387 | * | |
388 | * @return false if memory allocation fails, true otherwise. | |
389 | */ | |
390 | @Override | |
391 | public boolean allocateNewSafe() { | |
392 | try { | |
393 | allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); | |
394 | return true; | |
395 | } catch (Exception e) { | |
396 | return false; | |
397 | } | |
398 | } | |
399 | ||
400 | /** | |
401 | * Allocate memory for the vector to support storing at least the provided number of | |
402 | * elements in the vector. This method must be called prior to using the ValueVector. | |
403 | * | |
404 | * @param totalBytes desired total memory capacity | |
405 | * @param valueCount the desired number of elements in the vector | |
406 | * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails | |
407 | */ | |
408 | @Override | |
409 | public void allocateNew(long totalBytes, int valueCount) { | |
410 | assert totalBytes >= 0; | |
411 | ||
412 | checkDataBufferSize(totalBytes); | |
413 | computeAndCheckOffsetsBufferSize(valueCount); | |
414 | ||
415 | /* we are doing a new allocation -- release the current buffers */ | |
416 | clear(); | |
417 | ||
418 | try { | |
419 | allocateBytes(totalBytes, valueCount); | |
420 | } catch (Exception e) { | |
421 | clear(); | |
422 | throw e; | |
423 | } | |
424 | } | |
425 | ||
426 | @Override | |
427 | public void allocateNew(int valueCount) { | |
428 | allocateNew(lastValueAllocationSizeInBytes, valueCount); | |
429 | } | |
430 | ||
431 | /* Check if the data buffer size is within bounds. */ | |
432 | private void checkDataBufferSize(long size) { | |
433 | if (size > MAX_ALLOCATION_SIZE || size < 0) { | |
434 | throw new OversizedAllocationException("Memory required for vector " + | |
435 | " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); | |
436 | } | |
437 | } | |
438 | ||
439 | /* | |
440 | * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's | |
441 | * within bounds. | |
442 | */ | |
443 | private long computeAndCheckOffsetsBufferSize(int valueCount) { | |
444 | /* to track the end offset of last data element in vector, we need | |
445 | * an additional slot in offset buffer. | |
446 | */ | |
447 | final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); | |
448 | if (size > MAX_ALLOCATION_SIZE) { | |
449 | throw new OversizedAllocationException("Memory required for vector capacity " + | |
450 | valueCount + | |
451 | " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); | |
452 | } | |
453 | return size; | |
454 | } | |
455 | ||
456 | /* allocate the inner buffers */ | |
457 | private void allocateBytes(final long valueBufferSize, final int valueCount) { | |
458 | /* allocate data buffer */ | |
459 | long curSize = valueBufferSize; | |
460 | valueBuffer = allocator.buffer(curSize); | |
461 | valueBuffer.readerIndex(0); | |
462 | ||
463 | /* allocate offset buffer and validity buffer */ | |
464 | DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); | |
465 | offsetBuffer = buffers.getDataBuf(); | |
466 | validityBuffer = buffers.getValidityBuf(); | |
467 | initOffsetBuffer(); | |
468 | initValidityBuffer(); | |
469 | ||
470 | lastValueCapacity = getValueCapacity(); | |
471 | lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity()); | |
472 | } | |
473 | ||
474 | /* allocate offset buffer */ | |
475 | private void allocateOffsetBuffer(final long size) { | |
476 | final int curSize = (int) size; | |
477 | offsetBuffer = allocator.buffer(curSize); | |
478 | offsetBuffer.readerIndex(0); | |
479 | initOffsetBuffer(); | |
480 | } | |
481 | ||
482 | /* allocate validity buffer */ | |
483 | private void allocateValidityBuffer(final long size) { | |
484 | final int curSize = (int) size; | |
485 | validityBuffer = allocator.buffer(curSize); | |
486 | validityBuffer.readerIndex(0); | |
487 | initValidityBuffer(); | |
488 | } | |
489 | ||
490 | /** | |
491 | * Resize the vector to increase the capacity. The internal behavior is to | |
492 | * double the current value capacity. | |
493 | */ | |
494 | public void reAlloc() { | |
495 | reallocDataBuffer(); | |
496 | reallocValidityAndOffsetBuffers(); | |
497 | } | |
498 | ||
499 | /** | |
500 | * Reallocate the data buffer. Data Buffer stores the actual data for | |
501 | * VARCHAR or VARBINARY elements in the vector. The behavior is to double | |
502 | * the size of buffer. | |
503 | * @throws OversizedAllocationException if the desired new size is more than | |
504 | * max allowed | |
505 | * @throws OutOfMemoryException if the internal memory allocation fails | |
506 | */ | |
507 | public void reallocDataBuffer() { | |
508 | final long currentBufferCapacity = valueBuffer.capacity(); | |
509 | long newAllocationSize = currentBufferCapacity * 2; | |
510 | if (newAllocationSize == 0) { | |
511 | if (lastValueAllocationSizeInBytes > 0) { | |
512 | newAllocationSize = lastValueAllocationSizeInBytes; | |
513 | } else { | |
514 | newAllocationSize = INITIAL_BYTE_COUNT * 2L; | |
515 | } | |
516 | } | |
517 | newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); | |
518 | assert newAllocationSize >= 1; | |
519 | ||
520 | checkDataBufferSize(newAllocationSize); | |
521 | ||
522 | final ArrowBuf newBuf = allocator.buffer(newAllocationSize); | |
523 | newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); | |
524 | valueBuffer.getReferenceManager().release(); | |
525 | valueBuffer = newBuf; | |
526 | lastValueAllocationSizeInBytes = valueBuffer.capacity(); | |
527 | } | |
528 | ||
529 | /** | |
530 | * Reallocate the validity and offset buffers for this vector. Validity | |
531 | * buffer is used to track the NULL or NON-NULL nature of elements in | |
532 | * the vector and offset buffer is used to store the lengths of variable | |
533 | * width elements in the vector. | |
534 | * | |
535 | * <p>Note that data buffer for variable length vectors moves independent | |
536 | * of the companion validity and offset buffers. This is in | |
537 | * contrast to what we have for fixed width vectors. | |
538 | * | |
539 | * <p>So even though we may have setup an initial capacity of 1024 | |
540 | * elements in the vector, it is quite possible | |
541 | * that we need to reAlloc() the data buffer when we are setting | |
542 | * the 5th element in the vector simply because previous | |
543 | * variable length elements have exhausted the buffer capacity. | |
544 | * However, we really don't need to reAlloc() validity and | |
545 | * offset buffers until we try to set the 1025th element | |
546 | * This is why we do a separate check for safe methods to | |
547 | * determine which buffer needs reallocation. | |
548 | * @throws OversizedAllocationException if the desired new size is more than | |
549 | * max allowed | |
550 | * @throws OutOfMemoryException if the internal memory allocation fails | |
551 | */ | |
552 | public void reallocValidityAndOffsetBuffers() { | |
553 | int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2); | |
554 | if (targetOffsetCount == 0) { | |
555 | if (lastValueCapacity > 0) { | |
556 | targetOffsetCount = (lastValueCapacity + 1); | |
557 | } else { | |
558 | targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); | |
559 | } | |
560 | } | |
561 | computeAndCheckOffsetsBufferSize(targetOffsetCount); | |
562 | ||
563 | DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); | |
564 | final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); | |
565 | newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); | |
566 | newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); | |
567 | offsetBuffer.getReferenceManager().release(); | |
568 | offsetBuffer = newOffsetBuffer; | |
569 | ||
570 | final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); | |
571 | newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); | |
572 | newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); | |
573 | validityBuffer.getReferenceManager().release(); | |
574 | validityBuffer = newValidityBuffer; | |
575 | ||
576 | lastValueCapacity = getValueCapacity(); | |
577 | } | |
578 | ||
579 | /** | |
580 | * Get the size (number of bytes) of underlying data buffer. | |
581 | * @return number of bytes in the data buffer | |
582 | */ | |
583 | @Override | |
584 | public int getByteCapacity() { | |
585 | return capAtMaxInt(valueBuffer.capacity()); | |
586 | } | |
587 | ||
588 | @Override | |
589 | public int sizeOfValueBuffer() { | |
590 | if (valueCount == 0) { | |
591 | return 0; | |
592 | } | |
593 | return offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); | |
594 | } | |
595 | ||
596 | /** | |
597 | * Get the size (number of bytes) of underlying buffers used by this | |
598 | * vector. | |
599 | * @return size of underlying buffers. | |
600 | */ | |
601 | @Override | |
602 | public int getBufferSize() { | |
603 | return getBufferSizeFor(this.valueCount); | |
604 | } | |
605 | ||
606 | /** | |
607 | * Get the potential buffer size for a particular number of records. | |
608 | * @param valueCount desired number of elements in the vector | |
609 | * @return estimated size of underlying buffers if the vector holds | |
610 | * a given number of elements | |
611 | */ | |
612 | @Override | |
613 | public int getBufferSizeFor(final int valueCount) { | |
614 | if (valueCount == 0) { | |
615 | return 0; | |
616 | } | |
617 | ||
618 | final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); | |
619 | final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; | |
620 | /* get the end offset for this valueCount */ | |
621 | final int dataBufferSize = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); | |
622 | return validityBufferSize + offsetBufferSize + dataBufferSize; | |
623 | } | |
624 | ||
625 | /** | |
626 | * Get information about how this field is materialized. | |
627 | * @return the field corresponding to this vector | |
628 | */ | |
629 | @Override | |
630 | public Field getField() { | |
631 | return field; | |
632 | } | |
633 | ||
634 | /** | |
635 | * Return the underlying buffers associated with this vector. Note that this doesn't | |
636 | * impact the reference counts for this buffer so it only should be used for in-context | |
637 | * access. Also note that this buffer changes regularly thus | |
638 | * external classes shouldn't hold a reference to it (unless they change it). | |
639 | * | |
640 | * @param clear Whether to clear vector before returning; the buffers will still be refcounted | |
641 | * but the returned array will be the only reference to them | |
642 | * @return The underlying {@link ArrowBuf buffers} that is used by this | |
643 | * vector instance. | |
644 | */ | |
645 | @Override | |
646 | public ArrowBuf[] getBuffers(boolean clear) { | |
647 | final ArrowBuf[] buffers; | |
648 | setReaderAndWriterIndex(); | |
649 | if (getBufferSize() == 0) { | |
650 | buffers = new ArrowBuf[0]; | |
651 | } else { | |
652 | buffers = new ArrowBuf[3]; | |
653 | buffers[0] = validityBuffer; | |
654 | buffers[1] = offsetBuffer; | |
655 | buffers[2] = valueBuffer; | |
656 | } | |
657 | if (clear) { | |
658 | for (final ArrowBuf buffer : buffers) { | |
659 | buffer.getReferenceManager().retain(); | |
660 | } | |
661 | clear(); | |
662 | } | |
663 | return buffers; | |
664 | } | |
665 | ||
666 | /** | |
667 | * Construct a transfer pair of this vector and another vector of same type. | |
668 | * @param ref name of the target vector | |
669 | * @param allocator allocator for the target vector | |
670 | * @param callBack not used | |
671 | * @return TransferPair | |
672 | */ | |
673 | @Override | |
674 | public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { | |
675 | return getTransferPair(ref, allocator); | |
676 | } | |
677 | ||
678 | /** | |
679 | * Construct a transfer pair of this vector and another vector of same type. | |
680 | * @param allocator allocator for the target vector | |
681 | * @return TransferPair | |
682 | */ | |
683 | @Override | |
684 | public TransferPair getTransferPair(BufferAllocator allocator) { | |
685 | return getTransferPair(getName(), allocator); | |
686 | } | |
687 | ||
688 | /** | |
689 | * Construct a transfer pair of this vector and another vector of the same type. Declared abstract here; the other getTransferPair overloads in this class delegate to it. | |
690 | * @param ref name of the target vector | |
691 | * @param allocator allocator for the target vector | |
692 | * @return the TransferPair supplied by the implementing subclass | |
693 | */ | |
694 | public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); | |
695 | ||
696 | /** | |
697 | * Transfer this vector'data to another vector. The memory associated | |
698 | * with this vector is transferred to the allocator of target vector | |
699 | * for accounting and management purposes. | |
700 | * @param target destination vector for transfer | |
701 | */ | |
702 | public void transferTo(BaseVariableWidthVector target) { | |
703 | compareTypes(target, "transferTo"); | |
704 | target.clear(); | |
705 | target.validityBuffer = transferBuffer(validityBuffer, target.allocator); | |
706 | target.valueBuffer = transferBuffer(valueBuffer, target.allocator); | |
707 | target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator); | |
708 | target.setLastSet(this.lastSet); | |
709 | if (this.valueCount > 0) { | |
710 | target.setValueCount(this.valueCount); | |
711 | } | |
712 | clear(); | |
713 | } | |
714 | ||
715 | /** | |
716 | * Slice this vector at desired index and length and transfer the | |
717 | * corresponding data to the target vector. | |
718 | * @param startIndex start position of the split in source vector. | |
719 | * @param length length of the split. | |
720 | * @param target destination vector | |
721 | */ | |
722 | public void splitAndTransferTo(int startIndex, int length, | |
723 | BaseVariableWidthVector target) { | |
724 | Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, | |
725 | "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); | |
726 | compareTypes(target, "splitAndTransferTo"); | |
727 | target.clear(); | |
728 | splitAndTransferValidityBuffer(startIndex, length, target); | |
729 | splitAndTransferOffsetBuffer(startIndex, length, target); | |
730 | target.setLastSet(length - 1); | |
731 | if (length > 0) { | |
732 | target.setValueCount(length); | |
733 | } | |
734 | } | |
735 | ||
736 | /** | |
737 | * Transfer the offsets along with data. Unlike the data buffer, we cannot simply | |
738 | * slice the offset buffer for split and transfer. The reason is that offsets | |
739 | * in the target vector have to be adjusted and made relative to the staring | |
740 | * offset in source vector from the start index of split. This is why, we | |
741 | * need to explicitly allocate the offset buffer and set the adjusted offsets | |
742 | * in the target vector. | |
743 | */ | |
744 | private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseVariableWidthVector target) { | |
745 | final int start = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH); | |
746 | final int end = offsetBuffer.getInt((long) (startIndex + length) * OFFSET_WIDTH); | |
747 | final int dataLength = end - start; | |
748 | ||
749 | if (start == 0) { | |
750 | final ArrowBuf slicedOffsetBuffer = offsetBuffer.slice(startIndex * OFFSET_WIDTH, (1 + length) * OFFSET_WIDTH); | |
751 | target.offsetBuffer = transferBuffer(slicedOffsetBuffer, target.allocator); | |
752 | } else { | |
753 | target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH); | |
754 | for (int i = 0; i < length + 1; i++) { | |
755 | final int relativeSourceOffset = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH) - start; | |
756 | target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeSourceOffset); | |
757 | } | |
758 | } | |
759 | final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength); | |
760 | target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); | |
761 | } | |
762 | ||
763 | /* | |
764 | * Transfer the validity. | |
765 | */ | |
766 | private void splitAndTransferValidityBuffer(int startIndex, int length, | |
767 | BaseVariableWidthVector target) { | |
768 | if (length <= 0) { | |
769 | return; | |
770 | } | |
771 | ||
772 | final int firstByteSource = BitVectorHelper.byteIndex(startIndex); | |
773 | final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); | |
774 | final int byteSizeTarget = getValidityBufferSizeFromCount(length); | |
775 | final int offset = startIndex % 8; | |
776 | ||
777 | if (offset == 0) { | |
778 | // slice | |
779 | if (target.validityBuffer != null) { | |
780 | target.validityBuffer.getReferenceManager().release(); | |
781 | } | |
782 | final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); | |
783 | target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); | |
784 | return; | |
785 | } | |
786 | ||
787 | /* Copy data | |
788 | * When the first bit starts from the middle of a byte (offset != 0), | |
789 | * copy data from src BitVector. | |
790 | * Each byte in the target is composed by a part in i-th byte, | |
791 | * another part in (i+1)-th byte. | |
792 | */ | |
793 | target.allocateValidityBuffer(byteSizeTarget); | |
794 | ||
795 | for (int i = 0; i < byteSizeTarget - 1; i++) { | |
796 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); | |
797 | byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); | |
798 | ||
799 | target.validityBuffer.setByte(i, (b1 + b2)); | |
800 | } | |
801 | /* Copying the last piece is done in the following manner: | |
802 | * if the source vector has 1 or more bytes remaining, we copy | |
803 | * the last piece as a byte formed by shifting data | |
804 | * from the current byte and the next byte. | |
805 | * | |
806 | * if the source vector has no more bytes remaining | |
807 | * (we are at the last byte), we copy the last piece as a byte | |
808 | * by shifting data from the current byte. | |
809 | */ | |
810 | if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { | |
811 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, | |
812 | firstByteSource + byteSizeTarget - 1, offset); | |
813 | byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, | |
814 | firstByteSource + byteSizeTarget, offset); | |
815 | ||
816 | target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); | |
817 | } else { | |
818 | byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, | |
819 | firstByteSource + byteSizeTarget - 1, offset); | |
820 | target.validityBuffer.setByte(byteSizeTarget - 1, b1); | |
821 | } | |
822 | } | |
823 | ||
824 | ||
825 | /*----------------------------------------------------------------* | |
826 | | | | |
827 | | common getters and setters | | |
828 | | | | |
829 | *----------------------------------------------------------------*/ | |
830 | ||
831 | ||
832 | /** | |
833 | * Get the number of elements that are null in the vector. | |
834 | * | |
835 | * @return the number of null elements. | |
836 | */ | |
837 | public int getNullCount() { | |
838 | return BitVectorHelper.getNullCount(validityBuffer, valueCount); | |
839 | } | |
840 | ||
841 | /** | |
842 | * Check if the given index is within the current value capacity | |
843 | * of the vector. | |
844 | * | |
845 | * @param index position to check | |
846 | * @return true if index is within the current value capacity | |
847 | */ | |
848 | public boolean isSafe(int index) { | |
849 | return index < getValueCapacity(); | |
850 | } | |
851 | ||
852 | /** | |
853 | * Check if element at given index is null. | |
854 | * | |
855 | * @param index position of element | |
856 | * @return true if element at given index is null | |
857 | */ | |
858 | public boolean isNull(int index) { | |
859 | return (isSet(index) == 0); | |
860 | } | |
861 | ||
862 | /** | |
863 | * Same as {@link #isNull(int)}. | |
864 | * | |
865 | * @param index position of element | |
866 | * @return 1 if element at given index is not null, 0 otherwise | |
867 | */ | |
868 | public int isSet(int index) { | |
869 | final int byteIndex = index >> 3; | |
870 | final byte b = validityBuffer.getByte(byteIndex); | |
871 | final int bitIndex = index & 7; | |
872 | return (b >> bitIndex) & 0x01; | |
873 | } | |
874 | ||
875 | /** | |
876 | * Get the value count of vector. This will always be zero unless | |
877 | * setValueCount(int) has been called prior to calling this. | |
878 | * | |
879 | * @return valueCount for the vector | |
880 | */ | |
881 | public int getValueCount() { | |
882 | return valueCount; | |
883 | } | |
884 | ||
885 | /** | |
886 | * Sets the value count for the vector. | |
887 | * | |
888 | * @param valueCount value count | |
889 | */ | |
890 | public void setValueCount(int valueCount) { | |
891 | assert valueCount >= 0; | |
892 | this.valueCount = valueCount; | |
893 | while (valueCount > getValueCapacity()) { | |
894 | reallocValidityAndOffsetBuffers(); | |
895 | } | |
896 | fillHoles(valueCount); | |
897 | lastSet = valueCount - 1; | |
898 | setReaderAndWriterIndex(); | |
899 | } | |
900 | ||
901 | /** | |
902 | * Create holes in the vector upto the given index (exclusive). | |
903 | * Holes will be created from the current last set position in | |
904 | * the vector. | |
905 | * | |
906 | * @param index target index | |
907 | */ | |
908 | public void fillEmpties(int index) { | |
909 | handleSafe(index, emptyByteArray.length); | |
910 | fillHoles(index); | |
911 | lastSet = index - 1; | |
912 | } | |
913 | ||
914 | /** | |
915 | * Set the index of last non-null element in the vector. | |
916 | * It is important to call this method with appropriate value | |
917 | * before calling {@link #setValueCount(int)}. | |
918 | * | |
919 | * @param value desired index of last non-null element. | |
920 | */ | |
921 | public void setLastSet(int value) { | |
922 | lastSet = value; | |
923 | } | |
924 | ||
925 | /** | |
926 | * Get the index of last non-null element in the vector. | |
927 | * | |
928 | * @return index of the last non-null element | |
929 | */ | |
930 | public int getLastSet() { | |
931 | return lastSet; | |
932 | } | |
933 | ||
934 | /** | |
935 | * Get the starting position (offset) in the data stream for a given | |
936 | * element in the vector. | |
937 | * | |
938 | * @param index position of the element in the vector | |
939 | * @return starting offset for the element | |
940 | */ | |
941 | public long getStartEnd(int index) { | |
942 | return offsetBuffer.getLong((long) index * OFFSET_WIDTH); | |
943 | } | |
944 | ||
945 | /** | |
946 | * Mark the particular position in the vector as non-null. | |
947 | * | |
948 | * @param index position of the element. | |
949 | */ | |
950 | @Override | |
951 | public void setIndexDefined(int index) { | |
952 | // We need to check and realloc both validity and offset buffer | |
953 | while (index >= getValueCapacity()) { | |
954 | reallocValidityAndOffsetBuffers(); | |
955 | } | |
956 | BitVectorHelper.setBit(validityBuffer, index); | |
957 | } | |
958 | ||
959 | /** | |
960 | * Sets the value length for an element. | |
961 | * | |
962 | * @param index position of the element to set | |
963 | * @param length length of the element | |
964 | */ | |
965 | public void setValueLengthSafe(int index, int length) { | |
966 | assert index >= 0; | |
967 | handleSafe(index, length); | |
968 | fillHoles(index); | |
969 | final int startOffset = getStartOffset(index); | |
970 | offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length); | |
971 | lastSet = index; | |
972 | } | |
973 | ||
974 | /** | |
975 | * Get the variable length element at specified index as Text. | |
976 | * | |
977 | * @param index position of element to get | |
978 | * @return greater than 0 length for non-null element, 0 otherwise | |
979 | */ | |
980 | public int getValueLength(int index) { | |
981 | assert index >= 0; | |
982 | if (isSet(index) == 0) { | |
983 | return 0; | |
984 | } | |
985 | final int startOffset = getStartOffset(index); | |
986 | final int dataLength = | |
987 | offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - startOffset; | |
988 | return dataLength; | |
989 | } | |
990 | ||
991 | /** | |
992 | * Set the variable length element at the specified index to the supplied | |
993 | * byte array. This is same as using {@link #set(int, byte[], int, int)} | |
994 | * with start as 0 and length as value.length | |
995 | * | |
996 | * @param index position of the element to set | |
997 | * @param value array of bytes to write | |
998 | */ | |
999 | public void set(int index, byte[] value) { | |
1000 | assert index >= 0; | |
1001 | fillHoles(index); | |
1002 | BitVectorHelper.setBit(validityBuffer, index); | |
1003 | setBytes(index, value, 0, value.length); | |
1004 | lastSet = index; | |
1005 | } | |
1006 | ||
1007 | /** | |
1008 | * Same as {@link #set(int, byte[])} except that it handles the | |
1009 | * case where index and length of new element are beyond the existing | |
1010 | * capacity of the vector. | |
1011 | * | |
1012 | * @param index position of the element to set | |
1013 | * @param value array of bytes to write | |
1014 | */ | |
1015 | public void setSafe(int index, byte[] value) { | |
1016 | assert index >= 0; | |
1017 | handleSafe(index, value.length); | |
1018 | fillHoles(index); | |
1019 | BitVectorHelper.setBit(validityBuffer, index); | |
1020 | setBytes(index, value, 0, value.length); | |
1021 | lastSet = index; | |
1022 | } | |
1023 | ||
1024 | /** | |
1025 | * Set the variable length element at the specified index to the supplied | |
1026 | * byte array. | |
1027 | * | |
1028 | * @param index position of the element to set | |
1029 | * @param value array of bytes to write | |
1030 | * @param start start index in array of bytes | |
1031 | * @param length length of data in array of bytes | |
1032 | */ | |
1033 | public void set(int index, byte[] value, int start, int length) { | |
1034 | assert index >= 0; | |
1035 | fillHoles(index); | |
1036 | BitVectorHelper.setBit(validityBuffer, index); | |
1037 | setBytes(index, value, start, length); | |
1038 | lastSet = index; | |
1039 | } | |
1040 | ||
1041 | /** | |
1042 | * Same as {@link #set(int, byte[], int, int)} except that it handles the | |
1043 | * case where index and length of new element are beyond the existing | |
1044 | * capacity of the vector. | |
1045 | * | |
1046 | * @param index position of the element to set | |
1047 | * @param value array of bytes to write | |
1048 | * @param start start index in array of bytes | |
1049 | * @param length length of data in array of bytes | |
1050 | */ | |
1051 | public void setSafe(int index, byte[] value, int start, int length) { | |
1052 | assert index >= 0; | |
1053 | handleSafe(index, length); | |
1054 | fillHoles(index); | |
1055 | BitVectorHelper.setBit(validityBuffer, index); | |
1056 | setBytes(index, value, start, length); | |
1057 | lastSet = index; | |
1058 | } | |
1059 | ||
1060 | /** | |
1061 | * Set the variable length element at the specified index to the | |
1062 | * content in supplied ByteBuffer. | |
1063 | * | |
1064 | * @param index position of the element to set | |
1065 | * @param value ByteBuffer with data | |
1066 | * @param start start index in ByteBuffer | |
1067 | * @param length length of data in ByteBuffer | |
1068 | */ | |
1069 | public void set(int index, ByteBuffer value, int start, int length) { | |
1070 | assert index >= 0; | |
1071 | fillHoles(index); | |
1072 | BitVectorHelper.setBit(validityBuffer, index); | |
1073 | final int startOffset = getStartOffset(index); | |
1074 | offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length); | |
1075 | valueBuffer.setBytes(startOffset, value, start, length); | |
1076 | lastSet = index; | |
1077 | } | |
1078 | ||
1079 | /** | |
1080 | * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the | |
1081 | * case where index and length of new element are beyond the existing | |
1082 | * capacity of the vector. | |
1083 | * | |
1084 | * @param index position of the element to set | |
1085 | * @param value ByteBuffer with data | |
1086 | * @param start start index in ByteBuffer | |
1087 | * @param length length of data in ByteBuffer | |
1088 | */ | |
1089 | public void setSafe(int index, ByteBuffer value, int start, int length) { | |
1090 | assert index >= 0; | |
1091 | handleSafe(index, length); | |
1092 | fillHoles(index); | |
1093 | BitVectorHelper.setBit(validityBuffer, index); | |
1094 | final int startOffset = getStartOffset(index); | |
1095 | offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length); | |
1096 | valueBuffer.setBytes(startOffset, value, start, length); | |
1097 | lastSet = index; | |
1098 | } | |
1099 | ||
1100 | /** | |
1101 | * Set the element at the given index to null. | |
1102 | * | |
1103 | * @param index position of element | |
1104 | */ | |
1105 | public void setNull(int index) { | |
1106 | // We need to check and realloc both validity and offset buffer | |
1107 | while (index >= getValueCapacity()) { | |
1108 | reallocValidityAndOffsetBuffers(); | |
1109 | } | |
1110 | BitVectorHelper.unsetBit(validityBuffer, index); | |
1111 | } | |
1112 | ||
1113 | /** | |
1114 | * Store the given value at a particular position in the vector. isSet indicates | |
1115 | * whether the value is NULL or not. | |
1116 | * @param index position of the new value | |
1117 | * @param isSet 0 for NULL value, 1 otherwise | |
1118 | * @param start start position of data in buffer | |
1119 | * @param end end position of data in buffer | |
1120 | * @param buffer data buffer containing the variable width element to be stored | |
1121 | * in the vector | |
1122 | */ | |
1123 | public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { | |
1124 | assert index >= 0; | |
1125 | final int dataLength = end - start; | |
1126 | fillHoles(index); | |
1127 | BitVectorHelper.setValidityBit(validityBuffer, index, isSet); | |
1128 | final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1129 | offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); | |
1130 | valueBuffer.setBytes(startOffset, buffer, start, dataLength); | |
1131 | lastSet = index; | |
1132 | } | |
1133 | ||
1134 | /** | |
1135 | * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case | |
1136 | * when index is greater than or equal to current value capacity of the | |
1137 | * vector. | |
1138 | * @param index position of the new value | |
1139 | * @param isSet 0 for NULL value, 1 otherwise | |
1140 | * @param start start position of data in buffer | |
1141 | * @param end end position of data in buffer | |
1142 | * @param buffer data buffer containing the variable width element to be stored | |
1143 | * in the vector | |
1144 | */ | |
1145 | public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { | |
1146 | assert index >= 0; | |
1147 | final int dataLength = end - start; | |
1148 | handleSafe(index, dataLength); | |
1149 | fillHoles(index); | |
1150 | BitVectorHelper.setValidityBit(validityBuffer, index, isSet); | |
1151 | final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1152 | offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); | |
1153 | valueBuffer.setBytes(startOffset, buffer, start, dataLength); | |
1154 | lastSet = index; | |
1155 | } | |
1156 | ||
1157 | /** | |
1158 | * Store the given value at a particular position in the vector. isSet indicates | |
1159 | * whether the value is NULL or not. | |
1160 | * @param index position of the new value | |
1161 | * @param start start position of data in buffer | |
1162 | * @param length length of data in buffer | |
1163 | * @param buffer data buffer containing the variable width element to be stored | |
1164 | * in the vector | |
1165 | */ | |
1166 | public void set(int index, int start, int length, ArrowBuf buffer) { | |
1167 | assert index >= 0; | |
1168 | fillHoles(index); | |
1169 | BitVectorHelper.setBit(validityBuffer, index); | |
1170 | final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1171 | offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); | |
1172 | final ArrowBuf bb = buffer.slice(start, length); | |
1173 | valueBuffer.setBytes(startOffset, bb); | |
1174 | lastSet = index; | |
1175 | } | |
1176 | ||
1177 | /** | |
1178 | * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case | |
1179 | * when index is greater than or equal to current value capacity of the | |
1180 | * vector. | |
1181 | * @param index position of the new value | |
1182 | * @param start start position of data in buffer | |
1183 | * @param length length of data in buffer | |
1184 | * @param buffer data buffer containing the variable width element to be stored | |
1185 | * in the vector | |
1186 | */ | |
1187 | public void setSafe(int index, int start, int length, ArrowBuf buffer) { | |
1188 | assert index >= 0; | |
1189 | handleSafe(index, length); | |
1190 | fillHoles(index); | |
1191 | BitVectorHelper.setBit(validityBuffer, index); | |
1192 | final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1193 | offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); | |
1194 | final ArrowBuf bb = buffer.slice(start, length); | |
1195 | valueBuffer.setBytes(startOffset, bb); | |
1196 | lastSet = index; | |
1197 | } | |
1198 | ||
1199 | ||
1200 | /*----------------------------------------------------------------* | |
1201 | | | | |
1202 | | helper methods for setters | | |
1203 | | | | |
1204 | *----------------------------------------------------------------*/ | |
1205 | ||
1206 | ||
1207 | protected final void fillHoles(int index) { | |
1208 | for (int i = lastSet + 1; i < index; i++) { | |
1209 | setBytes(i, emptyByteArray, 0, emptyByteArray.length); | |
1210 | } | |
1211 | lastSet = index - 1; | |
1212 | } | |
1213 | ||
1214 | protected final void setBytes(int index, byte[] value, int start, int length) { | |
1215 | /* end offset of current last element in the vector. this will | |
1216 | * be the start offset of new element we are trying to store. | |
1217 | */ | |
1218 | final int startOffset = getStartOffset(index); | |
1219 | /* set new end offset */ | |
1220 | offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); | |
1221 | /* store the var length data in value buffer */ | |
1222 | valueBuffer.setBytes(startOffset, value, start, length); | |
1223 | } | |
1224 | ||
1225 | public final int getStartOffset(int index) { | |
1226 | return offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1227 | } | |
1228 | ||
1229 | protected final void handleSafe(int index, int dataLength) { | |
1230 | /* | |
1231 | * IMPORTANT: | |
1232 | * value buffer for variable length vectors moves independent | |
1233 | * of the companion validity and offset buffers. This is in | |
1234 | * contrast to what we have for fixed width vectors. | |
1235 | * | |
1236 | * Here there is no concept of getValueCapacity() in the | |
1237 | * data stream. getValueCapacity() is applicable only to validity | |
1238 | * and offset buffers. | |
1239 | * | |
1240 | * So even though we may have setup an initial capacity of 1024 | |
1241 | * elements in the vector, it is quite possible | |
1242 | * that we need to reAlloc() the data buffer when we are setting | |
1243 | * the 5th element in the vector simply because previous | |
1244 | * variable length elements have exhausted the buffer capacity. | |
1245 | * However, we really don't need to reAlloc() validity and | |
1246 | * offset buffers until we try to set the 1025th element | |
1247 | * This is why we do a separate check for safe methods to | |
1248 | * determine which buffer needs reallocation. | |
1249 | */ | |
1250 | while (index >= getValueCapacity()) { | |
1251 | reallocValidityAndOffsetBuffers(); | |
1252 | } | |
1253 | final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); | |
1254 | while (valueBuffer.capacity() < (startOffset + dataLength)) { | |
1255 | reallocDataBuffer(); | |
1256 | } | |
1257 | } | |
1258 | ||
1259 | /** | |
1260 | * Method used by Json Writer to read a variable width element from | |
1261 | * the variable width vector and write to Json. | |
1262 | * | |
1263 | * <p>This method should not be used externally. | |
1264 | * | |
1265 | * @param data buffer storing the variable width vector elements | |
1266 | * @param offset buffer storing the offsets of variable width vector elements | |
1267 | * @param index position of the element in the vector | |
1268 | * @return array of bytes | |
1269 | */ | |
1270 | public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) { | |
1271 | final int currentStartOffset = offset.getInt((long) index * OFFSET_WIDTH); | |
1272 | final int dataLength = | |
1273 | offset.getInt((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset; | |
1274 | final byte[] result = new byte[dataLength]; | |
1275 | data.getBytes(currentStartOffset, result, 0, dataLength); | |
1276 | return result; | |
1277 | } | |
1278 | ||
1279 | /** | |
1280 | * Method used by Json Reader to explicitly set the offsets of the variable | |
1281 | * width vector data. The method takes care of allocating the memory for | |
1282 | * offsets if the caller hasn't done so. | |
1283 | * | |
1284 | * <p>This method should not be used externally. | |
1285 | * | |
1286 | * @param buffer ArrowBuf to store offsets for variable width elements | |
1287 | * @param allocator memory allocator | |
1288 | * @param valueCount number of elements | |
1289 | * @param index position of the element | |
1290 | * @param value offset of the element | |
1291 | * @return buffer holding the offsets | |
1292 | */ | |
1293 | public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, | |
1294 | int valueCount, int index, int value) { | |
1295 | if (buffer == null) { | |
1296 | buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH); | |
1297 | } | |
1298 | buffer.setInt((long) index * OFFSET_WIDTH, value); | |
1299 | if (index == (valueCount - 1)) { | |
1300 | buffer.writerIndex((long) valueCount * OFFSET_WIDTH); | |
1301 | } | |
1302 | ||
1303 | return buffer; | |
1304 | } | |
1305 | ||
1306 | /** | |
1307 | * Copy a cell value from a particular index in source vector to a particular | |
1308 | * position in this vector. | |
1309 | * | |
1310 | * @param fromIndex position to copy from in source vector | |
1311 | * @param thisIndex position to copy to in this vector | |
1312 | * @param from source vector | |
1313 | */ | |
1314 | @Override | |
1315 | public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { | |
1316 | Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); | |
1317 | if (from.isNull(fromIndex)) { | |
1318 | fillHoles(thisIndex); | |
1319 | BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); | |
1320 | final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH); | |
1321 | offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); | |
1322 | } else { | |
1323 | final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); | |
1324 | final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); | |
1325 | final int length = end - start; | |
1326 | fillHoles(thisIndex); | |
1327 | BitVectorHelper.setBit(this.validityBuffer, thisIndex); | |
1328 | final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH); | |
1329 | from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); | |
1330 | offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); | |
1331 | } | |
1332 | lastSet = thisIndex; | |
1333 | } | |
1334 | ||
1335 | /** | |
1336 | * Same as {@link #copyFrom(int, int, ValueVector)} except that | |
1337 | * it handles the case when the capacity of the vector needs to be expanded | |
1338 | * before copy. | |
1339 | * | |
1340 | * @param fromIndex position to copy from in source vector | |
1341 | * @param thisIndex position to copy to in this vector | |
1342 | * @param from source vector | |
1343 | */ | |
1344 | @Override | |
1345 | public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { | |
1346 | Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); | |
1347 | if (from.isNull(fromIndex)) { | |
1348 | handleSafe(thisIndex, 0); | |
1349 | fillHoles(thisIndex); | |
1350 | BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); | |
1351 | final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); | |
1352 | offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); | |
1353 | } else { | |
1354 | final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); | |
1355 | final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); | |
1356 | final int length = end - start; | |
1357 | handleSafe(thisIndex, length); | |
1358 | fillHoles(thisIndex); | |
1359 | BitVectorHelper.setBit(this.validityBuffer, thisIndex); | |
1360 | final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH); | |
1361 | from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); | |
1362 | offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); | |
1363 | } | |
1364 | lastSet = thisIndex; | |
1365 | } | |
1366 | ||
1367 | @Override | |
1368 | public ArrowBufPointer getDataPointer(int index) { | |
1369 | return getDataPointer(index, new ArrowBufPointer()); | |
1370 | } | |
1371 | ||
1372 | @Override | |
1373 | public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { | |
1374 | if (isNull(index)) { | |
1375 | reuse.set(null, 0, 0); | |
1376 | } else { | |
1377 | int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); | |
1378 | int length = offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - offset; | |
1379 | reuse.set(valueBuffer, offset, length); | |
1380 | } | |
1381 | return reuse; | |
1382 | } | |
1383 | ||
1384 | @Override | |
1385 | public int hashCode(int index) { | |
1386 | return hashCode(index, null); | |
1387 | } | |
1388 | ||
1389 | @Override | |
1390 | public int hashCode(int index, ArrowBufHasher hasher) { | |
1391 | if (isNull(index)) { | |
1392 | return ArrowBufPointer.NULL_HASH_CODE; | |
1393 | } | |
1394 | final int start = getStartOffset(index); | |
1395 | final int end = getStartOffset(index + 1); | |
1396 | return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end); | |
1397 | } | |
1398 | ||
1399 | @Override | |
1400 | public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { | |
1401 | return visitor.visit(this, value); | |
1402 | } | |
1403 | ||
1404 | /** | |
1405 | * Gets the ending offset of a record, given its index. | |
1406 | */ | |
1407 | public final int getEndOffset(int index) { | |
1408 | return offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH); | |
1409 | } | |
1410 | } |