// Extracted from the git.proxmox.com gitweb blame view of ceph.git:
//   ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
// Blame columns: Commit | Line | Data.
// Commit 1d09f67e ("import quincy 17.2.0"), author initials "TL".
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18package org.apache.arrow.vector;
19
20import java.io.Closeable;
21
22import org.apache.arrow.memory.ArrowBuf;
23import org.apache.arrow.memory.BufferAllocator;
24import org.apache.arrow.memory.OutOfMemoryException;
25import org.apache.arrow.memory.util.hash.ArrowBufHasher;
26import org.apache.arrow.vector.compare.VectorVisitor;
27import org.apache.arrow.vector.complex.reader.FieldReader;
28import org.apache.arrow.vector.types.Types.MinorType;
29import org.apache.arrow.vector.types.pojo.Field;
30import org.apache.arrow.vector.util.CallBack;
31import org.apache.arrow.vector.util.TransferPair;
32
33/**
34 * An abstraction that is used to store a sequence of values in an individual column.
35 *
36 * <p>A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and
37 * efficient. The column whose data is stored, is referred by {@link #getField()}.
38 *
39 * <p>It is important that vector is allocated before attempting to read or write.
40 *
41 * <p>There are a few "rules" around vectors:
42 *
43 * <ul>
44 * <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li>
45 * <li>null vectors start with all values as null before writing anything</li>
46 * <li>for variable width types, the offset vector should be all zeros before writing</li>
47 * <li>you must call setValueCount before a vector can be read</li>
48 * <li>you should never write to a vector once it has been read.</li>
49 * </ul>
50 *
51 * <p>Please note that the current implementation doesn't enforce those rules, hence we may find few places that
52 * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector)
53 *
54 * <p>This interface "should" strive to guarantee this order of operation:
55 * <blockquote>
56 * allocate &gt; mutate &gt; setvaluecount &gt; access &gt; clear (or allocate to start the process over).
57 * </blockquote>
58 */
59public interface ValueVector extends Closeable, Iterable<ValueVector> {
60 /**
61 * Allocate new buffers. ValueVector implements logic to determine how much to allocate.
62 *
63 * @throws OutOfMemoryException Thrown if no memory can be allocated.
64 */
65 void allocateNew() throws OutOfMemoryException;
66
67 /**
68 * Allocates new buffers. ValueVector implements logic to determine how much to allocate.
69 *
70 * @return Returns true if allocation was successful.
71 */
72 boolean allocateNewSafe();
73
74 /**
75 * Allocate new buffer with double capacity, and copy data into the new buffer.
76 * Replace vector's buffer with new buffer, and release old one
77 */
78 void reAlloc();
79
80 BufferAllocator getAllocator();
81
82 /**
83 * Set the initial record capacity.
84 *
85 * @param numRecords the initial record capacity.
86 */
87 void setInitialCapacity(int numRecords);
88
89 /**
90 * Returns the maximum number of values that can be stored in this vector instance.
91 *
92 * @return the maximum number of values that can be stored in this vector instance.
93 */
94 int getValueCapacity();
95
96 /**
97 * Alternative to clear(). Allows use as an AutoCloseable in try-with-resources.
98 */
99 @Override
100 void close();
101
102 /**
103 * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the
104 * vector has any child vectors, they will also be cleared.
105 */
106 void clear();
107
108 /**
109 * Reset the ValueVector to the initial state without releasing any owned ArrowBuf.
110 * Buffer capacities will remain unchanged and any previous data will be zeroed out.
111 * This includes buffers for data, validity, offset, etc. If the vector has any
112 * child vectors, they will also be reset.
113 */
114 void reset();
115
116 /**
117 * Get information about how this field is materialized.
118 *
119 * @return the field corresponding to this vector
120 */
121 Field getField();
122
123 MinorType getMinorType();
124
125 /**
126 * To transfer quota responsibility.
127 *
128 * @param allocator the target allocator
129 * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of
130 * the same type.
131 */
132 TransferPair getTransferPair(BufferAllocator allocator);
133
134 TransferPair getTransferPair(String ref, BufferAllocator allocator);
135
136 TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack);
137
138 /**
139 * Makes a new transfer pair used to transfer underlying buffers.
140 *
141 * @param target the target for the transfer
142 * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying
143 * buffers into the target vector.
144 */
145 TransferPair makeTransferPair(ValueVector target);
146
147 /**
148 * Get a reader for this vector.
149 *
150 * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values
151 * from this vector.
152 */
153 FieldReader getReader();
154
155 /**
156 * Get the number of bytes used by this vector.
157 *
158 * @return the number of bytes that is used by this vector instance.
159 */
160 int getBufferSize();
161
162 /**
163 * Returns the number of bytes that is used by this vector if it holds the given number
164 * of values. The result will be the same as if setValueCount() were called, followed
165 * by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
166 * implies wrt finishing off the population of a vector. Some operations might wish to use
167 * this to determine how much memory has been used by a vector so far, even though it is
168 * not finished being populated.
169 *
170 * @param valueCount the number of values to assume this vector contains
171 * @return the buffer size if this vector is holding valueCount values
172 */
173 int getBufferSizeFor(int valueCount);
174
175 /**
176 * Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for
177 * this buffer so it only should be used for in-context access. Also note that this buffer changes regularly thus
178 * external classes shouldn't hold a reference to it (unless they change it).
179 *
180 * @param clear Whether to clear vector before returning; the buffers will still be refcounted;
181 * but the returned array will be the only reference to them
182 * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance.
183 */
184 ArrowBuf[] getBuffers(boolean clear);
185
186 /**
187 * Gets the underlying buffer associated with validity vector.
188 *
189 * @return buffer
190 */
191 ArrowBuf getValidityBuffer();
192
193 /**
194 * Gets the underlying buffer associated with data vector.
195 *
196 * @return buffer
197 */
198 ArrowBuf getDataBuffer();
199
200 /**
201 * Gets the underlying buffer associated with offset vector.
202 *
203 * @return buffer
204 */
205 ArrowBuf getOffsetBuffer();
206
207 /**
208 * Gets the number of values.
209 *
210 * @return number of values in the vector
211 */
212 int getValueCount();
213
214 /**
215 * Set number of values in the vector.
216 */
217 void setValueCount(int valueCount);
218
219 /**
220 * Get friendly type object from the vector.
221 *
222 * @param index index of object to get
223 * @return friendly type object
224 */
225 Object getObject(int index);
226
227 /**
228 * Returns number of null elements in the vector.
229 *
230 * @return number of null elements
231 */
232 int getNullCount();
233
234 /**
235 * Check whether an element in the vector is null.
236 *
237 * @param index index to check for null
238 * @return true if element is null
239 */
240 boolean isNull(int index);
241
242 /**
243 * Returns hashCode of element in index with the default hasher.
244 */
245 int hashCode(int index);
246
247 /**
248 * Returns hashCode of element in index with the given hasher.
249 */
250 int hashCode(int index, ArrowBufHasher hasher);
251
252 /**
253 * Copy a cell value from a particular index in source vector to a particular
254 * position in this vector.
255 *
256 * @param fromIndex position to copy from in source vector
257 * @param thisIndex position to copy to in this vector
258 * @param from source vector
259 */
260 void copyFrom(int fromIndex, int thisIndex, ValueVector from);
261
262 /**
263 * Same as {@link #copyFrom(int, int, ValueVector)} except that
264 * it handles the case when the capacity of the vector needs to be expanded
265 * before copy.
266 *
267 * @param fromIndex position to copy from in source vector
268 * @param thisIndex position to copy to in this vector
269 * @param from source vector
270 */
271 void copyFromSafe(int fromIndex, int thisIndex, ValueVector from);
272
273 /**
274 * Accept a generic {@link VectorVisitor} and return the result.
275 * @param <OUT> the output result type.
276 * @param <IN> the input data together with visitor.
277 */
278 <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value);
279
280 /**
281 * Gets the name of the vector.
282 * @return the name of the vector.
283 */
284 String getName();
285}