]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.vector; | |
19 | ||
20 | import java.io.Closeable; | |
21 | ||
22 | import org.apache.arrow.memory.ArrowBuf; | |
23 | import org.apache.arrow.memory.BufferAllocator; | |
24 | import org.apache.arrow.memory.OutOfMemoryException; | |
25 | import org.apache.arrow.memory.util.hash.ArrowBufHasher; | |
26 | import org.apache.arrow.vector.compare.VectorVisitor; | |
27 | import org.apache.arrow.vector.complex.reader.FieldReader; | |
28 | import org.apache.arrow.vector.types.Types.MinorType; | |
29 | import org.apache.arrow.vector.types.pojo.Field; | |
30 | import org.apache.arrow.vector.util.CallBack; | |
31 | import org.apache.arrow.vector.util.TransferPair; | |
32 | ||
33 | /** | |
34 | * An abstraction that is used to store a sequence of values in an individual column. | |
35 | * | |
36 | * <p>A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and | |
37 | * efficient. The column whose data is stored is referred to by {@link #getField()}. | |
38 | * | |
39 | * <p>It is important that a vector is allocated before attempting to read or write. | |
40 | * | |
41 | * <p>There are a few "rules" around vectors: | |
42 | * | |
43 | * <ul> | |
44 | * <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li> | |
45 | * <li>null vectors start with all values as null before writing anything</li> | |
46 | * <li>for variable width types, the offset vector should be all zeros before writing</li> | |
47 | * <li>you must call setValueCount before a vector can be read</li> | |
48 | * <li>you should never write to a vector once it has been read.</li> | |
49 | * </ul> | |
50 | * | |
51 | * <p>Please note that the current implementation doesn't enforce those rules, hence we may find a few places that | |
52 | * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vectors). | |
53 | * | |
54 | * <p>This interface "should" strive to guarantee this order of operation: | |
55 | * <blockquote> | |
56 | * allocate > mutate > setvaluecount > access > clear (or allocate to start the process over). | |
57 | * </blockquote> | |
58 | */ | |
59 | public interface ValueVector extends Closeable, Iterable<ValueVector> { | |
60 | /** | |
61 | * Allocate new buffers. ValueVector implements logic to determine how much to allocate. | |
62 | * | |
63 | * @throws OutOfMemoryException Thrown if no memory can be allocated. | |
64 | */ | |
65 | void allocateNew() throws OutOfMemoryException; | |
66 | ||
67 | /** | |
68 | * Allocates new buffers. ValueVector implements logic to determine how much to allocate. | |
69 | * | |
70 | * @return true if allocation was successful. | |
71 | */ | |
72 | boolean allocateNewSafe(); | |
73 | ||
74 | /** | |
75 | * Allocate a new buffer with double the capacity, and copy the data into the new buffer. | |
76 | * Replace the vector's buffer with the new buffer, and release the old one. | |
77 | */ | |
78 | void reAlloc(); | |
79 | ||
80 | /** Returns the {@link BufferAllocator} of this vector; presumably the allocator its buffers come from (TODO confirm against implementations). */ BufferAllocator getAllocator(); | |
81 | ||
82 | /** | |
83 | * Set the initial record capacity. | |
84 | * | |
85 | * @param numRecords the initial record capacity. | |
86 | */ | |
87 | void setInitialCapacity(int numRecords); | |
88 | ||
89 | /** | |
90 | * Returns the maximum number of values that can be stored in this vector instance. | |
91 | * | |
92 | * @return the maximum number of values that can be stored in this vector instance. | |
93 | */ | |
94 | int getValueCapacity(); | |
95 | ||
96 | /** | |
97 | * Alternative to {@link #clear()}. Allows use as an AutoCloseable in try-with-resources. | |
98 | */ | |
99 | @Override | |
100 | void close(); | |
101 | ||
102 | /** | |
103 | * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the | |
104 | * vector has any child vectors, they will also be cleared. | |
105 | */ | |
106 | void clear(); | |
107 | ||
108 | /** | |
109 | * Reset the ValueVector to the initial state without releasing any owned ArrowBuf. | |
110 | * Buffer capacities will remain unchanged and any previous data will be zeroed out. | |
111 | * This includes buffers for data, validity, offset, etc. If the vector has any | |
112 | * child vectors, they will also be reset. | |
113 | */ | |
114 | void reset(); | |
115 | ||
116 | /** | |
117 | * Get information about how this field is materialized. | |
118 | * | |
119 | * @return the field corresponding to this vector | |
120 | */ | |
121 | Field getField(); | |
122 | ||
123 | /** Returns the {@link MinorType} of this vector. */ MinorType getMinorType(); | |
124 | ||
125 | /** | |
126 | * Gets a transfer pair used to transfer quota responsibility to the given allocator. | |
127 | * | |
128 | * @param allocator the target allocator | |
129 | * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of | |
130 | * the same type. | |
131 | */ | |
132 | TransferPair getTransferPair(BufferAllocator allocator); | |
133 | ||
134 | /** Variant of {@link #getTransferPair(BufferAllocator)} that also takes {@code ref}; presumably a name for the target vector (TODO confirm). */ TransferPair getTransferPair(String ref, BufferAllocator allocator); | |
135 | ||
136 | /** Variant of {@link #getTransferPair(String, BufferAllocator)} that also takes a {@link CallBack}; when the callback is invoked is not specified here (TODO confirm). */ TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack); | |
137 | ||
138 | /** | |
139 | * Makes a new transfer pair used to transfer underlying buffers. | |
140 | * | |
141 | * @param target the target for the transfer | |
142 | * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying | |
143 | * buffers into the target vector. | |
144 | */ | |
145 | TransferPair makeTransferPair(ValueVector target); | |
146 | ||
147 | /** | |
148 | * Get a reader for this vector. | |
149 | * | |
150 | * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values | |
151 | * from this vector. | |
152 | */ | |
153 | FieldReader getReader(); | |
154 | ||
155 | /** | |
156 | * Get the number of bytes used by this vector. | |
157 | * | |
158 | * @return the number of bytes that is used by this vector instance. | |
159 | */ | |
160 | int getBufferSize(); | |
161 | ||
162 | /** | |
163 | * Returns the number of bytes that is used by this vector if it holds the given number | |
164 | * of values. The result will be the same as if setValueCount() were called, followed | |
165 | * by calling getBufferSize(), but without any of the closing side-effects that setValueCount() | |
166 | * implies wrt finishing off the population of a vector. Some operations might wish to use | |
167 | * this to determine how much memory has been used by a vector so far, even though it is | |
168 | * not finished being populated. | |
169 | * | |
170 | * @param valueCount the number of values to assume this vector contains | |
171 | * @return the buffer size if this vector is holding valueCount values | |
172 | */ | |
173 | int getBufferSizeFor(int valueCount); | |
174 | ||
175 | /** | |
176 | * Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for | |
177 | * these buffers, so it should only be used for in-context access. Also note that these buffers change regularly, thus | |
178 | * external classes shouldn't hold a reference to them (unless they change them). | |
179 | * | |
180 | * @param clear Whether to clear the vector before returning; the buffers will still be refcounted, | |
181 | * but the returned array will be the only reference to them | |
182 | * @return The underlying {@link ArrowBuf buffers} that are used by this vector instance. | |
183 | */ | |
184 | ArrowBuf[] getBuffers(boolean clear); | |
185 | ||
186 | /** | |
187 | * Gets the underlying buffer associated with the validity vector. | |
188 | * | |
189 | * @return buffer | |
190 | */ | |
191 | ArrowBuf getValidityBuffer(); | |
192 | ||
193 | /** | |
194 | * Gets the underlying buffer associated with the data vector. | |
195 | * | |
196 | * @return buffer | |
197 | */ | |
198 | ArrowBuf getDataBuffer(); | |
199 | ||
200 | /** | |
201 | * Gets the underlying buffer associated with the offset vector. | |
202 | * | |
203 | * @return buffer | |
204 | */ | |
205 | ArrowBuf getOffsetBuffer(); | |
206 | ||
207 | /** | |
208 | * Gets the number of values. | |
209 | * | |
210 | * @return number of values in the vector | |
211 | */ | |
212 | int getValueCount(); | |
213 | ||
214 | /** | |
215 | * Set the number of values in the vector to {@code valueCount}. | |
216 | */ | |
217 | void setValueCount(int valueCount); | |
218 | ||
219 | /** | |
220 | * Get friendly type object from the vector. | |
221 | * | |
222 | * @param index index of object to get | |
223 | * @return friendly type object | |
224 | */ | |
225 | Object getObject(int index); | |
226 | ||
227 | /** | |
228 | * Returns the number of null elements in the vector. | |
229 | * | |
230 | * @return number of null elements | |
231 | */ | |
232 | int getNullCount(); | |
233 | ||
234 | /** | |
235 | * Check whether an element in the vector is null. | |
236 | * | |
237 | * @param index index to check for null | |
238 | * @return true if element is null | |
239 | */ | |
240 | boolean isNull(int index); | |
241 | ||
242 | /** | |
243 | * Returns the hash code of the element at {@code index}, computed with the default hasher. | |
244 | */ | |
245 | int hashCode(int index); | |
246 | ||
247 | /** | |
248 | * Returns the hash code of the element at {@code index}, computed with the given {@code hasher}. | |
249 | */ | |
250 | int hashCode(int index, ArrowBufHasher hasher); | |
251 | ||
252 | /** | |
253 | * Copy a cell value from a particular index in source vector to a particular | |
254 | * position in this vector. | |
255 | * | |
256 | * @param fromIndex position to copy from in source vector | |
257 | * @param thisIndex position to copy to in this vector | |
258 | * @param from source vector | |
259 | */ | |
260 | void copyFrom(int fromIndex, int thisIndex, ValueVector from); | |
261 | ||
262 | /** | |
263 | * Same as {@link #copyFrom(int, int, ValueVector)} except that | |
264 | * it handles the case when the capacity of the vector needs to be expanded | |
265 | * before copy. | |
266 | * | |
267 | * @param fromIndex position to copy from in source vector | |
268 | * @param thisIndex position to copy to in this vector | |
269 | * @param from source vector | |
270 | */ | |
271 | void copyFromSafe(int fromIndex, int thisIndex, ValueVector from); | |
272 | ||
273 | /** | |
274 | * Accept a generic {@link VectorVisitor} and return the result. | |
275 | * @param <OUT> the output result type. | |
276 | * @param <IN> the type of the input data passed together with the visitor. | |
277 | */ | |
278 | <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value); | |
279 | ||
280 | /** | |
281 | * Gets the name of the vector. | |
282 | * @return the name of the vector. | |
283 | */ | |
284 | String getName(); | |
285 | } |