]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.vector.complex; | |
19 | ||
20 | import static org.apache.arrow.util.Preconditions.checkNotNull; | |
21 | ||
22 | import java.util.ArrayList; | |
23 | import java.util.Collection; | |
24 | import java.util.Iterator; | |
25 | import java.util.List; | |
26 | import java.util.Map; | |
27 | ||
28 | import org.apache.arrow.memory.ArrowBuf; | |
29 | import org.apache.arrow.memory.BufferAllocator; | |
30 | import org.apache.arrow.memory.util.ByteFunctionHelpers; | |
31 | import org.apache.arrow.memory.util.hash.ArrowBufHasher; | |
32 | import org.apache.arrow.util.Preconditions; | |
33 | import org.apache.arrow.vector.DensityAwareVector; | |
34 | import org.apache.arrow.vector.FieldVector; | |
35 | import org.apache.arrow.vector.ValueVector; | |
36 | import org.apache.arrow.vector.compare.VectorVisitor; | |
37 | import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; | |
38 | import org.apache.arrow.vector.complex.reader.FieldReader; | |
39 | import org.apache.arrow.vector.holders.ComplexHolder; | |
40 | import org.apache.arrow.vector.types.Types.MinorType; | |
41 | import org.apache.arrow.vector.types.pojo.ArrowType; | |
42 | import org.apache.arrow.vector.types.pojo.Field; | |
43 | import org.apache.arrow.vector.types.pojo.FieldType; | |
44 | import org.apache.arrow.vector.util.CallBack; | |
45 | import org.apache.arrow.vector.util.JsonStringHashMap; | |
46 | import org.apache.arrow.vector.util.TransferPair; | |
47 | ||
48 | /** | |
49 | * A struct vector that has no null values (and no validity buffer). | |
50 | * Child Vectors are handled in {@link AbstractStructVector}. | |
51 | */ | |
52 | public class NonNullableStructVector extends AbstractStructVector { | |
53 | ||
54 | public static NonNullableStructVector empty(String name, BufferAllocator allocator) { | |
55 | FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); | |
56 | return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false); | |
57 | } | |
58 | ||
59 | public static NonNullableStructVector emptyWithDuplicates(String name, BufferAllocator allocator) { | |
60 | FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); | |
61 | return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true); | |
62 | } | |
63 | ||
64 | private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this); | |
65 | protected final FieldType fieldType; | |
66 | public int valueCount; | |
67 | ||
68 | /** | |
69 | * Constructs a new instance. | |
70 | * | |
71 | * @param name The name of the instance. | |
72 | * @param allocator The allocator to use to allocating/reallocating buffers. | |
73 | * @param fieldType The type of this list. | |
74 | */ | |
75 | public NonNullableStructVector(String name, | |
76 | BufferAllocator allocator, | |
77 | FieldType fieldType, | |
78 | CallBack callBack) { | |
79 | super(name, | |
80 | allocator, | |
81 | callBack, | |
82 | null, | |
83 | true); | |
84 | this.fieldType = checkNotNull(fieldType); | |
85 | this.valueCount = 0; | |
86 | } | |
87 | ||
88 | /** | |
89 | * Constructs a new instance. | |
90 | * | |
91 | * @param name The name of the instance. | |
92 | * @param allocator The allocator to use to allocating/reallocating buffers. | |
93 | * @param fieldType The type of this list. | |
94 | * @param callBack A schema change callback. | |
95 | * @param conflictPolicy How to handle duplicate field names in the struct. | |
96 | */ | |
97 | public NonNullableStructVector(String name, | |
98 | BufferAllocator allocator, | |
99 | FieldType fieldType, | |
100 | CallBack callBack, | |
101 | ConflictPolicy conflictPolicy, | |
102 | boolean allowConflictPolicyChanges) { | |
103 | super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges); | |
104 | this.fieldType = checkNotNull(fieldType); | |
105 | this.valueCount = 0; | |
106 | } | |
107 | ||
108 | @Override | |
109 | public FieldReader getReader() { | |
110 | return reader; | |
111 | } | |
112 | ||
113 | private transient StructTransferPair ephPair; | |
114 | ||
115 | /** | |
116 | * Copies the element at fromIndex in the provided vector to thisIndex. Reallocates buffers | |
117 | * if thisIndex is larger then current capacity. | |
118 | */ | |
119 | @Override | |
120 | public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { | |
121 | Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); | |
122 | if (ephPair == null || ephPair.from != from) { | |
123 | ephPair = (StructTransferPair) from.makeTransferPair(this); | |
124 | } | |
125 | ephPair.copyValueSafe(fromIndex, thisIndex); | |
126 | } | |
127 | ||
128 | @Override | |
129 | public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { | |
130 | copyFrom(fromIndex, thisIndex, from); | |
131 | } | |
132 | ||
133 | @Override | |
134 | protected boolean supportsDirectRead() { | |
135 | return true; | |
136 | } | |
137 | ||
138 | public Iterator<String> fieldNameIterator() { | |
139 | return getChildFieldNames().iterator(); | |
140 | } | |
141 | ||
142 | @Override | |
143 | public void setInitialCapacity(int numRecords) { | |
144 | for (final ValueVector v : this) { | |
145 | v.setInitialCapacity(numRecords); | |
146 | } | |
147 | } | |
148 | ||
149 | @Override | |
150 | public void setInitialCapacity(int valueCount, double density) { | |
151 | for (final ValueVector vector : this) { | |
152 | if (vector instanceof DensityAwareVector) { | |
153 | ((DensityAwareVector) vector).setInitialCapacity(valueCount, density); | |
154 | } else { | |
155 | vector.setInitialCapacity(valueCount); | |
156 | } | |
157 | } | |
158 | } | |
159 | ||
160 | @Override | |
161 | public int getBufferSize() { | |
162 | if (valueCount == 0 || size() == 0) { | |
163 | return 0; | |
164 | } | |
165 | long buffer = 0; | |
166 | for (final ValueVector v : this) { | |
167 | buffer += v.getBufferSize(); | |
168 | } | |
169 | ||
170 | return (int) buffer; | |
171 | } | |
172 | ||
173 | @Override | |
174 | public int getBufferSizeFor(final int valueCount) { | |
175 | if (valueCount == 0) { | |
176 | return 0; | |
177 | } | |
178 | ||
179 | long bufferSize = 0; | |
180 | for (final ValueVector v : this) { | |
181 | bufferSize += v.getBufferSizeFor(valueCount); | |
182 | } | |
183 | ||
184 | return (int) bufferSize; | |
185 | } | |
186 | ||
187 | @Override | |
188 | public ArrowBuf getValidityBuffer() { | |
189 | throw new UnsupportedOperationException(); | |
190 | } | |
191 | ||
192 | @Override | |
193 | public ArrowBuf getDataBuffer() { | |
194 | throw new UnsupportedOperationException(); | |
195 | } | |
196 | ||
197 | @Override | |
198 | public ArrowBuf getOffsetBuffer() { | |
199 | throw new UnsupportedOperationException(); | |
200 | } | |
201 | ||
202 | @Override | |
203 | public TransferPair getTransferPair(BufferAllocator allocator) { | |
204 | return getTransferPair(name, allocator, null); | |
205 | } | |
206 | ||
207 | @Override | |
208 | public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { | |
209 | return new StructTransferPair(this, new NonNullableStructVector(name, | |
210 | allocator, | |
211 | fieldType, | |
212 | callBack, | |
213 | getConflictPolicy(), | |
214 | allowConflictPolicyChanges), false); | |
215 | } | |
216 | ||
217 | @Override | |
218 | public TransferPair makeTransferPair(ValueVector to) { | |
219 | return new StructTransferPair(this, (NonNullableStructVector) to); | |
220 | } | |
221 | ||
222 | @Override | |
223 | public TransferPair getTransferPair(String ref, BufferAllocator allocator) { | |
224 | return new StructTransferPair(this, new NonNullableStructVector(ref, | |
225 | allocator, | |
226 | fieldType, | |
227 | callBack, | |
228 | getConflictPolicy(), | |
229 | allowConflictPolicyChanges), false); | |
230 | } | |
231 | ||
232 | /** | |
233 | * {@link TransferPair} for this this class. | |
234 | */ | |
235 | protected static class StructTransferPair implements TransferPair { | |
236 | private final TransferPair[] pairs; | |
237 | private final NonNullableStructVector from; | |
238 | private final NonNullableStructVector to; | |
239 | ||
240 | public StructTransferPair(NonNullableStructVector from, NonNullableStructVector to) { | |
241 | this(from, to, true); | |
242 | } | |
243 | ||
244 | protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) { | |
245 | this.from = from; | |
246 | this.to = to; | |
247 | this.pairs = new TransferPair[from.size()]; | |
248 | this.to.ephPair = null; | |
249 | ||
250 | int i = 0; | |
251 | FieldVector vector; | |
252 | for (String child : from.getChildFieldNames()) { | |
253 | int preSize = to.size(); | |
254 | vector = from.getChild(child); | |
255 | if (vector == null) { | |
256 | continue; | |
257 | } | |
258 | //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector, | |
259 | // the child fields may be nested fields of the top level child. For example if the structure | |
260 | // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab. | |
261 | // But the children member of a Materialized field is a HashSet. If the fields are added in the | |
262 | // children HashSet, and the hashCode of the Materialized field includes the hash code of the | |
263 | // children, the hashCode value of oa changes *after* the field has been added to the HashSet. | |
264 | // (This is similar to what happens in ScanBatch where the children cannot be added till they are | |
265 | // read). To take care of this, we ensure that the hashCode of the MaterializedField does not | |
266 | // include the hashCode of the children but is based only on MaterializedField$key. | |
267 | final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass()); | |
268 | if (allocate && to.size() != preSize) { | |
269 | newVector.allocateNew(); | |
270 | } | |
271 | pairs[i++] = vector.makeTransferPair(newVector); | |
272 | } | |
273 | } | |
274 | ||
275 | @Override | |
276 | public void transfer() { | |
277 | for (final TransferPair p : pairs) { | |
278 | p.transfer(); | |
279 | } | |
280 | to.valueCount = from.valueCount; | |
281 | from.clear(); | |
282 | } | |
283 | ||
284 | @Override | |
285 | public ValueVector getTo() { | |
286 | return to; | |
287 | } | |
288 | ||
289 | @Override | |
290 | public void copyValueSafe(int from, int to) { | |
291 | for (TransferPair p : pairs) { | |
292 | p.copyValueSafe(from, to); | |
293 | } | |
294 | } | |
295 | ||
296 | @Override | |
297 | public void splitAndTransfer(int startIndex, int length) { | |
298 | for (TransferPair p : pairs) { | |
299 | p.splitAndTransfer(startIndex, length); | |
300 | } | |
301 | to.setValueCount(length); | |
302 | } | |
303 | } | |
304 | ||
305 | @Override | |
306 | public int getValueCapacity() { | |
307 | if (size() == 0) { | |
308 | return 0; | |
309 | } | |
310 | ||
311 | return getChildren().stream() | |
312 | .mapToInt(child -> child.getValueCapacity()) | |
313 | .min() | |
314 | .getAsInt(); | |
315 | } | |
316 | ||
317 | @Override | |
318 | public Map<String, ?> getObject(int index) { | |
319 | Map<String, Object> vv = new JsonStringHashMap<>(); | |
320 | for (String child : getChildFieldNames()) { | |
321 | ValueVector v = getChild(child); | |
322 | if (v != null && index < v.getValueCount()) { | |
323 | Object value = v.getObject(index); | |
324 | if (value != null) { | |
325 | vv.put(child, value); | |
326 | } | |
327 | } | |
328 | } | |
329 | return vv; | |
330 | } | |
331 | ||
332 | @Override | |
333 | public int hashCode(int index) { | |
334 | return hashCode(index, null); | |
335 | } | |
336 | ||
337 | @Override | |
338 | public int hashCode(int index, ArrowBufHasher hasher) { | |
339 | int hash = 0; | |
340 | for (FieldVector v : getChildren()) { | |
341 | if (index < v.getValueCount()) { | |
342 | hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher)); | |
343 | } | |
344 | } | |
345 | return hash; | |
346 | } | |
347 | ||
348 | @Override | |
349 | public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) { | |
350 | return visitor.visit(this, value); | |
351 | } | |
352 | ||
353 | @Override | |
354 | public boolean isNull(int index) { | |
355 | return false; | |
356 | } | |
357 | ||
358 | @Override | |
359 | public int getNullCount() { | |
360 | return 0; | |
361 | } | |
362 | ||
363 | public void get(int index, ComplexHolder holder) { | |
364 | reader.setPosition(index); | |
365 | holder.reader = reader; | |
366 | } | |
367 | ||
368 | @Override | |
369 | public int getValueCount() { | |
370 | return valueCount; | |
371 | } | |
372 | ||
373 | public ValueVector getVectorById(int id) { | |
374 | return getChildByOrdinal(id); | |
375 | } | |
376 | ||
377 | @Override | |
378 | public void setValueCount(int valueCount) { | |
379 | for (final ValueVector v : getChildren()) { | |
380 | v.setValueCount(valueCount); | |
381 | } | |
382 | NonNullableStructVector.this.valueCount = valueCount; | |
383 | } | |
384 | ||
385 | @Override | |
386 | public void clear() { | |
387 | for (final ValueVector v : getChildren()) { | |
388 | v.clear(); | |
389 | } | |
390 | valueCount = 0; | |
391 | } | |
392 | ||
393 | @Override | |
394 | public void reset() { | |
395 | for (final ValueVector v : getChildren()) { | |
396 | v.reset(); | |
397 | } | |
398 | valueCount = 0; | |
399 | } | |
400 | ||
401 | @Override | |
402 | public Field getField() { | |
403 | List<Field> children = new ArrayList<>(); | |
404 | for (ValueVector child : getChildren()) { | |
405 | children.add(child.getField()); | |
406 | } | |
407 | return new Field(name, fieldType, children); | |
408 | } | |
409 | ||
410 | @Override | |
411 | public MinorType getMinorType() { | |
412 | return MinorType.STRUCT; | |
413 | } | |
414 | ||
415 | @Override | |
416 | public void close() { | |
417 | final Collection<FieldVector> vectors = getChildren(); | |
418 | for (final FieldVector v : vectors) { | |
419 | v.close(); | |
420 | } | |
421 | vectors.clear(); | |
422 | ||
423 | valueCount = 0; | |
424 | ||
425 | super.close(); | |
426 | } | |
427 | ||
428 | /** Initializes the struct's members from the given Fields. */ | |
429 | public void initializeChildrenFromFields(List<Field> children) { | |
430 | for (Field field : children) { | |
431 | FieldVector vector = (FieldVector) this.add(field.getName(), field.getFieldType()); | |
432 | vector.initializeChildrenFromFields(field.getChildren()); | |
433 | } | |
434 | } | |
435 | ||
436 | public List<FieldVector> getChildrenFromFields() { | |
437 | return getChildren(); | |
438 | } | |
439 | ||
440 | } |