]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / test / java / org / apache / arrow / vector / ipc / BaseFileTest.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.arrow.vector.ipc;
19
20 import static org.apache.arrow.vector.TestUtils.newVarCharVector;
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertTrue;
24
25 import java.io.IOException;
26 import java.math.BigDecimal;
27 import java.math.BigInteger;
28 import java.nio.charset.StandardCharsets;
29 import java.time.LocalDateTime;
30 import java.time.LocalTime;
31 import java.time.ZoneId;
32 import java.time.ZoneOffset;
33 import java.util.Arrays;
34 import java.util.List;
35
36 import org.apache.arrow.memory.ArrowBuf;
37 import org.apache.arrow.memory.BufferAllocator;
38 import org.apache.arrow.memory.RootAllocator;
39 import org.apache.arrow.util.Collections2;
40 import org.apache.arrow.vector.BigIntVector;
41 import org.apache.arrow.vector.DateMilliVector;
42 import org.apache.arrow.vector.DecimalVector;
43 import org.apache.arrow.vector.FieldVector;
44 import org.apache.arrow.vector.IntVector;
45 import org.apache.arrow.vector.NullVector;
46 import org.apache.arrow.vector.TimeMilliVector;
47 import org.apache.arrow.vector.UInt1Vector;
48 import org.apache.arrow.vector.UInt2Vector;
49 import org.apache.arrow.vector.UInt4Vector;
50 import org.apache.arrow.vector.UInt8Vector;
51 import org.apache.arrow.vector.VarBinaryVector;
52 import org.apache.arrow.vector.VarCharVector;
53 import org.apache.arrow.vector.VectorSchemaRoot;
54 import org.apache.arrow.vector.complex.ListVector;
55 import org.apache.arrow.vector.complex.MapVector;
56 import org.apache.arrow.vector.complex.StructVector;
57 import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
58 import org.apache.arrow.vector.complex.impl.UnionListWriter;
59 import org.apache.arrow.vector.complex.impl.UnionMapReader;
60 import org.apache.arrow.vector.complex.impl.UnionMapWriter;
61 import org.apache.arrow.vector.complex.reader.FieldReader;
62 import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
63 import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
64 import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
65 import org.apache.arrow.vector.complex.writer.BigIntWriter;
66 import org.apache.arrow.vector.complex.writer.DateMilliWriter;
67 import org.apache.arrow.vector.complex.writer.Float4Writer;
68 import org.apache.arrow.vector.complex.writer.IntWriter;
69 import org.apache.arrow.vector.complex.writer.TimeMilliWriter;
70 import org.apache.arrow.vector.complex.writer.TimeStampMilliTZWriter;
71 import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter;
72 import org.apache.arrow.vector.complex.writer.TimeStampNanoWriter;
73 import org.apache.arrow.vector.complex.writer.UInt1Writer;
74 import org.apache.arrow.vector.complex.writer.UInt2Writer;
75 import org.apache.arrow.vector.complex.writer.UInt4Writer;
76 import org.apache.arrow.vector.complex.writer.UInt8Writer;
77 import org.apache.arrow.vector.dictionary.Dictionary;
78 import org.apache.arrow.vector.dictionary.DictionaryEncoder;
79 import org.apache.arrow.vector.dictionary.DictionaryProvider;
80 import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder;
81 import org.apache.arrow.vector.types.pojo.ArrowType;
82 import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
83 import org.apache.arrow.vector.types.pojo.Field;
84 import org.apache.arrow.vector.types.pojo.FieldType;
85 import org.apache.arrow.vector.util.JsonStringArrayList;
86 import org.apache.arrow.vector.util.Text;
87 import org.junit.After;
88 import org.junit.Assert;
89 import org.junit.Before;
90 import org.slf4j.Logger;
91 import org.slf4j.LoggerFactory;
92
93 /**
94 * Helps testing the file formats.
95 */
96 public class BaseFileTest {
97 private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class);
98 protected static final int COUNT = 10;
99 protected BufferAllocator allocator;
100
101 @Before
102 public void init() {
103 allocator = new RootAllocator(Integer.MAX_VALUE);
104 }
105
106 @After
107 public void tearDown() {
108 allocator.close();
109 }
110
111
112 private static short [] uint1Values = new short[]{0, 255, 1, 128, 2};
113 private static char [] uint2Values = new char[]{0, Character.MAX_VALUE, 1, Short.MAX_VALUE * 2, 2};
114 private static long [] uint4Values = new long[]{0, Integer.MAX_VALUE + 1L, 1, Integer.MAX_VALUE * 2L, 2};
115 private static BigInteger[] uint8Values = new BigInteger[]{BigInteger.valueOf(0),
116 BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2)), BigInteger.valueOf(2),
117 BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.valueOf(1)), BigInteger.valueOf(2)};
118
119 protected void writeData(int count, StructVector parent) {
120 ComplexWriter writer = new ComplexWriterImpl("root", parent);
121 StructWriter rootWriter = writer.rootAsStruct();
122 IntWriter intWriter = rootWriter.integer("int");
123 UInt1Writer uint1Writer = rootWriter.uInt1("uint1");
124 UInt2Writer uint2Writer = rootWriter.uInt2("uint2");
125 UInt4Writer uint4Writer = rootWriter.uInt4("uint4");
126 UInt8Writer uint8Writer = rootWriter.uInt8("uint8");
127 BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
128 Float4Writer float4Writer = rootWriter.float4("float");
129 for (int i = 0; i < count; i++) {
130 intWriter.setPosition(i);
131 intWriter.writeInt(i);
132 uint1Writer.setPosition(i);
133 // TODO: Fix add safe write methods on uint methods.
134 uint1Writer.setPosition(i);
135 uint1Writer.writeUInt1((byte) uint1Values[i % uint1Values.length] );
136 uint2Writer.setPosition(i);
137 uint2Writer.writeUInt2((char) uint2Values[i % uint2Values.length] );
138 uint4Writer.setPosition(i);
139 uint4Writer.writeUInt4((int) uint4Values[i % uint4Values.length] );
140 uint8Writer.setPosition(i);
141 uint8Writer.writeUInt8(uint8Values[i % uint8Values.length].longValue());
142 bigIntWriter.setPosition(i);
143 bigIntWriter.writeBigInt(i);
144 float4Writer.setPosition(i);
145 float4Writer.writeFloat4(i == 0 ? Float.NaN : i);
146 }
147 writer.setValueCount(count);
148 }
149
150
151 protected void validateContent(int count, VectorSchemaRoot root) {
152 for (int i = 0; i < count; i++) {
153 Assert.assertEquals(i, root.getVector("int").getObject(i));
154 Assert.assertEquals((Short) uint1Values[i % uint1Values.length],
155 ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i));
156 Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length],
157 (Character) ((UInt2Vector) root.getVector("uint2")).get(i));
158 Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length],
159 ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i));
160 Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length],
161 ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i));
162 Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
163 Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i));
164 }
165 }
166
167 protected void writeComplexData(int count, StructVector parent) {
168 ArrowBuf varchar = allocator.buffer(3);
169 varchar.readerIndex(0);
170 varchar.setByte(0, 'a');
171 varchar.setByte(1, 'b');
172 varchar.setByte(2, 'c');
173 varchar.writerIndex(3);
174 ComplexWriter writer = new ComplexWriterImpl("root", parent);
175 StructWriter rootWriter = writer.rootAsStruct();
176 IntWriter intWriter = rootWriter.integer("int");
177 BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
178 ListWriter listWriter = rootWriter.list("list");
179 StructWriter structWriter = rootWriter.struct("struct");
180 for (int i = 0; i < count; i++) {
181 if (i % 5 != 3) {
182 intWriter.setPosition(i);
183 intWriter.writeInt(i);
184 }
185 bigIntWriter.setPosition(i);
186 bigIntWriter.writeBigInt(i);
187 listWriter.setPosition(i);
188 listWriter.startList();
189 for (int j = 0; j < i % 3; j++) {
190 listWriter.varChar().writeVarChar(0, 3, varchar);
191 }
192 listWriter.endList();
193 structWriter.setPosition(i);
194 structWriter.start();
195 structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
196 structWriter.end();
197 }
198 writer.setValueCount(count);
199 varchar.getReferenceManager().release();
200 }
201
202 public void printVectors(List<FieldVector> vectors) {
203 for (FieldVector vector : vectors) {
204 LOGGER.debug(vector.getField().getName());
205 int valueCount = vector.getValueCount();
206 for (int i = 0; i < valueCount; i++) {
207 LOGGER.debug(String.valueOf(vector.getObject(i)));
208 }
209 }
210 }
211
212 protected void validateComplexContent(int count, VectorSchemaRoot root) {
213 Assert.assertEquals(count, root.getRowCount());
214 printVectors(root.getFieldVectors());
215 for (int i = 0; i < count; i++) {
216
217 Object intVal = root.getVector("int").getObject(i);
218 if (i % 5 != 3) {
219 Assert.assertEquals(i, intVal);
220 } else {
221 Assert.assertNull(intVal);
222 }
223 Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
224 Assert.assertEquals(i % 3, ((List<?>) root.getVector("list").getObject(i)).size());
225 NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
226 FieldReader structReader = root.getVector("struct").getReader();
227 structReader.setPosition(i);
228 structReader.reader("timestamp").read(h);
229 Assert.assertEquals(i, h.value);
230 }
231 }
232
233 private LocalDateTime makeDateTimeFromCount(int i) {
234 return LocalDateTime.of(2000 + i, 1 + i, 1 + i, i, i, i, i * 100_000_000 + i);
235 }
236
237 protected void writeDateTimeData(int count, StructVector parent) {
238 Assert.assertTrue(count < 100);
239 ComplexWriter writer = new ComplexWriterImpl("root", parent);
240 StructWriter rootWriter = writer.rootAsStruct();
241 DateMilliWriter dateWriter = rootWriter.dateMilli("date");
242 TimeMilliWriter timeWriter = rootWriter.timeMilli("time");
243 TimeStampMilliWriter timeStampMilliWriter = rootWriter.timeStampMilli("timestamp-milli");
244 TimeStampMilliTZWriter timeStampMilliTZWriter = rootWriter.timeStampMilliTZ("timestamp-milliTZ", "Europe/Paris");
245 TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("timestamp-nano");
246 for (int i = 0; i < count; i++) {
247 LocalDateTime dt = makeDateTimeFromCount(i);
248 // Number of days in milliseconds since epoch, stored as 64-bit integer, only date part is used
249 dateWriter.setPosition(i);
250 long dateLong = dt.toLocalDate().atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
251 dateWriter.writeDateMilli(dateLong);
252 // Time is a value in milliseconds since midnight, stored as 32-bit integer
253 timeWriter.setPosition(i);
254 int milliOfDay = (int) java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(dt.toLocalTime().toNanoOfDay());
255 timeWriter.writeTimeMilli(milliOfDay);
256 // Timestamp as milliseconds since the epoch, stored as 64-bit integer
257 timeStampMilliWriter.setPosition(i);
258 timeStampMilliWriter.writeTimeStampMilli(dt.toInstant(ZoneOffset.UTC).toEpochMilli());
259 // Timestamp as milliseconds since epoch with timezone
260 timeStampMilliTZWriter.setPosition(i);
261 timeStampMilliTZWriter.writeTimeStampMilliTZ(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli());
262 // Timestamp as nanoseconds since epoch
263 timeStampNanoWriter.setPosition(i);
264 long tsNanos = dt.toInstant(ZoneOffset.UTC).toEpochMilli() * 1_000_000 + i; // need to add back in nano val
265 timeStampNanoWriter.writeTimeStampNano(tsNanos);
266 }
267 writer.setValueCount(count);
268 }
269
270 protected void validateDateTimeContent(int count, VectorSchemaRoot root) {
271 Assert.assertEquals(count, root.getRowCount());
272 printVectors(root.getFieldVectors());
273 for (int i = 0; i < count; i++) {
274 LocalDateTime dt = makeDateTimeFromCount(i);
275 LocalDateTime dtMilli = dt.minusNanos(i);
276 LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i);
277 LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay();
278 Assert.assertEquals(dateExpected, dateVal);
279 LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime();
280 Assert.assertEquals(dtMilli.toLocalTime(), timeVal);
281 Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i);
282 Assert.assertEquals(dtMilli, timestampMilliVal);
283 Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i);
284 Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal);
285 Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i);
286 Assert.assertEquals(dt, timestampNanoVal);
287 }
288 }
289
290 protected VectorSchemaRoot writeFlatDictionaryData(
291 BufferAllocator bufferAllocator,
292 DictionaryProvider.MapDictionaryProvider provider) {
293
294 // Define dictionaries and add to provider
295 VarCharVector dictionary1Vector = newVarCharVector("D1", bufferAllocator);
296 dictionary1Vector.allocateNewSafe();
297 dictionary1Vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
298 dictionary1Vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
299 dictionary1Vector.set(2, "baz".getBytes(StandardCharsets.UTF_8));
300 dictionary1Vector.setValueCount(3);
301
302 Dictionary dictionary1 = new Dictionary(dictionary1Vector, new DictionaryEncoding(1L, false, null));
303 provider.put(dictionary1);
304
305 VarCharVector dictionary2Vector = newVarCharVector("D2", bufferAllocator);
306 dictionary2Vector.allocateNewSafe();
307 dictionary2Vector.set(0, "micro".getBytes(StandardCharsets.UTF_8));
308 dictionary2Vector.set(1, "small".getBytes(StandardCharsets.UTF_8));
309 dictionary2Vector.set(2, "large".getBytes(StandardCharsets.UTF_8));
310 dictionary2Vector.setValueCount(3);
311
312 Dictionary dictionary2 = new Dictionary(dictionary2Vector, new DictionaryEncoding(2L, false, null));
313 provider.put(dictionary2);
314
315 // Populate the vectors
316 VarCharVector vector1A = newVarCharVector("varcharA", bufferAllocator);
317 vector1A.allocateNewSafe();
318 vector1A.set(0, "foo".getBytes(StandardCharsets.UTF_8));
319 vector1A.set(1, "bar".getBytes(StandardCharsets.UTF_8));
320 vector1A.set(3, "baz".getBytes(StandardCharsets.UTF_8));
321 vector1A.set(4, "bar".getBytes(StandardCharsets.UTF_8));
322 vector1A.set(5, "baz".getBytes(StandardCharsets.UTF_8));
323 vector1A.setValueCount(6);
324
325 FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector1A, dictionary1);
326 vector1A.close(); // Done with this vector after encoding
327
328 // Write this vector using indices instead of encoding
329 IntVector encodedVector1B = new IntVector("varcharB", bufferAllocator);
330 encodedVector1B.allocateNewSafe();
331 encodedVector1B.set(0, 2); // "baz"
332 encodedVector1B.set(1, 1); // "bar"
333 encodedVector1B.set(2, 2); // "baz"
334 encodedVector1B.set(4, 1); // "bar"
335 encodedVector1B.set(5, 0); // "foo"
336 encodedVector1B.setValueCount(6);
337
338 VarCharVector vector2 = newVarCharVector("sizes", bufferAllocator);
339 vector2.allocateNewSafe();
340 vector2.set(1, "large".getBytes(StandardCharsets.UTF_8));
341 vector2.set(2, "small".getBytes(StandardCharsets.UTF_8));
342 vector2.set(3, "small".getBytes(StandardCharsets.UTF_8));
343 vector2.set(4, "large".getBytes(StandardCharsets.UTF_8));
344 vector2.setValueCount(6);
345
346 FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary2);
347 vector2.close(); // Done with this vector after encoding
348
349 List<Field> fields = Arrays.asList(encodedVector1A.getField(), encodedVector1B.getField(),
350 encodedVector2.getField());
351 List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1A, encodedVector1B, encodedVector2);
352
353 return new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount());
354 }
355
356 protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
357 FieldVector vector1A = root.getVector("varcharA");
358 Assert.assertNotNull(vector1A);
359
360 DictionaryEncoding encoding1A = vector1A.getField().getDictionary();
361 Assert.assertNotNull(encoding1A);
362 Assert.assertEquals(1L, encoding1A.getId());
363
364 Assert.assertEquals(6, vector1A.getValueCount());
365 Assert.assertEquals(0, vector1A.getObject(0));
366 Assert.assertEquals(1, vector1A.getObject(1));
367 Assert.assertEquals(null, vector1A.getObject(2));
368 Assert.assertEquals(2, vector1A.getObject(3));
369 Assert.assertEquals(1, vector1A.getObject(4));
370 Assert.assertEquals(2, vector1A.getObject(5));
371
372 FieldVector vector1B = root.getVector("varcharB");
373 Assert.assertNotNull(vector1B);
374
375 DictionaryEncoding encoding1B = vector1A.getField().getDictionary();
376 Assert.assertNotNull(encoding1B);
377 Assert.assertTrue(encoding1A.equals(encoding1B));
378 Assert.assertEquals(1L, encoding1B.getId());
379
380 Assert.assertEquals(6, vector1B.getValueCount());
381 Assert.assertEquals(2, vector1B.getObject(0));
382 Assert.assertEquals(1, vector1B.getObject(1));
383 Assert.assertEquals(2, vector1B.getObject(2));
384 Assert.assertEquals(null, vector1B.getObject(3));
385 Assert.assertEquals(1, vector1B.getObject(4));
386 Assert.assertEquals(0, vector1B.getObject(5));
387
388 FieldVector vector2 = root.getVector("sizes");
389 Assert.assertNotNull(vector2);
390
391 DictionaryEncoding encoding2 = vector2.getField().getDictionary();
392 Assert.assertNotNull(encoding2);
393 Assert.assertEquals(2L, encoding2.getId());
394
395 Assert.assertEquals(6, vector2.getValueCount());
396 Assert.assertEquals(null, vector2.getObject(0));
397 Assert.assertEquals(2, vector2.getObject(1));
398 Assert.assertEquals(1, vector2.getObject(2));
399 Assert.assertEquals(1, vector2.getObject(3));
400 Assert.assertEquals(2, vector2.getObject(4));
401 Assert.assertEquals(null, vector2.getObject(5));
402
403 Dictionary dictionary1 = provider.lookup(1L);
404 Assert.assertNotNull(dictionary1);
405 VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector());
406 Assert.assertEquals(3, dictionaryVector.getValueCount());
407 Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
408 Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
409 Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2));
410
411 Dictionary dictionary2 = provider.lookup(2L);
412 Assert.assertNotNull(dictionary2);
413 dictionaryVector = ((VarCharVector) dictionary2.getVector());
414 Assert.assertEquals(3, dictionaryVector.getValueCount());
415 Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0));
416 Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1));
417 Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2));
418 }
419
420 protected VectorSchemaRoot writeNestedDictionaryData(
421 BufferAllocator bufferAllocator,
422 DictionaryProvider.MapDictionaryProvider provider) {
423
424 // Define the dictionary and add to the provider
425 VarCharVector dictionaryVector = newVarCharVector("D2", bufferAllocator);
426 dictionaryVector.allocateNewSafe();
427 dictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
428 dictionaryVector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
429 dictionaryVector.setValueCount(2);
430
431 Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(2L, false, null));
432 provider.put(dictionary);
433
434 // Write the vector data using dictionary indices
435 ListVector listVector = ListVector.empty("list", bufferAllocator);
436 DictionaryEncoding encoding = dictionary.getEncoding();
437 listVector.addOrGetVector(new FieldType(true, encoding.getIndexType(), encoding));
438 listVector.allocateNew();
439 UnionListWriter listWriter = new UnionListWriter(listVector);
440 listWriter.startList();
441 listWriter.writeInt(0);
442 listWriter.writeInt(1);
443 listWriter.endList();
444 listWriter.startList();
445 listWriter.writeInt(0);
446 listWriter.endList();
447 listWriter.startList();
448 listWriter.writeInt(1);
449 listWriter.endList();
450 listWriter.setValueCount(3);
451
452 List<Field> fields = Collections2.asImmutableList(listVector.getField());
453 List<FieldVector> vectors = Collections2.asImmutableList(listVector);
454 return new VectorSchemaRoot(fields, vectors, 3);
455 }
456
457 protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
458 FieldVector vector = root.getFieldVectors().get(0);
459 Assert.assertNotNull(vector);
460 Assert.assertNull(vector.getField().getDictionary());
461 Field nestedField = vector.getField().getChildren().get(0);
462
463 DictionaryEncoding encoding = nestedField.getDictionary();
464 Assert.assertNotNull(encoding);
465 Assert.assertEquals(2L, encoding.getId());
466 Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType());
467
468 Assert.assertEquals(3, vector.getValueCount());
469 Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0));
470 Assert.assertEquals(Arrays.asList(0), vector.getObject(1));
471 Assert.assertEquals(Arrays.asList(1), vector.getObject(2));
472
473 Dictionary dictionary = provider.lookup(2L);
474 Assert.assertNotNull(dictionary);
475 VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector());
476 Assert.assertEquals(2, dictionaryVector.getValueCount());
477 Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
478 Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
479 }
480
481 protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) {
482 DecimalVector decimalVector1 = new DecimalVector("decimal1", bufferAllocator, 10, 3);
483 DecimalVector decimalVector2 = new DecimalVector("decimal2", bufferAllocator, 4, 2);
484 DecimalVector decimalVector3 = new DecimalVector("decimal3", bufferAllocator, 16, 8);
485
486 int count = 10;
487 decimalVector1.allocateNew(count);
488 decimalVector2.allocateNew(count);
489 decimalVector3.allocateNew(count);
490
491 for (int i = 0; i < count; i++) {
492 decimalVector1.setSafe(i, new BigDecimal(BigInteger.valueOf(i), 3));
493 decimalVector2.setSafe(i, new BigDecimal(BigInteger.valueOf(i * (1 << 10)), 2));
494 decimalVector3.setSafe(i, new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), 8));
495 }
496
497 decimalVector1.setValueCount(count);
498 decimalVector2.setValueCount(count);
499 decimalVector3.setValueCount(count);
500
501 List<Field> fields = Collections2.asImmutableList(decimalVector1.getField(), decimalVector2.getField(),
502 decimalVector3.getField());
503 List<FieldVector> vectors = Collections2.asImmutableList(decimalVector1, decimalVector2, decimalVector3);
504 return new VectorSchemaRoot(fields, vectors, count);
505 }
506
507 protected void validateDecimalData(VectorSchemaRoot root) {
508 DecimalVector decimalVector1 = (DecimalVector) root.getVector("decimal1");
509 DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2");
510 DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3");
511 int count = 10;
512 Assert.assertEquals(count, root.getRowCount());
513
514 for (int i = 0; i < count; i++) {
515 // Verify decimal 1 vector
516 BigDecimal readValue = decimalVector1.getObject(i);
517 ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType();
518 BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale());
519 Assert.assertEquals(genValue, readValue);
520
521 // Verify decimal 2 vector
522 readValue = decimalVector2.getObject(i);
523 type = (ArrowType.Decimal) decimalVector2.getField().getType();
524 genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale());
525 Assert.assertEquals(genValue, readValue);
526
527 // Verify decimal 3 vector
528 readValue = decimalVector3.getObject(i);
529 type = (ArrowType.Decimal) decimalVector3.getField().getType();
530 genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale());
531 Assert.assertEquals(genValue, readValue);
532 }
533 }
534
535 protected VectorSchemaRoot writeNullData(int valueCount) {
536 NullVector nullVector1 = new NullVector("vector1");
537 NullVector nullVector2 = new NullVector("vector2");
538 nullVector1.setValueCount(valueCount);
539 nullVector2.setValueCount(valueCount);
540
541 List<Field> fields = Collections2.asImmutableList(nullVector1.getField(), nullVector2.getField());
542 List<FieldVector> vectors = Collections2.asImmutableList(nullVector1, nullVector2);
543 return new VectorSchemaRoot(fields, vectors, valueCount);
544 }
545
546 protected void validateNullData(VectorSchemaRoot root, int valueCount) {
547
548 NullVector vector1 = (NullVector) root.getFieldVectors().get(0);
549 NullVector vector2 = (NullVector) root.getFieldVectors().get(1);
550
551 assertEquals(valueCount, vector1.getValueCount());
552 assertEquals(valueCount, vector2.getValueCount());
553 }
554
555 public void validateUnionData(int count, VectorSchemaRoot root) {
556 FieldReader unionReader = root.getVector("union").getReader();
557 for (int i = 0; i < count; i++) {
558 unionReader.setPosition(i);
559 switch (i % 4) {
560 case 0:
561 Assert.assertEquals(i, unionReader.readInteger().intValue());
562 break;
563 case 1:
564 Assert.assertEquals(i, unionReader.readLong().longValue());
565 break;
566 case 2:
567 Assert.assertEquals(i % 3, unionReader.size());
568 break;
569 case 3:
570 NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
571 unionReader.reader("timestamp").read(h);
572 Assert.assertEquals(i, h.value);
573 break;
574 default:
575 assert false : "Unexpected value in switch statement: " + i;
576 }
577 }
578 }
579
580 public void writeUnionData(int count, StructVector parent) {
581 ArrowBuf varchar = allocator.buffer(3);
582 varchar.readerIndex(0);
583 varchar.setByte(0, 'a');
584 varchar.setByte(1, 'b');
585 varchar.setByte(2, 'c');
586 varchar.writerIndex(3);
587 ComplexWriter writer = new ComplexWriterImpl("root", parent);
588 StructWriter rootWriter = writer.rootAsStruct();
589 IntWriter intWriter = rootWriter.integer("union");
590 BigIntWriter bigIntWriter = rootWriter.bigInt("union");
591 ListWriter listWriter = rootWriter.list("union");
592 StructWriter structWriter = rootWriter.struct("union");
593 for (int i = 0; i < count; i++) {
594 switch (i % 4) {
595 case 0:
596 intWriter.setPosition(i);
597 intWriter.writeInt(i);
598 break;
599 case 1:
600 bigIntWriter.setPosition(i);
601 bigIntWriter.writeBigInt(i);
602 break;
603 case 2:
604 listWriter.setPosition(i);
605 listWriter.startList();
606 for (int j = 0; j < i % 3; j++) {
607 listWriter.varChar().writeVarChar(0, 3, varchar);
608 }
609 listWriter.endList();
610 break;
611 case 3:
612 structWriter.setPosition(i);
613 structWriter.start();
614 structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
615 structWriter.end();
616 break;
617 default:
618 assert false : "Unexpected value in switch statement: " + i;
619 }
620 }
621 writer.setValueCount(count);
622 varchar.getReferenceManager().release();
623 }
624
625 protected void writeVarBinaryData(int count, StructVector parent) {
626 Assert.assertTrue(count < 100);
627 ComplexWriter writer = new ComplexWriterImpl("root", parent);
628 StructWriter rootWriter = writer.rootAsStruct();
629 ListWriter listWriter = rootWriter.list("list");
630 ArrowBuf varbin = allocator.buffer(count);
631 for (int i = 0; i < count; i++) {
632 varbin.setByte(i, i);
633 listWriter.setPosition(i);
634 listWriter.startList();
635 for (int j = 0; j < i % 3; j++) {
636 listWriter.varBinary().writeVarBinary(0, i + 1, varbin);
637 }
638 listWriter.endList();
639 }
640 writer.setValueCount(count);
641 varbin.getReferenceManager().release();
642 }
643
644 protected void validateVarBinary(int count, VectorSchemaRoot root) {
645 Assert.assertEquals(count, root.getRowCount());
646 ListVector listVector = (ListVector) root.getVector("list");
647 byte[] expectedArray = new byte[count];
648 int numVarBinaryValues = 0;
649 for (int i = 0; i < count; i++) {
650 expectedArray[i] = (byte) i;
651 List<?> objList = listVector.getObject(i);
652 if (i % 3 == 0) {
653 Assert.assertTrue(objList.isEmpty());
654 } else {
655 byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1);
656 for (int j = 0; j < i % 3; j++) {
657 byte[] result = (byte[]) objList.get(j);
658 Assert.assertArrayEquals(result, expected);
659 numVarBinaryValues++;
660 }
661 }
662 }
663
664 // ListVector lastSet should be the index of last value + 1
665 Assert.assertEquals(listVector.getLastSet(), count - 1);
666
667 // VarBinaryVector lastSet should be the index of last value
668 VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0);
669 Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1);
670 }
671
672 protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException {
673 writer.start();
674
675 vector.setNull(0);
676 vector.setSafe(1, 1);
677 vector.setSafe(2, 2);
678 vector.setNull(3);
679 vector.setSafe(4, 1);
680 vector.setValueCount(5);
681 root.setRowCount(5);
682 writer.writeBatch();
683
684 vector.setNull(0);
685 vector.setSafe(1, 1);
686 vector.setSafe(2, 2);
687 vector.setValueCount(3);
688 root.setRowCount(3);
689 writer.writeBatch();
690
691 writer.end();
692 }
693
694 protected void validateBatchData(ArrowReader reader, IntVector vector) throws IOException {
695 reader.loadNextBatch();
696
697 assertEquals(vector.getValueCount(), 5);
698 assertTrue(vector.isNull(0));
699 assertEquals(vector.get(1), 1);
700 assertEquals(vector.get(2), 2);
701 assertTrue(vector.isNull(3));
702 assertEquals(vector.get(4), 1);
703
704 reader.loadNextBatch();
705
706 assertEquals(vector.getValueCount(), 3);
707 assertTrue(vector.isNull(0));
708 assertEquals(vector.get(1), 1);
709 assertEquals(vector.get(2), 2);
710 }
711
712 protected VectorSchemaRoot writeMapData(BufferAllocator bufferAllocator) {
713 MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
714 MapVector sortedMapVector = MapVector.empty("mapSorted", bufferAllocator, true);
715 mapVector.allocateNew();
716 sortedMapVector.allocateNew();
717 UnionMapWriter mapWriter = mapVector.getWriter();
718 UnionMapWriter sortedMapWriter = sortedMapVector.getWriter();
719
720 final int count = 10;
721 for (int i = 0; i < count; i++) {
722 // Write mapVector with NULL values
723 // i == 1 is a NULL
724 if (i != 1) {
725 mapWriter.setPosition(i);
726 mapWriter.startMap();
727 // i == 3 is an empty map
728 if (i != 3) {
729 for (int j = 0; j < i + 1; j++) {
730 mapWriter.startEntry();
731 mapWriter.key().bigInt().writeBigInt(j);
732 // i == 5 maps to a NULL value
733 if (i != 5) {
734 mapWriter.value().integer().writeInt(j);
735 }
736 mapWriter.endEntry();
737 }
738 }
739 mapWriter.endMap();
740 }
741 // Write sortedMapVector
742 sortedMapWriter.setPosition(i);
743 sortedMapWriter.startMap();
744 for (int j = 0; j < i + 1; j++) {
745 sortedMapWriter.startEntry();
746 sortedMapWriter.key().bigInt().writeBigInt(j);
747 sortedMapWriter.value().integer().writeInt(j);
748 sortedMapWriter.endEntry();
749 }
750 sortedMapWriter.endMap();
751 }
752 mapWriter.setValueCount(COUNT);
753 sortedMapWriter.setValueCount(COUNT);
754
755 List<Field> fields = Collections2.asImmutableList(mapVector.getField(), sortedMapVector.getField());
756 List<FieldVector> vectors = Collections2.asImmutableList(mapVector, sortedMapVector);
757 return new VectorSchemaRoot(fields, vectors, count);
758 }
759
760 protected void validateMapData(VectorSchemaRoot root) {
761 MapVector mapVector = (MapVector) root.getVector("map");
762 MapVector sortedMapVector = (MapVector) root.getVector("mapSorted");
763
764 final int count = 10;
765 Assert.assertEquals(count, root.getRowCount());
766
767 UnionMapReader mapReader = new UnionMapReader(mapVector);
768 UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
769 for (int i = 0; i < count; i++) {
770 // Read mapVector with NULL values
771 mapReader.setPosition(i);
772 if (i == 1) {
773 assertFalse(mapReader.isSet());
774 } else {
775 if (i == 3) {
776 JsonStringArrayList<?> result = (JsonStringArrayList<?>) mapReader.readObject();
777 assertTrue(result.isEmpty());
778 } else {
779 for (int j = 0; j < i + 1; j++) {
780 mapReader.next();
781 assertEquals(j, mapReader.key().readLong().longValue());
782 if (i == 5) {
783 assertFalse(mapReader.value().isSet());
784 } else {
785 assertEquals(j, mapReader.value().readInteger().intValue());
786 }
787 }
788 }
789 }
790 // Read sortedMapVector
791 sortedMapReader.setPosition(i);
792 for (int j = 0; j < i + 1; j++) {
793 sortedMapReader.next();
794 assertEquals(j, sortedMapReader.key().readLong().longValue());
795 assertEquals(j, sortedMapReader.value().readInteger().intValue());
796 }
797 }
798 }
799
800 protected VectorSchemaRoot writeListAsMapData(BufferAllocator bufferAllocator) {
801 ListVector mapEntryList = ListVector.empty("entryList", bufferAllocator);
802 FieldType mapEntryType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
803 StructVector mapEntryData = new StructVector("entryData", bufferAllocator, mapEntryType, null);
804 mapEntryData.addOrGet("myKey", new FieldType(false, new ArrowType.Int(64, true), null), BigIntVector.class);
805 mapEntryData.addOrGet("myValue", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
806 mapEntryList.initializeChildrenFromFields(Collections2.asImmutableList(mapEntryData.getField()));
807 UnionListWriter entryWriter = mapEntryList.getWriter();
808 entryWriter.allocate();
809
810 final int count = 10;
811 for (int i = 0; i < count; i++) {
812 entryWriter.setPosition(i);
813 entryWriter.startList();
814 for (int j = 0; j < i + 1; j++) {
815 entryWriter.struct().start();
816 entryWriter.struct().bigInt("myKey").writeBigInt(j);
817 entryWriter.struct().integer("myValue").writeInt(j);
818 entryWriter.struct().end();
819 }
820 entryWriter.endList();
821 }
822 entryWriter.setValueCount(COUNT);
823
824 MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
825 mapEntryList.makeTransferPair(mapVector).transfer();
826
827 List<Field> fields = Collections2.asImmutableList(mapVector.getField());
828 List<FieldVector> vectors = Collections2.asImmutableList(mapVector);
829 return new VectorSchemaRoot(fields, vectors, count);
830 }
831
832 protected void validateListAsMapData(VectorSchemaRoot root) {
833 MapVector sortedMapVector = (MapVector) root.getVector("map");
834
835 final int count = 10;
836 Assert.assertEquals(count, root.getRowCount());
837
838 UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
839 sortedMapReader.setKeyValueNames("myKey", "myValue");
840 for (int i = 0; i < count; i++) {
841 sortedMapReader.setPosition(i);
842 for (int j = 0; j < i + 1; j++) {
843 sortedMapReader.next();
844 assertEquals(j, sortedMapReader.key().readLong().longValue());
845 assertEquals(j, sortedMapReader.value().readInteger().intValue());
846 }
847 }
848 }
849 }