2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org
.apache
.arrow
.vector
.ipc
;
20 import static org
.apache
.arrow
.vector
.TestUtils
.newVarCharVector
;
21 import static org
.junit
.Assert
.assertEquals
;
22 import static org
.junit
.Assert
.assertFalse
;
23 import static org
.junit
.Assert
.assertTrue
;
25 import java
.io
.IOException
;
26 import java
.math
.BigDecimal
;
27 import java
.math
.BigInteger
;
28 import java
.nio
.charset
.StandardCharsets
;
29 import java
.time
.LocalDateTime
;
30 import java
.time
.LocalTime
;
31 import java
.time
.ZoneId
;
32 import java
.time
.ZoneOffset
;
33 import java
.util
.Arrays
;
34 import java
.util
.List
;
36 import org
.apache
.arrow
.memory
.ArrowBuf
;
37 import org
.apache
.arrow
.memory
.BufferAllocator
;
38 import org
.apache
.arrow
.memory
.RootAllocator
;
39 import org
.apache
.arrow
.util
.Collections2
;
40 import org
.apache
.arrow
.vector
.BigIntVector
;
41 import org
.apache
.arrow
.vector
.DateMilliVector
;
42 import org
.apache
.arrow
.vector
.DecimalVector
;
43 import org
.apache
.arrow
.vector
.FieldVector
;
44 import org
.apache
.arrow
.vector
.IntVector
;
45 import org
.apache
.arrow
.vector
.NullVector
;
46 import org
.apache
.arrow
.vector
.TimeMilliVector
;
47 import org
.apache
.arrow
.vector
.UInt1Vector
;
48 import org
.apache
.arrow
.vector
.UInt2Vector
;
49 import org
.apache
.arrow
.vector
.UInt4Vector
;
50 import org
.apache
.arrow
.vector
.UInt8Vector
;
51 import org
.apache
.arrow
.vector
.VarBinaryVector
;
52 import org
.apache
.arrow
.vector
.VarCharVector
;
53 import org
.apache
.arrow
.vector
.VectorSchemaRoot
;
54 import org
.apache
.arrow
.vector
.complex
.ListVector
;
55 import org
.apache
.arrow
.vector
.complex
.MapVector
;
56 import org
.apache
.arrow
.vector
.complex
.StructVector
;
57 import org
.apache
.arrow
.vector
.complex
.impl
.ComplexWriterImpl
;
58 import org
.apache
.arrow
.vector
.complex
.impl
.UnionListWriter
;
59 import org
.apache
.arrow
.vector
.complex
.impl
.UnionMapReader
;
60 import org
.apache
.arrow
.vector
.complex
.impl
.UnionMapWriter
;
61 import org
.apache
.arrow
.vector
.complex
.reader
.FieldReader
;
62 import org
.apache
.arrow
.vector
.complex
.writer
.BaseWriter
.ComplexWriter
;
63 import org
.apache
.arrow
.vector
.complex
.writer
.BaseWriter
.ListWriter
;
64 import org
.apache
.arrow
.vector
.complex
.writer
.BaseWriter
.StructWriter
;
65 import org
.apache
.arrow
.vector
.complex
.writer
.BigIntWriter
;
66 import org
.apache
.arrow
.vector
.complex
.writer
.DateMilliWriter
;
67 import org
.apache
.arrow
.vector
.complex
.writer
.Float4Writer
;
68 import org
.apache
.arrow
.vector
.complex
.writer
.IntWriter
;
69 import org
.apache
.arrow
.vector
.complex
.writer
.TimeMilliWriter
;
70 import org
.apache
.arrow
.vector
.complex
.writer
.TimeStampMilliTZWriter
;
71 import org
.apache
.arrow
.vector
.complex
.writer
.TimeStampMilliWriter
;
72 import org
.apache
.arrow
.vector
.complex
.writer
.TimeStampNanoWriter
;
73 import org
.apache
.arrow
.vector
.complex
.writer
.UInt1Writer
;
74 import org
.apache
.arrow
.vector
.complex
.writer
.UInt2Writer
;
75 import org
.apache
.arrow
.vector
.complex
.writer
.UInt4Writer
;
76 import org
.apache
.arrow
.vector
.complex
.writer
.UInt8Writer
;
77 import org
.apache
.arrow
.vector
.dictionary
.Dictionary
;
78 import org
.apache
.arrow
.vector
.dictionary
.DictionaryEncoder
;
79 import org
.apache
.arrow
.vector
.dictionary
.DictionaryProvider
;
80 import org
.apache
.arrow
.vector
.holders
.NullableTimeStampMilliHolder
;
81 import org
.apache
.arrow
.vector
.types
.pojo
.ArrowType
;
82 import org
.apache
.arrow
.vector
.types
.pojo
.DictionaryEncoding
;
83 import org
.apache
.arrow
.vector
.types
.pojo
.Field
;
84 import org
.apache
.arrow
.vector
.types
.pojo
.FieldType
;
85 import org
.apache
.arrow
.vector
.util
.JsonStringArrayList
;
86 import org
.apache
.arrow
.vector
.util
.Text
;
87 import org
.junit
.After
;
88 import org
.junit
.Assert
;
89 import org
.junit
.Before
;
90 import org
.slf4j
.Logger
;
91 import org
.slf4j
.LoggerFactory
;
 * Helps test the file formats by providing shared data writers and validators.
96 public class BaseFileTest
{
97 private static final Logger LOGGER
= LoggerFactory
.getLogger(BaseFileTest
.class);
98 protected static final int COUNT
= 10;
99 protected BufferAllocator allocator
;
103 allocator
= new RootAllocator(Integer
.MAX_VALUE
);
107 public void tearDown() {
112 private static short [] uint1Values
= new short[]{0, 255, 1, 128, 2};
113 private static char [] uint2Values
= new char[]{0, Character
.MAX_VALUE
, 1, Short
.MAX_VALUE
* 2, 2};
114 private static long [] uint4Values
= new long[]{0, Integer
.MAX_VALUE
+ 1L, 1, Integer
.MAX_VALUE
* 2L, 2};
115 private static BigInteger
[] uint8Values
= new BigInteger
[]{BigInteger
.valueOf(0),
116 BigInteger
.valueOf(Long
.MAX_VALUE
).multiply(BigInteger
.valueOf(2)), BigInteger
.valueOf(2),
117 BigInteger
.valueOf(Long
.MAX_VALUE
).add(BigInteger
.valueOf(1)), BigInteger
.valueOf(2)};
119 protected void writeData(int count
, StructVector parent
) {
120 ComplexWriter writer
= new ComplexWriterImpl("root", parent
);
121 StructWriter rootWriter
= writer
.rootAsStruct();
122 IntWriter intWriter
= rootWriter
.integer("int");
123 UInt1Writer uint1Writer
= rootWriter
.uInt1("uint1");
124 UInt2Writer uint2Writer
= rootWriter
.uInt2("uint2");
125 UInt4Writer uint4Writer
= rootWriter
.uInt4("uint4");
126 UInt8Writer uint8Writer
= rootWriter
.uInt8("uint8");
127 BigIntWriter bigIntWriter
= rootWriter
.bigInt("bigInt");
128 Float4Writer float4Writer
= rootWriter
.float4("float");
129 for (int i
= 0; i
< count
; i
++) {
130 intWriter
.setPosition(i
);
131 intWriter
.writeInt(i
);
132 uint1Writer
.setPosition(i
);
133 // TODO: Fix add safe write methods on uint methods.
134 uint1Writer
.setPosition(i
);
135 uint1Writer
.writeUInt1((byte) uint1Values
[i
% uint1Values
.length
] );
136 uint2Writer
.setPosition(i
);
137 uint2Writer
.writeUInt2((char) uint2Values
[i
% uint2Values
.length
] );
138 uint4Writer
.setPosition(i
);
139 uint4Writer
.writeUInt4((int) uint4Values
[i
% uint4Values
.length
] );
140 uint8Writer
.setPosition(i
);
141 uint8Writer
.writeUInt8(uint8Values
[i
% uint8Values
.length
].longValue());
142 bigIntWriter
.setPosition(i
);
143 bigIntWriter
.writeBigInt(i
);
144 float4Writer
.setPosition(i
);
145 float4Writer
.writeFloat4(i
== 0 ? Float
.NaN
: i
);
147 writer
.setValueCount(count
);
151 protected void validateContent(int count
, VectorSchemaRoot root
) {
152 for (int i
= 0; i
< count
; i
++) {
153 Assert
.assertEquals(i
, root
.getVector("int").getObject(i
));
154 Assert
.assertEquals((Short
) uint1Values
[i
% uint1Values
.length
],
155 ((UInt1Vector
) root
.getVector("uint1")).getObjectNoOverflow(i
));
156 Assert
.assertEquals("Failed for index: " + i
, (Character
) uint2Values
[i
% uint2Values
.length
],
157 (Character
) ((UInt2Vector
) root
.getVector("uint2")).get(i
));
158 Assert
.assertEquals("Failed for index: " + i
, (Long
) uint4Values
[i
% uint4Values
.length
],
159 ((UInt4Vector
) root
.getVector("uint4")).getObjectNoOverflow(i
));
160 Assert
.assertEquals("Failed for index: " + i
, uint8Values
[i
% uint8Values
.length
],
161 ((UInt8Vector
) root
.getVector("uint8")).getObjectNoOverflow(i
));
162 Assert
.assertEquals(Long
.valueOf(i
), root
.getVector("bigInt").getObject(i
));
163 Assert
.assertEquals(i
== 0 ? Float
.NaN
: i
, root
.getVector("float").getObject(i
));
167 protected void writeComplexData(int count
, StructVector parent
) {
168 ArrowBuf varchar
= allocator
.buffer(3);
169 varchar
.readerIndex(0);
170 varchar
.setByte(0, 'a');
171 varchar
.setByte(1, 'b');
172 varchar
.setByte(2, 'c');
173 varchar
.writerIndex(3);
174 ComplexWriter writer
= new ComplexWriterImpl("root", parent
);
175 StructWriter rootWriter
= writer
.rootAsStruct();
176 IntWriter intWriter
= rootWriter
.integer("int");
177 BigIntWriter bigIntWriter
= rootWriter
.bigInt("bigInt");
178 ListWriter listWriter
= rootWriter
.list("list");
179 StructWriter structWriter
= rootWriter
.struct("struct");
180 for (int i
= 0; i
< count
; i
++) {
182 intWriter
.setPosition(i
);
183 intWriter
.writeInt(i
);
185 bigIntWriter
.setPosition(i
);
186 bigIntWriter
.writeBigInt(i
);
187 listWriter
.setPosition(i
);
188 listWriter
.startList();
189 for (int j
= 0; j
< i
% 3; j
++) {
190 listWriter
.varChar().writeVarChar(0, 3, varchar
);
192 listWriter
.endList();
193 structWriter
.setPosition(i
);
194 structWriter
.start();
195 structWriter
.timeStampMilli("timestamp").writeTimeStampMilli(i
);
198 writer
.setValueCount(count
);
199 varchar
.getReferenceManager().release();
202 public void printVectors(List
<FieldVector
> vectors
) {
203 for (FieldVector vector
: vectors
) {
204 LOGGER
.debug(vector
.getField().getName());
205 int valueCount
= vector
.getValueCount();
206 for (int i
= 0; i
< valueCount
; i
++) {
207 LOGGER
.debug(String
.valueOf(vector
.getObject(i
)));
212 protected void validateComplexContent(int count
, VectorSchemaRoot root
) {
213 Assert
.assertEquals(count
, root
.getRowCount());
214 printVectors(root
.getFieldVectors());
215 for (int i
= 0; i
< count
; i
++) {
217 Object intVal
= root
.getVector("int").getObject(i
);
219 Assert
.assertEquals(i
, intVal
);
221 Assert
.assertNull(intVal
);
223 Assert
.assertEquals(Long
.valueOf(i
), root
.getVector("bigInt").getObject(i
));
224 Assert
.assertEquals(i
% 3, ((List
<?
>) root
.getVector("list").getObject(i
)).size());
225 NullableTimeStampMilliHolder h
= new NullableTimeStampMilliHolder();
226 FieldReader structReader
= root
.getVector("struct").getReader();
227 structReader
.setPosition(i
);
228 structReader
.reader("timestamp").read(h
);
229 Assert
.assertEquals(i
, h
.value
);
233 private LocalDateTime
makeDateTimeFromCount(int i
) {
234 return LocalDateTime
.of(2000 + i
, 1 + i
, 1 + i
, i
, i
, i
, i
* 100_000_000
+ i
);
237 protected void writeDateTimeData(int count
, StructVector parent
) {
238 Assert
.assertTrue(count
< 100);
239 ComplexWriter writer
= new ComplexWriterImpl("root", parent
);
240 StructWriter rootWriter
= writer
.rootAsStruct();
241 DateMilliWriter dateWriter
= rootWriter
.dateMilli("date");
242 TimeMilliWriter timeWriter
= rootWriter
.timeMilli("time");
243 TimeStampMilliWriter timeStampMilliWriter
= rootWriter
.timeStampMilli("timestamp-milli");
244 TimeStampMilliTZWriter timeStampMilliTZWriter
= rootWriter
.timeStampMilliTZ("timestamp-milliTZ", "Europe/Paris");
245 TimeStampNanoWriter timeStampNanoWriter
= rootWriter
.timeStampNano("timestamp-nano");
246 for (int i
= 0; i
< count
; i
++) {
247 LocalDateTime dt
= makeDateTimeFromCount(i
);
248 // Number of days in milliseconds since epoch, stored as 64-bit integer, only date part is used
249 dateWriter
.setPosition(i
);
250 long dateLong
= dt
.toLocalDate().atStartOfDay().toInstant(ZoneOffset
.UTC
).toEpochMilli();
251 dateWriter
.writeDateMilli(dateLong
);
252 // Time is a value in milliseconds since midnight, stored as 32-bit integer
253 timeWriter
.setPosition(i
);
254 int milliOfDay
= (int) java
.util
.concurrent
.TimeUnit
.NANOSECONDS
.toMillis(dt
.toLocalTime().toNanoOfDay());
255 timeWriter
.writeTimeMilli(milliOfDay
);
256 // Timestamp as milliseconds since the epoch, stored as 64-bit integer
257 timeStampMilliWriter
.setPosition(i
);
258 timeStampMilliWriter
.writeTimeStampMilli(dt
.toInstant(ZoneOffset
.UTC
).toEpochMilli());
259 // Timestamp as milliseconds since epoch with timezone
260 timeStampMilliTZWriter
.setPosition(i
);
261 timeStampMilliTZWriter
.writeTimeStampMilliTZ(dt
.atZone(ZoneId
.of("Europe/Paris")).toInstant().toEpochMilli());
262 // Timestamp as nanoseconds since epoch
263 timeStampNanoWriter
.setPosition(i
);
264 long tsNanos
= dt
.toInstant(ZoneOffset
.UTC
).toEpochMilli() * 1_000_000
+ i
; // need to add back in nano val
265 timeStampNanoWriter
.writeTimeStampNano(tsNanos
);
267 writer
.setValueCount(count
);
270 protected void validateDateTimeContent(int count
, VectorSchemaRoot root
) {
271 Assert
.assertEquals(count
, root
.getRowCount());
272 printVectors(root
.getFieldVectors());
273 for (int i
= 0; i
< count
; i
++) {
274 LocalDateTime dt
= makeDateTimeFromCount(i
);
275 LocalDateTime dtMilli
= dt
.minusNanos(i
);
276 LocalDateTime dateVal
= ((DateMilliVector
) root
.getVector("date")).getObject(i
);
277 LocalDateTime dateExpected
= dt
.toLocalDate().atStartOfDay();
278 Assert
.assertEquals(dateExpected
, dateVal
);
279 LocalTime timeVal
= ((TimeMilliVector
) root
.getVector("time")).getObject(i
).toLocalTime();
280 Assert
.assertEquals(dtMilli
.toLocalTime(), timeVal
);
281 Object timestampMilliVal
= root
.getVector("timestamp-milli").getObject(i
);
282 Assert
.assertEquals(dtMilli
, timestampMilliVal
);
283 Object timestampMilliTZVal
= root
.getVector("timestamp-milliTZ").getObject(i
);
284 Assert
.assertEquals(dt
.atZone(ZoneId
.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal
);
285 Object timestampNanoVal
= root
.getVector("timestamp-nano").getObject(i
);
286 Assert
.assertEquals(dt
, timestampNanoVal
);
290 protected VectorSchemaRoot
writeFlatDictionaryData(
291 BufferAllocator bufferAllocator
,
292 DictionaryProvider
.MapDictionaryProvider provider
) {
294 // Define dictionaries and add to provider
295 VarCharVector dictionary1Vector
= newVarCharVector("D1", bufferAllocator
);
296 dictionary1Vector
.allocateNewSafe();
297 dictionary1Vector
.set(0, "foo".getBytes(StandardCharsets
.UTF_8
));
298 dictionary1Vector
.set(1, "bar".getBytes(StandardCharsets
.UTF_8
));
299 dictionary1Vector
.set(2, "baz".getBytes(StandardCharsets
.UTF_8
));
300 dictionary1Vector
.setValueCount(3);
302 Dictionary dictionary1
= new Dictionary(dictionary1Vector
, new DictionaryEncoding(1L, false, null));
303 provider
.put(dictionary1
);
305 VarCharVector dictionary2Vector
= newVarCharVector("D2", bufferAllocator
);
306 dictionary2Vector
.allocateNewSafe();
307 dictionary2Vector
.set(0, "micro".getBytes(StandardCharsets
.UTF_8
));
308 dictionary2Vector
.set(1, "small".getBytes(StandardCharsets
.UTF_8
));
309 dictionary2Vector
.set(2, "large".getBytes(StandardCharsets
.UTF_8
));
310 dictionary2Vector
.setValueCount(3);
312 Dictionary dictionary2
= new Dictionary(dictionary2Vector
, new DictionaryEncoding(2L, false, null));
313 provider
.put(dictionary2
);
315 // Populate the vectors
316 VarCharVector vector1A
= newVarCharVector("varcharA", bufferAllocator
);
317 vector1A
.allocateNewSafe();
318 vector1A
.set(0, "foo".getBytes(StandardCharsets
.UTF_8
));
319 vector1A
.set(1, "bar".getBytes(StandardCharsets
.UTF_8
));
320 vector1A
.set(3, "baz".getBytes(StandardCharsets
.UTF_8
));
321 vector1A
.set(4, "bar".getBytes(StandardCharsets
.UTF_8
));
322 vector1A
.set(5, "baz".getBytes(StandardCharsets
.UTF_8
));
323 vector1A
.setValueCount(6);
325 FieldVector encodedVector1A
= (FieldVector
) DictionaryEncoder
.encode(vector1A
, dictionary1
);
326 vector1A
.close(); // Done with this vector after encoding
328 // Write this vector using indices instead of encoding
329 IntVector encodedVector1B
= new IntVector("varcharB", bufferAllocator
);
330 encodedVector1B
.allocateNewSafe();
331 encodedVector1B
.set(0, 2); // "baz"
332 encodedVector1B
.set(1, 1); // "bar"
333 encodedVector1B
.set(2, 2); // "baz"
334 encodedVector1B
.set(4, 1); // "bar"
335 encodedVector1B
.set(5, 0); // "foo"
336 encodedVector1B
.setValueCount(6);
338 VarCharVector vector2
= newVarCharVector("sizes", bufferAllocator
);
339 vector2
.allocateNewSafe();
340 vector2
.set(1, "large".getBytes(StandardCharsets
.UTF_8
));
341 vector2
.set(2, "small".getBytes(StandardCharsets
.UTF_8
));
342 vector2
.set(3, "small".getBytes(StandardCharsets
.UTF_8
));
343 vector2
.set(4, "large".getBytes(StandardCharsets
.UTF_8
));
344 vector2
.setValueCount(6);
346 FieldVector encodedVector2
= (FieldVector
) DictionaryEncoder
.encode(vector2
, dictionary2
);
347 vector2
.close(); // Done with this vector after encoding
349 List
<Field
> fields
= Arrays
.asList(encodedVector1A
.getField(), encodedVector1B
.getField(),
350 encodedVector2
.getField());
351 List
<FieldVector
> vectors
= Collections2
.asImmutableList(encodedVector1A
, encodedVector1B
, encodedVector2
);
353 return new VectorSchemaRoot(fields
, vectors
, encodedVector1A
.getValueCount());
356 protected void validateFlatDictionary(VectorSchemaRoot root
, DictionaryProvider provider
) {
357 FieldVector vector1A
= root
.getVector("varcharA");
358 Assert
.assertNotNull(vector1A
);
360 DictionaryEncoding encoding1A
= vector1A
.getField().getDictionary();
361 Assert
.assertNotNull(encoding1A
);
362 Assert
.assertEquals(1L, encoding1A
.getId());
364 Assert
.assertEquals(6, vector1A
.getValueCount());
365 Assert
.assertEquals(0, vector1A
.getObject(0));
366 Assert
.assertEquals(1, vector1A
.getObject(1));
367 Assert
.assertEquals(null, vector1A
.getObject(2));
368 Assert
.assertEquals(2, vector1A
.getObject(3));
369 Assert
.assertEquals(1, vector1A
.getObject(4));
370 Assert
.assertEquals(2, vector1A
.getObject(5));
372 FieldVector vector1B
= root
.getVector("varcharB");
373 Assert
.assertNotNull(vector1B
);
375 DictionaryEncoding encoding1B
= vector1A
.getField().getDictionary();
376 Assert
.assertNotNull(encoding1B
);
377 Assert
.assertTrue(encoding1A
.equals(encoding1B
));
378 Assert
.assertEquals(1L, encoding1B
.getId());
380 Assert
.assertEquals(6, vector1B
.getValueCount());
381 Assert
.assertEquals(2, vector1B
.getObject(0));
382 Assert
.assertEquals(1, vector1B
.getObject(1));
383 Assert
.assertEquals(2, vector1B
.getObject(2));
384 Assert
.assertEquals(null, vector1B
.getObject(3));
385 Assert
.assertEquals(1, vector1B
.getObject(4));
386 Assert
.assertEquals(0, vector1B
.getObject(5));
388 FieldVector vector2
= root
.getVector("sizes");
389 Assert
.assertNotNull(vector2
);
391 DictionaryEncoding encoding2
= vector2
.getField().getDictionary();
392 Assert
.assertNotNull(encoding2
);
393 Assert
.assertEquals(2L, encoding2
.getId());
395 Assert
.assertEquals(6, vector2
.getValueCount());
396 Assert
.assertEquals(null, vector2
.getObject(0));
397 Assert
.assertEquals(2, vector2
.getObject(1));
398 Assert
.assertEquals(1, vector2
.getObject(2));
399 Assert
.assertEquals(1, vector2
.getObject(3));
400 Assert
.assertEquals(2, vector2
.getObject(4));
401 Assert
.assertEquals(null, vector2
.getObject(5));
403 Dictionary dictionary1
= provider
.lookup(1L);
404 Assert
.assertNotNull(dictionary1
);
405 VarCharVector dictionaryVector
= ((VarCharVector
) dictionary1
.getVector());
406 Assert
.assertEquals(3, dictionaryVector
.getValueCount());
407 Assert
.assertEquals(new Text("foo"), dictionaryVector
.getObject(0));
408 Assert
.assertEquals(new Text("bar"), dictionaryVector
.getObject(1));
409 Assert
.assertEquals(new Text("baz"), dictionaryVector
.getObject(2));
411 Dictionary dictionary2
= provider
.lookup(2L);
412 Assert
.assertNotNull(dictionary2
);
413 dictionaryVector
= ((VarCharVector
) dictionary2
.getVector());
414 Assert
.assertEquals(3, dictionaryVector
.getValueCount());
415 Assert
.assertEquals(new Text("micro"), dictionaryVector
.getObject(0));
416 Assert
.assertEquals(new Text("small"), dictionaryVector
.getObject(1));
417 Assert
.assertEquals(new Text("large"), dictionaryVector
.getObject(2));
420 protected VectorSchemaRoot
writeNestedDictionaryData(
421 BufferAllocator bufferAllocator
,
422 DictionaryProvider
.MapDictionaryProvider provider
) {
424 // Define the dictionary and add to the provider
425 VarCharVector dictionaryVector
= newVarCharVector("D2", bufferAllocator
);
426 dictionaryVector
.allocateNewSafe();
427 dictionaryVector
.set(0, "foo".getBytes(StandardCharsets
.UTF_8
));
428 dictionaryVector
.set(1, "bar".getBytes(StandardCharsets
.UTF_8
));
429 dictionaryVector
.setValueCount(2);
431 Dictionary dictionary
= new Dictionary(dictionaryVector
, new DictionaryEncoding(2L, false, null));
432 provider
.put(dictionary
);
434 // Write the vector data using dictionary indices
435 ListVector listVector
= ListVector
.empty("list", bufferAllocator
);
436 DictionaryEncoding encoding
= dictionary
.getEncoding();
437 listVector
.addOrGetVector(new FieldType(true, encoding
.getIndexType(), encoding
));
438 listVector
.allocateNew();
439 UnionListWriter listWriter
= new UnionListWriter(listVector
);
440 listWriter
.startList();
441 listWriter
.writeInt(0);
442 listWriter
.writeInt(1);
443 listWriter
.endList();
444 listWriter
.startList();
445 listWriter
.writeInt(0);
446 listWriter
.endList();
447 listWriter
.startList();
448 listWriter
.writeInt(1);
449 listWriter
.endList();
450 listWriter
.setValueCount(3);
452 List
<Field
> fields
= Collections2
.asImmutableList(listVector
.getField());
453 List
<FieldVector
> vectors
= Collections2
.asImmutableList(listVector
);
454 return new VectorSchemaRoot(fields
, vectors
, 3);
457 protected void validateNestedDictionary(VectorSchemaRoot root
, DictionaryProvider provider
) {
458 FieldVector vector
= root
.getFieldVectors().get(0);
459 Assert
.assertNotNull(vector
);
460 Assert
.assertNull(vector
.getField().getDictionary());
461 Field nestedField
= vector
.getField().getChildren().get(0);
463 DictionaryEncoding encoding
= nestedField
.getDictionary();
464 Assert
.assertNotNull(encoding
);
465 Assert
.assertEquals(2L, encoding
.getId());
466 Assert
.assertEquals(new ArrowType
.Int(32, true), encoding
.getIndexType());
468 Assert
.assertEquals(3, vector
.getValueCount());
469 Assert
.assertEquals(Arrays
.asList(0, 1), vector
.getObject(0));
470 Assert
.assertEquals(Arrays
.asList(0), vector
.getObject(1));
471 Assert
.assertEquals(Arrays
.asList(1), vector
.getObject(2));
473 Dictionary dictionary
= provider
.lookup(2L);
474 Assert
.assertNotNull(dictionary
);
475 VarCharVector dictionaryVector
= ((VarCharVector
) dictionary
.getVector());
476 Assert
.assertEquals(2, dictionaryVector
.getValueCount());
477 Assert
.assertEquals(new Text("foo"), dictionaryVector
.getObject(0));
478 Assert
.assertEquals(new Text("bar"), dictionaryVector
.getObject(1));
481 protected VectorSchemaRoot
writeDecimalData(BufferAllocator bufferAllocator
) {
482 DecimalVector decimalVector1
= new DecimalVector("decimal1", bufferAllocator
, 10, 3);
483 DecimalVector decimalVector2
= new DecimalVector("decimal2", bufferAllocator
, 4, 2);
484 DecimalVector decimalVector3
= new DecimalVector("decimal3", bufferAllocator
, 16, 8);
487 decimalVector1
.allocateNew(count
);
488 decimalVector2
.allocateNew(count
);
489 decimalVector3
.allocateNew(count
);
491 for (int i
= 0; i
< count
; i
++) {
492 decimalVector1
.setSafe(i
, new BigDecimal(BigInteger
.valueOf(i
), 3));
493 decimalVector2
.setSafe(i
, new BigDecimal(BigInteger
.valueOf(i
* (1 << 10)), 2));
494 decimalVector3
.setSafe(i
, new BigDecimal(BigInteger
.valueOf(i
* 1111111111111111L), 8));
497 decimalVector1
.setValueCount(count
);
498 decimalVector2
.setValueCount(count
);
499 decimalVector3
.setValueCount(count
);
501 List
<Field
> fields
= Collections2
.asImmutableList(decimalVector1
.getField(), decimalVector2
.getField(),
502 decimalVector3
.getField());
503 List
<FieldVector
> vectors
= Collections2
.asImmutableList(decimalVector1
, decimalVector2
, decimalVector3
);
504 return new VectorSchemaRoot(fields
, vectors
, count
);
507 protected void validateDecimalData(VectorSchemaRoot root
) {
508 DecimalVector decimalVector1
= (DecimalVector
) root
.getVector("decimal1");
509 DecimalVector decimalVector2
= (DecimalVector
) root
.getVector("decimal2");
510 DecimalVector decimalVector3
= (DecimalVector
) root
.getVector("decimal3");
512 Assert
.assertEquals(count
, root
.getRowCount());
514 for (int i
= 0; i
< count
; i
++) {
515 // Verify decimal 1 vector
516 BigDecimal readValue
= decimalVector1
.getObject(i
);
517 ArrowType
.Decimal type
= (ArrowType
.Decimal
) decimalVector1
.getField().getType();
518 BigDecimal genValue
= new BigDecimal(BigInteger
.valueOf(i
), type
.getScale());
519 Assert
.assertEquals(genValue
, readValue
);
521 // Verify decimal 2 vector
522 readValue
= decimalVector2
.getObject(i
);
523 type
= (ArrowType
.Decimal
) decimalVector2
.getField().getType();
524 genValue
= new BigDecimal(BigInteger
.valueOf(i
* (1 << 10)), type
.getScale());
525 Assert
.assertEquals(genValue
, readValue
);
527 // Verify decimal 3 vector
528 readValue
= decimalVector3
.getObject(i
);
529 type
= (ArrowType
.Decimal
) decimalVector3
.getField().getType();
530 genValue
= new BigDecimal(BigInteger
.valueOf(i
* 1111111111111111L), type
.getScale());
531 Assert
.assertEquals(genValue
, readValue
);
535 protected VectorSchemaRoot
writeNullData(int valueCount
) {
536 NullVector nullVector1
= new NullVector("vector1");
537 NullVector nullVector2
= new NullVector("vector2");
538 nullVector1
.setValueCount(valueCount
);
539 nullVector2
.setValueCount(valueCount
);
541 List
<Field
> fields
= Collections2
.asImmutableList(nullVector1
.getField(), nullVector2
.getField());
542 List
<FieldVector
> vectors
= Collections2
.asImmutableList(nullVector1
, nullVector2
);
543 return new VectorSchemaRoot(fields
, vectors
, valueCount
);
546 protected void validateNullData(VectorSchemaRoot root
, int valueCount
) {
548 NullVector vector1
= (NullVector
) root
.getFieldVectors().get(0);
549 NullVector vector2
= (NullVector
) root
.getFieldVectors().get(1);
551 assertEquals(valueCount
, vector1
.getValueCount());
552 assertEquals(valueCount
, vector2
.getValueCount());
555 public void validateUnionData(int count
, VectorSchemaRoot root
) {
556 FieldReader unionReader
= root
.getVector("union").getReader();
557 for (int i
= 0; i
< count
; i
++) {
558 unionReader
.setPosition(i
);
561 Assert
.assertEquals(i
, unionReader
.readInteger().intValue());
564 Assert
.assertEquals(i
, unionReader
.readLong().longValue());
567 Assert
.assertEquals(i
% 3, unionReader
.size());
570 NullableTimeStampMilliHolder h
= new NullableTimeStampMilliHolder();
571 unionReader
.reader("timestamp").read(h
);
572 Assert
.assertEquals(i
, h
.value
);
575 assert false : "Unexpected value in switch statement: " + i
;
580 public void writeUnionData(int count
, StructVector parent
) {
581 ArrowBuf varchar
= allocator
.buffer(3);
582 varchar
.readerIndex(0);
583 varchar
.setByte(0, 'a');
584 varchar
.setByte(1, 'b');
585 varchar
.setByte(2, 'c');
586 varchar
.writerIndex(3);
587 ComplexWriter writer
= new ComplexWriterImpl("root", parent
);
588 StructWriter rootWriter
= writer
.rootAsStruct();
589 IntWriter intWriter
= rootWriter
.integer("union");
590 BigIntWriter bigIntWriter
= rootWriter
.bigInt("union");
591 ListWriter listWriter
= rootWriter
.list("union");
592 StructWriter structWriter
= rootWriter
.struct("union");
593 for (int i
= 0; i
< count
; i
++) {
596 intWriter
.setPosition(i
);
597 intWriter
.writeInt(i
);
600 bigIntWriter
.setPosition(i
);
601 bigIntWriter
.writeBigInt(i
);
604 listWriter
.setPosition(i
);
605 listWriter
.startList();
606 for (int j
= 0; j
< i
% 3; j
++) {
607 listWriter
.varChar().writeVarChar(0, 3, varchar
);
609 listWriter
.endList();
612 structWriter
.setPosition(i
);
613 structWriter
.start();
614 structWriter
.timeStampMilli("timestamp").writeTimeStampMilli(i
);
618 assert false : "Unexpected value in switch statement: " + i
;
621 writer
.setValueCount(count
);
622 varchar
.getReferenceManager().release();
625 protected void writeVarBinaryData(int count
, StructVector parent
) {
626 Assert
.assertTrue(count
< 100);
627 ComplexWriter writer
= new ComplexWriterImpl("root", parent
);
628 StructWriter rootWriter
= writer
.rootAsStruct();
629 ListWriter listWriter
= rootWriter
.list("list");
630 ArrowBuf varbin
= allocator
.buffer(count
);
631 for (int i
= 0; i
< count
; i
++) {
632 varbin
.setByte(i
, i
);
633 listWriter
.setPosition(i
);
634 listWriter
.startList();
635 for (int j
= 0; j
< i
% 3; j
++) {
636 listWriter
.varBinary().writeVarBinary(0, i
+ 1, varbin
);
638 listWriter
.endList();
640 writer
.setValueCount(count
);
641 varbin
.getReferenceManager().release();
644 protected void validateVarBinary(int count
, VectorSchemaRoot root
) {
645 Assert
.assertEquals(count
, root
.getRowCount());
646 ListVector listVector
= (ListVector
) root
.getVector("list");
647 byte[] expectedArray
= new byte[count
];
648 int numVarBinaryValues
= 0;
649 for (int i
= 0; i
< count
; i
++) {
650 expectedArray
[i
] = (byte) i
;
651 List
<?
> objList
= listVector
.getObject(i
);
653 Assert
.assertTrue(objList
.isEmpty());
655 byte[] expected
= Arrays
.copyOfRange(expectedArray
, 0, i
+ 1);
656 for (int j
= 0; j
< i
% 3; j
++) {
657 byte[] result
= (byte[]) objList
.get(j
);
658 Assert
.assertArrayEquals(result
, expected
);
659 numVarBinaryValues
++;
// ListVector lastSet should be the index of the last list written, i.e. count - 1
665 Assert
.assertEquals(listVector
.getLastSet(), count
- 1);
667 // VarBinaryVector lastSet should be the index of last value
668 VarBinaryVector binaryVector
= (VarBinaryVector
) listVector
.getChildrenFromFields().get(0);
669 Assert
.assertEquals(binaryVector
.getLastSet(), numVarBinaryValues
- 1);
672 protected void writeBatchData(ArrowWriter writer
, IntVector vector
, VectorSchemaRoot root
) throws IOException
{
676 vector
.setSafe(1, 1);
677 vector
.setSafe(2, 2);
679 vector
.setSafe(4, 1);
680 vector
.setValueCount(5);
685 vector
.setSafe(1, 1);
686 vector
.setSafe(2, 2);
687 vector
.setValueCount(3);
694 protected void validateBatchData(ArrowReader reader
, IntVector vector
) throws IOException
{
695 reader
.loadNextBatch();
697 assertEquals(vector
.getValueCount(), 5);
698 assertTrue(vector
.isNull(0));
699 assertEquals(vector
.get(1), 1);
700 assertEquals(vector
.get(2), 2);
701 assertTrue(vector
.isNull(3));
702 assertEquals(vector
.get(4), 1);
704 reader
.loadNextBatch();
706 assertEquals(vector
.getValueCount(), 3);
707 assertTrue(vector
.isNull(0));
708 assertEquals(vector
.get(1), 1);
709 assertEquals(vector
.get(2), 2);
712 protected VectorSchemaRoot
writeMapData(BufferAllocator bufferAllocator
) {
713 MapVector mapVector
= MapVector
.empty("map", bufferAllocator
, false);
714 MapVector sortedMapVector
= MapVector
.empty("mapSorted", bufferAllocator
, true);
715 mapVector
.allocateNew();
716 sortedMapVector
.allocateNew();
717 UnionMapWriter mapWriter
= mapVector
.getWriter();
718 UnionMapWriter sortedMapWriter
= sortedMapVector
.getWriter();
720 final int count
= 10;
721 for (int i
= 0; i
< count
; i
++) {
722 // Write mapVector with NULL values
725 mapWriter
.setPosition(i
);
726 mapWriter
.startMap();
727 // i == 3 is an empty map
729 for (int j
= 0; j
< i
+ 1; j
++) {
730 mapWriter
.startEntry();
731 mapWriter
.key().bigInt().writeBigInt(j
);
732 // i == 5 maps to a NULL value
734 mapWriter
.value().integer().writeInt(j
);
736 mapWriter
.endEntry();
741 // Write sortedMapVector
742 sortedMapWriter
.setPosition(i
);
743 sortedMapWriter
.startMap();
744 for (int j
= 0; j
< i
+ 1; j
++) {
745 sortedMapWriter
.startEntry();
746 sortedMapWriter
.key().bigInt().writeBigInt(j
);
747 sortedMapWriter
.value().integer().writeInt(j
);
748 sortedMapWriter
.endEntry();
750 sortedMapWriter
.endMap();
752 mapWriter
.setValueCount(COUNT
);
753 sortedMapWriter
.setValueCount(COUNT
);
755 List
<Field
> fields
= Collections2
.asImmutableList(mapVector
.getField(), sortedMapVector
.getField());
756 List
<FieldVector
> vectors
= Collections2
.asImmutableList(mapVector
, sortedMapVector
);
757 return new VectorSchemaRoot(fields
, vectors
, count
);
760 protected void validateMapData(VectorSchemaRoot root
) {
761 MapVector mapVector
= (MapVector
) root
.getVector("map");
762 MapVector sortedMapVector
= (MapVector
) root
.getVector("mapSorted");
764 final int count
= 10;
765 Assert
.assertEquals(count
, root
.getRowCount());
767 UnionMapReader mapReader
= new UnionMapReader(mapVector
);
768 UnionMapReader sortedMapReader
= new UnionMapReader(sortedMapVector
);
769 for (int i
= 0; i
< count
; i
++) {
770 // Read mapVector with NULL values
771 mapReader
.setPosition(i
);
773 assertFalse(mapReader
.isSet());
776 JsonStringArrayList
<?
> result
= (JsonStringArrayList
<?
>) mapReader
.readObject();
777 assertTrue(result
.isEmpty());
779 for (int j
= 0; j
< i
+ 1; j
++) {
781 assertEquals(j
, mapReader
.key().readLong().longValue());
783 assertFalse(mapReader
.value().isSet());
785 assertEquals(j
, mapReader
.value().readInteger().intValue());
790 // Read sortedMapVector
791 sortedMapReader
.setPosition(i
);
792 for (int j
= 0; j
< i
+ 1; j
++) {
793 sortedMapReader
.next();
794 assertEquals(j
, sortedMapReader
.key().readLong().longValue());
795 assertEquals(j
, sortedMapReader
.value().readInteger().intValue());
800 protected VectorSchemaRoot
writeListAsMapData(BufferAllocator bufferAllocator
) {
801 ListVector mapEntryList
= ListVector
.empty("entryList", bufferAllocator
);
802 FieldType mapEntryType
= new FieldType(false, ArrowType
.Struct
.INSTANCE
, null, null);
803 StructVector mapEntryData
= new StructVector("entryData", bufferAllocator
, mapEntryType
, null);
804 mapEntryData
.addOrGet("myKey", new FieldType(false, new ArrowType
.Int(64, true), null), BigIntVector
.class);
805 mapEntryData
.addOrGet("myValue", FieldType
.nullable(new ArrowType
.Int(32, true)), IntVector
.class);
806 mapEntryList
.initializeChildrenFromFields(Collections2
.asImmutableList(mapEntryData
.getField()));
807 UnionListWriter entryWriter
= mapEntryList
.getWriter();
808 entryWriter
.allocate();
810 final int count
= 10;
811 for (int i
= 0; i
< count
; i
++) {
812 entryWriter
.setPosition(i
);
813 entryWriter
.startList();
814 for (int j
= 0; j
< i
+ 1; j
++) {
815 entryWriter
.struct().start();
816 entryWriter
.struct().bigInt("myKey").writeBigInt(j
);
817 entryWriter
.struct().integer("myValue").writeInt(j
);
818 entryWriter
.struct().end();
820 entryWriter
.endList();
822 entryWriter
.setValueCount(COUNT
);
824 MapVector mapVector
= MapVector
.empty("map", bufferAllocator
, false);
825 mapEntryList
.makeTransferPair(mapVector
).transfer();
827 List
<Field
> fields
= Collections2
.asImmutableList(mapVector
.getField());
828 List
<FieldVector
> vectors
= Collections2
.asImmutableList(mapVector
);
829 return new VectorSchemaRoot(fields
, vectors
, count
);
832 protected void validateListAsMapData(VectorSchemaRoot root
) {
833 MapVector sortedMapVector
= (MapVector
) root
.getVector("map");
835 final int count
= 10;
836 Assert
.assertEquals(count
, root
.getRowCount());
838 UnionMapReader sortedMapReader
= new UnionMapReader(sortedMapVector
);
839 sortedMapReader
.setKeyValueNames("myKey", "myValue");
840 for (int i
= 0; i
< count
; i
++) {
841 sortedMapReader
.setPosition(i
);
842 for (int j
= 0; j
< i
+ 1; j
++) {
843 sortedMapReader
.next();
844 assertEquals(j
, sortedMapReader
.key().readLong().longValue());
845 assertEquals(j
, sortedMapReader
.value().readInteger().intValue());