2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org
.apache
.arrow
.vector
.validate
;
20 import static org
.apache
.arrow
.vector
.validate
.ValidateUtil
.validateOrThrow
;
22 import org
.apache
.arrow
.vector
.BaseFixedWidthVector
;
23 import org
.apache
.arrow
.vector
.BaseLargeVariableWidthVector
;
24 import org
.apache
.arrow
.vector
.BaseVariableWidthVector
;
25 import org
.apache
.arrow
.vector
.BigIntVector
;
26 import org
.apache
.arrow
.vector
.BitVector
;
27 import org
.apache
.arrow
.vector
.DateDayVector
;
28 import org
.apache
.arrow
.vector
.DateMilliVector
;
29 import org
.apache
.arrow
.vector
.Decimal256Vector
;
30 import org
.apache
.arrow
.vector
.DecimalVector
;
31 import org
.apache
.arrow
.vector
.DurationVector
;
32 import org
.apache
.arrow
.vector
.ExtensionTypeVector
;
33 import org
.apache
.arrow
.vector
.FixedSizeBinaryVector
;
34 import org
.apache
.arrow
.vector
.Float4Vector
;
35 import org
.apache
.arrow
.vector
.Float8Vector
;
36 import org
.apache
.arrow
.vector
.IntVector
;
37 import org
.apache
.arrow
.vector
.IntervalDayVector
;
38 import org
.apache
.arrow
.vector
.IntervalMonthDayNanoVector
;
39 import org
.apache
.arrow
.vector
.IntervalYearVector
;
40 import org
.apache
.arrow
.vector
.LargeVarBinaryVector
;
41 import org
.apache
.arrow
.vector
.LargeVarCharVector
;
42 import org
.apache
.arrow
.vector
.NullVector
;
43 import org
.apache
.arrow
.vector
.SmallIntVector
;
44 import org
.apache
.arrow
.vector
.TimeMicroVector
;
45 import org
.apache
.arrow
.vector
.TimeMilliVector
;
46 import org
.apache
.arrow
.vector
.TimeNanoVector
;
47 import org
.apache
.arrow
.vector
.TimeSecVector
;
48 import org
.apache
.arrow
.vector
.TimeStampMicroTZVector
;
49 import org
.apache
.arrow
.vector
.TimeStampMicroVector
;
50 import org
.apache
.arrow
.vector
.TimeStampMilliTZVector
;
51 import org
.apache
.arrow
.vector
.TimeStampMilliVector
;
52 import org
.apache
.arrow
.vector
.TimeStampNanoTZVector
;
53 import org
.apache
.arrow
.vector
.TimeStampNanoVector
;
54 import org
.apache
.arrow
.vector
.TimeStampSecTZVector
;
55 import org
.apache
.arrow
.vector
.TimeStampSecVector
;
56 import org
.apache
.arrow
.vector
.TinyIntVector
;
57 import org
.apache
.arrow
.vector
.UInt1Vector
;
58 import org
.apache
.arrow
.vector
.UInt2Vector
;
59 import org
.apache
.arrow
.vector
.UInt4Vector
;
60 import org
.apache
.arrow
.vector
.UInt8Vector
;
61 import org
.apache
.arrow
.vector
.ValueVector
;
62 import org
.apache
.arrow
.vector
.VarBinaryVector
;
63 import org
.apache
.arrow
.vector
.VarCharVector
;
64 import org
.apache
.arrow
.vector
.compare
.VectorVisitor
;
65 import org
.apache
.arrow
.vector
.complex
.DenseUnionVector
;
66 import org
.apache
.arrow
.vector
.complex
.FixedSizeListVector
;
67 import org
.apache
.arrow
.vector
.complex
.LargeListVector
;
68 import org
.apache
.arrow
.vector
.complex
.ListVector
;
69 import org
.apache
.arrow
.vector
.complex
.NonNullableStructVector
;
70 import org
.apache
.arrow
.vector
.complex
.UnionVector
;
71 import org
.apache
.arrow
.vector
.types
.DateUnit
;
72 import org
.apache
.arrow
.vector
.types
.FloatingPointPrecision
;
73 import org
.apache
.arrow
.vector
.types
.IntervalUnit
;
74 import org
.apache
.arrow
.vector
.types
.TimeUnit
;
75 import org
.apache
.arrow
.vector
.types
.UnionMode
;
76 import org
.apache
.arrow
.vector
.types
.pojo
.ArrowType
;
77 import org
.apache
.arrow
.vector
.types
.pojo
.FieldType
;
/** Utility to validate vector type information. */
82 public class ValidateVectorTypeVisitor
implements VectorVisitor
<Void
, Void
> {
84 private void validateVectorCommon(ValueVector vector
, Class
<?
extends ArrowType
> expectedArrowType
) {
85 validateOrThrow(vector
.getField() != null, "Vector field is empty.");
86 validateOrThrow(vector
.getField().getFieldType() != null, "Vector field type is empty.");
87 ArrowType arrowType
= vector
.getField().getFieldType().getType();
88 validateOrThrow(arrowType
!= null, "Vector arrow type is empty.");
89 validateOrThrow(expectedArrowType
== arrowType
.getClass(),
90 "Incorrect arrow type for " + vector
.getClass() + " : " + arrowType
.toString());
93 private void validateIntVector(ValueVector vector
, int expectedWidth
, boolean expectedSigned
) {
94 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.Int
,
95 "Vector %s is not an integer vector.", vector
.getClass());
96 ArrowType
.Int intType
= (ArrowType
.Int
) vector
.getField().getFieldType().getType();
97 validateOrThrow(intType
.getIsSigned() == expectedSigned
,
98 "Expecting bit width %s, actual width %s.", expectedWidth
, intType
.getBitWidth());
99 validateOrThrow(intType
.getBitWidth() == expectedWidth
, "Expecting bit width %s, actual bit width %s.",
100 expectedWidth
, intType
.getBitWidth());
103 private void validateFloatingPointVector(ValueVector vector
, FloatingPointPrecision expectedPrecision
) {
104 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.FloatingPoint
,
105 "Vector %s is not a floating point vector.", vector
.getClass());
106 ArrowType
.FloatingPoint floatType
= (ArrowType
.FloatingPoint
) vector
.getField().getFieldType().getType();
107 validateOrThrow(floatType
.getPrecision() == expectedPrecision
, "Expecting precision %s, actual precision %s.",
108 expectedPrecision
, floatType
.getPrecision());
111 private void validateDateVector(ValueVector vector
, DateUnit expectedDateUnit
) {
112 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.Date
,
113 "Vector %s is not a date vector", vector
.getClass());
114 ArrowType
.Date dateType
= (ArrowType
.Date
) vector
.getField().getFieldType().getType();
115 validateOrThrow(dateType
.getUnit() == expectedDateUnit
,
116 "Expecting date unit %s, actual date unit %s.", expectedDateUnit
, dateType
.getUnit());
119 private void validateTimeVector(ValueVector vector
, TimeUnit expectedTimeUnit
, int expectedBitWidth
) {
120 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.Time
,
121 "Vector %s is not a time vector.", vector
.getClass());
122 ArrowType
.Time timeType
= (ArrowType
.Time
) vector
.getField().getFieldType().getType();
123 validateOrThrow(timeType
.getUnit() == expectedTimeUnit
,
124 "Expecting time unit %s, actual time unit %s.", expectedTimeUnit
, timeType
.getUnit());
125 validateOrThrow(timeType
.getBitWidth() == expectedBitWidth
,
126 "Expecting bit width %s, actual bit width %s.", expectedBitWidth
, timeType
.getBitWidth());
129 private void validateIntervalVector(ValueVector vector
, IntervalUnit expectedIntervalUnit
) {
130 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.Interval
,
131 "Vector %s is not an interval vector.", vector
.getClass());
132 ArrowType
.Interval intervalType
= (ArrowType
.Interval
) vector
.getField().getFieldType().getType();
133 validateOrThrow(intervalType
.getUnit() == expectedIntervalUnit
,
134 "Expecting interval unit %s, actual date unit %s.", expectedIntervalUnit
, intervalType
.getUnit());
137 private void validateTimeStampVector(ValueVector vector
, TimeUnit expectedTimeUnit
, boolean expectTZ
) {
138 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.Timestamp
,
139 "Vector %s is not a time stamp vector.", vector
.getClass());
140 ArrowType
.Timestamp timestampType
= (ArrowType
.Timestamp
) vector
.getField().getFieldType().getType();
141 validateOrThrow(timestampType
.getUnit() == expectedTimeUnit
,
142 "Expecting time stamp unit %s, actual time stamp unit %s.", expectedTimeUnit
, timestampType
.getUnit());
144 validateOrThrow(timestampType
.getTimezone() != null, "The time zone should not be null");
146 validateOrThrow(timestampType
.getTimezone() == null, "The time zone should be null");
150 private void validateExtensionTypeVector(ExtensionTypeVector
<?
> vector
) {
151 validateOrThrow(vector
.getField().getFieldType().getType() instanceof ArrowType
.ExtensionType
,
152 "Vector %s is not an extension type vector.", vector
.getClass());
153 validateOrThrow(vector
.getField().getMetadata().containsKey(ArrowType
.ExtensionType
.EXTENSION_METADATA_KEY_NAME
),
154 "Field %s does not have proper extension type metadata: %s",
155 vector
.getField().getName(),
156 vector
.getField().getMetadata());
157 // Validate the storage vector type
158 vector
.getUnderlyingVector().accept(this, null);
162 public Void
visit(BaseFixedWidthVector vector
, Void value
) {
163 if (vector
instanceof TinyIntVector
) {
164 validateVectorCommon(vector
, ArrowType
.Int
.class);
165 validateIntVector(vector
, 8, true);
166 } else if (vector
instanceof SmallIntVector
) {
167 validateVectorCommon(vector
, ArrowType
.Int
.class);
168 validateIntVector(vector
, 16, true);
169 } else if (vector
instanceof IntVector
) {
170 validateVectorCommon(vector
, ArrowType
.Int
.class);
171 validateIntVector(vector
, 32, true);
172 } else if (vector
instanceof BigIntVector
) {
173 validateVectorCommon(vector
, ArrowType
.Int
.class);
174 validateIntVector(vector
, 64, true);
175 } else if (vector
instanceof UInt1Vector
) {
176 validateVectorCommon(vector
, ArrowType
.Int
.class);
177 validateIntVector(vector
, 8, false);
178 } else if (vector
instanceof UInt2Vector
) {
179 validateVectorCommon(vector
, ArrowType
.Int
.class);
180 validateIntVector(vector
, 16, false);
181 } else if (vector
instanceof UInt4Vector
) {
182 validateVectorCommon(vector
, ArrowType
.Int
.class);
183 validateIntVector(vector
, 32, false);
184 } else if (vector
instanceof UInt8Vector
) {
185 validateVectorCommon(vector
, ArrowType
.Int
.class);
186 validateIntVector(vector
, 64, false);
187 } else if (vector
instanceof BitVector
) {
188 validateVectorCommon(vector
, ArrowType
.Bool
.class);
189 } else if (vector
instanceof DecimalVector
|| vector
instanceof Decimal256Vector
) {
190 validateVectorCommon(vector
, ArrowType
.Decimal
.class);
191 ArrowType
.Decimal arrowType
= (ArrowType
.Decimal
) vector
.getField().getType();
192 validateOrThrow(arrowType
.getScale() > 0, "The scale of decimal %s is not positive.", arrowType
.getScale());
193 validateOrThrow(arrowType
.getPrecision() > 0, "The precision of decimal %S is not positive.",
194 arrowType
.getPrecision());
195 } else if (vector
instanceof DateDayVector
) {
196 validateVectorCommon(vector
, ArrowType
.Date
.class);
197 validateDateVector(vector
, DateUnit
.DAY
);
198 } else if (vector
instanceof DateMilliVector
) {
199 validateVectorCommon(vector
, ArrowType
.Date
.class);
200 validateDateVector(vector
, DateUnit
.MILLISECOND
);
201 } else if (vector
instanceof DurationVector
) {
202 validateVectorCommon(vector
, ArrowType
.Duration
.class);
203 ArrowType
.Duration arrowType
= (ArrowType
.Duration
) vector
.getField().getType();
204 validateOrThrow(((DurationVector
) vector
).getUnit() == arrowType
.getUnit(),
205 "Different duration time unit for vector and arrow type. Vector time unit %s, type time unit %s.",
206 ((DurationVector
) vector
).getUnit(), arrowType
.getUnit());
207 } else if (vector
instanceof Float4Vector
) {
208 validateVectorCommon(vector
, ArrowType
.FloatingPoint
.class);
209 validateFloatingPointVector(vector
, FloatingPointPrecision
.SINGLE
);
210 } else if (vector
instanceof Float8Vector
) {
211 validateVectorCommon(vector
, ArrowType
.FloatingPoint
.class);
212 validateFloatingPointVector(vector
, FloatingPointPrecision
.DOUBLE
);
213 } else if (vector
instanceof IntervalDayVector
) {
214 validateVectorCommon(vector
, ArrowType
.Interval
.class);
215 validateIntervalVector(vector
, IntervalUnit
.DAY_TIME
);
216 } else if (vector
instanceof IntervalMonthDayNanoVector
) {
217 validateVectorCommon(vector
, ArrowType
.Interval
.class);
218 validateIntervalVector(vector
, IntervalUnit
.MONTH_DAY_NANO
);
219 } else if (vector
instanceof IntervalYearVector
) {
220 validateVectorCommon(vector
, ArrowType
.Interval
.class);
221 validateIntervalVector(vector
, IntervalUnit
.YEAR_MONTH
);
222 } else if (vector
instanceof TimeMicroVector
) {
223 validateVectorCommon(vector
, ArrowType
.Time
.class);
224 validateTimeVector(vector
, TimeUnit
.MICROSECOND
, 64);
225 } else if (vector
instanceof TimeMilliVector
) {
226 validateVectorCommon(vector
, ArrowType
.Time
.class);
227 validateTimeVector(vector
, TimeUnit
.MILLISECOND
, 32);
228 } else if (vector
instanceof TimeNanoVector
) {
229 validateVectorCommon(vector
, ArrowType
.Time
.class);
230 validateTimeVector(vector
, TimeUnit
.NANOSECOND
, 64);
231 } else if (vector
instanceof TimeSecVector
) {
232 validateVectorCommon(vector
, ArrowType
.Time
.class);
233 validateTimeVector(vector
, TimeUnit
.SECOND
, 32);
234 } else if (vector
instanceof TimeStampMicroTZVector
) {
235 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
236 validateTimeStampVector(vector
, TimeUnit
.MICROSECOND
, true);
237 } else if (vector
instanceof TimeStampMicroVector
) {
238 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
239 validateTimeStampVector(vector
, TimeUnit
.MICROSECOND
, false);
240 } else if (vector
instanceof TimeStampMilliTZVector
) {
241 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
242 validateTimeStampVector(vector
, TimeUnit
.MILLISECOND
, true);
243 } else if (vector
instanceof TimeStampMilliVector
) {
244 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
245 validateTimeStampVector(vector
, TimeUnit
.MILLISECOND
, false);
246 } else if (vector
instanceof TimeStampNanoTZVector
) {
247 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
248 validateTimeStampVector(vector
, TimeUnit
.NANOSECOND
, true);
249 } else if (vector
instanceof TimeStampNanoVector
) {
250 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
251 validateTimeStampVector(vector
, TimeUnit
.NANOSECOND
, false);
252 } else if (vector
instanceof TimeStampSecTZVector
) {
253 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
254 validateTimeStampVector(vector
, TimeUnit
.SECOND
, true);
255 } else if (vector
instanceof TimeStampSecVector
) {
256 validateVectorCommon(vector
, ArrowType
.Timestamp
.class);
257 validateTimeStampVector(vector
, TimeUnit
.SECOND
, false);
258 } else if (vector
instanceof FixedSizeBinaryVector
) {
259 validateVectorCommon(vector
, ArrowType
.FixedSizeBinary
.class);
260 ArrowType
.FixedSizeBinary arrowType
= (ArrowType
.FixedSizeBinary
) vector
.getField().getType();
261 validateOrThrow(arrowType
.getByteWidth() > 0, "The byte width of a FixedSizeBinaryVector %s is not positive.",
262 arrowType
.getByteWidth());
263 validateOrThrow(arrowType
.getByteWidth() == vector
.getTypeWidth(),
264 "Type width mismatch for FixedSizeBinaryVector. Vector type width %s, arrow type type width %s.",
265 vector
.getTypeWidth(), arrowType
.getByteWidth());
267 throw new IllegalArgumentException("Unknown type for fixed width vector " + vector
.getClass());
273 public Void
visit(BaseVariableWidthVector vector
, Void value
) {
274 if (vector
instanceof VarCharVector
) {
275 validateVectorCommon(vector
, ArrowType
.Utf8
.class);
276 } else if (vector
instanceof VarBinaryVector
) {
277 validateVectorCommon(vector
, ArrowType
.Binary
.class);
283 public Void
visit(BaseLargeVariableWidthVector vector
, Void value
) {
284 if (vector
instanceof LargeVarCharVector
) {
285 validateVectorCommon(vector
, ArrowType
.LargeUtf8
.class);
286 } else if (vector
instanceof LargeVarBinaryVector
) {
287 validateVectorCommon(vector
, ArrowType
.LargeBinary
.class);
293 public Void
visit(ListVector vector
, Void value
) {
294 validateVectorCommon(vector
, ArrowType
.List
.class);
295 ValueVector innerVector
= vector
.getDataVector();
296 if (innerVector
!= null) {
297 innerVector
.accept(this, null);
303 public Void
visit(FixedSizeListVector vector
, Void value
) {
304 validateVectorCommon(vector
, ArrowType
.FixedSizeList
.class);
305 ArrowType
.FixedSizeList arrowType
= (ArrowType
.FixedSizeList
) vector
.getField().getType();
306 validateOrThrow(arrowType
.getListSize() == vector
.getListSize(),
307 "Inconsistent list size for FixedSizeListVector. Vector list size %s, arrow type list size %s.",
308 vector
.getListSize(), arrowType
.getListSize());
309 validateOrThrow(arrowType
.getListSize() > 0, "The list size %s is not positive.", arrowType
.getListSize());
310 ValueVector innerVector
= vector
.getDataVector();
311 if (innerVector
!= null) {
312 innerVector
.accept(this, null);
318 public Void
visit(LargeListVector vector
, Void value
) {
319 validateVectorCommon(vector
, ArrowType
.LargeList
.class);
320 ValueVector innerVector
= vector
.getDataVector();
321 if (innerVector
!= null) {
322 innerVector
.accept(this, null);
328 public Void
visit(NonNullableStructVector vector
, Void value
) {
329 validateVectorCommon(vector
, ArrowType
.Struct
.class);
330 validateOrThrow(vector
.getField().getChildren().size() == vector
.getChildrenFromFields().size(),
331 "Child field count and child vector count mismatch. Vector child count %s, field child count %s",
332 vector
.getChildrenFromFields().size(), vector
.getField().getChildren().size());
333 for (int i
= 0; i
< vector
.getChildrenFromFields().size(); i
++) {
334 ValueVector subVector
= vector
.getChildByOrdinal(i
);
335 FieldType subType
= vector
.getField().getChildren().get(i
).getFieldType();
337 validateOrThrow(subType
.equals(subVector
.getField().getFieldType()),
338 "Struct vector's field type not equal to the child vector's field type. " +
339 "Struct field type %s, sub-vector field type %s", subType
, subVector
.getField().getFieldType());
340 subVector
.accept(this, null);
346 public Void
visit(UnionVector vector
, Void value
) {
347 validateVectorCommon(vector
, ArrowType
.Union
.class);
348 ArrowType
.Union arrowType
= (ArrowType
.Union
) vector
.getField().getType();
349 validateOrThrow(arrowType
.getMode() == UnionMode
.Sparse
, "The union mode of UnionVector must be sparse");
350 for (ValueVector subVector
: vector
.getChildrenFromFields()) {
351 subVector
.accept(this, null);
357 public Void
visit(DenseUnionVector vector
, Void value
) {
358 validateVectorCommon(vector
, ArrowType
.Union
.class);
359 ArrowType
.Union arrowType
= (ArrowType
.Union
) vector
.getField().getType();
360 validateOrThrow(arrowType
.getMode() == UnionMode
.Dense
, "The union mode of DenseUnionVector must be dense");
361 for (ValueVector subVector
: vector
.getChildrenFromFields()) {
362 subVector
.accept(this, null);
368 public Void
visit(NullVector vector
, Void value
) {
369 validateVectorCommon(vector
, ArrowType
.Null
.class);
374 public Void
visit(ExtensionTypeVector
<?
> vector
, Void value
) {
375 validateExtensionTypeVector(vector
);