]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / main / java / org / apache / arrow / vector / validate / ValidateVectorTypeVisitor.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.arrow.vector.validate;
19
20 import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
21
22 import org.apache.arrow.vector.BaseFixedWidthVector;
23 import org.apache.arrow.vector.BaseLargeVariableWidthVector;
24 import org.apache.arrow.vector.BaseVariableWidthVector;
25 import org.apache.arrow.vector.BigIntVector;
26 import org.apache.arrow.vector.BitVector;
27 import org.apache.arrow.vector.DateDayVector;
28 import org.apache.arrow.vector.DateMilliVector;
29 import org.apache.arrow.vector.Decimal256Vector;
30 import org.apache.arrow.vector.DecimalVector;
31 import org.apache.arrow.vector.DurationVector;
32 import org.apache.arrow.vector.ExtensionTypeVector;
33 import org.apache.arrow.vector.FixedSizeBinaryVector;
34 import org.apache.arrow.vector.Float4Vector;
35 import org.apache.arrow.vector.Float8Vector;
36 import org.apache.arrow.vector.IntVector;
37 import org.apache.arrow.vector.IntervalDayVector;
38 import org.apache.arrow.vector.IntervalMonthDayNanoVector;
39 import org.apache.arrow.vector.IntervalYearVector;
40 import org.apache.arrow.vector.LargeVarBinaryVector;
41 import org.apache.arrow.vector.LargeVarCharVector;
42 import org.apache.arrow.vector.NullVector;
43 import org.apache.arrow.vector.SmallIntVector;
44 import org.apache.arrow.vector.TimeMicroVector;
45 import org.apache.arrow.vector.TimeMilliVector;
46 import org.apache.arrow.vector.TimeNanoVector;
47 import org.apache.arrow.vector.TimeSecVector;
48 import org.apache.arrow.vector.TimeStampMicroTZVector;
49 import org.apache.arrow.vector.TimeStampMicroVector;
50 import org.apache.arrow.vector.TimeStampMilliTZVector;
51 import org.apache.arrow.vector.TimeStampMilliVector;
52 import org.apache.arrow.vector.TimeStampNanoTZVector;
53 import org.apache.arrow.vector.TimeStampNanoVector;
54 import org.apache.arrow.vector.TimeStampSecTZVector;
55 import org.apache.arrow.vector.TimeStampSecVector;
56 import org.apache.arrow.vector.TinyIntVector;
57 import org.apache.arrow.vector.UInt1Vector;
58 import org.apache.arrow.vector.UInt2Vector;
59 import org.apache.arrow.vector.UInt4Vector;
60 import org.apache.arrow.vector.UInt8Vector;
61 import org.apache.arrow.vector.ValueVector;
62 import org.apache.arrow.vector.VarBinaryVector;
63 import org.apache.arrow.vector.VarCharVector;
64 import org.apache.arrow.vector.compare.VectorVisitor;
65 import org.apache.arrow.vector.complex.DenseUnionVector;
66 import org.apache.arrow.vector.complex.FixedSizeListVector;
67 import org.apache.arrow.vector.complex.LargeListVector;
68 import org.apache.arrow.vector.complex.ListVector;
69 import org.apache.arrow.vector.complex.NonNullableStructVector;
70 import org.apache.arrow.vector.complex.UnionVector;
71 import org.apache.arrow.vector.types.DateUnit;
72 import org.apache.arrow.vector.types.FloatingPointPrecision;
73 import org.apache.arrow.vector.types.IntervalUnit;
74 import org.apache.arrow.vector.types.TimeUnit;
75 import org.apache.arrow.vector.types.UnionMode;
76 import org.apache.arrow.vector.types.pojo.ArrowType;
77 import org.apache.arrow.vector.types.pojo.FieldType;
78
79 /**
80 * Utility to validate vector type information.
81 */
82 public class ValidateVectorTypeVisitor implements VectorVisitor<Void, Void> {
83
84 private void validateVectorCommon(ValueVector vector, Class<? extends ArrowType> expectedArrowType) {
85 validateOrThrow(vector.getField() != null, "Vector field is empty.");
86 validateOrThrow(vector.getField().getFieldType() != null, "Vector field type is empty.");
87 ArrowType arrowType = vector.getField().getFieldType().getType();
88 validateOrThrow(arrowType != null, "Vector arrow type is empty.");
89 validateOrThrow(expectedArrowType == arrowType.getClass(),
90 "Incorrect arrow type for " + vector.getClass() + " : " + arrowType.toString());
91 }
92
93 private void validateIntVector(ValueVector vector, int expectedWidth, boolean expectedSigned) {
94 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Int,
95 "Vector %s is not an integer vector.", vector.getClass());
96 ArrowType.Int intType = (ArrowType.Int) vector.getField().getFieldType().getType();
97 validateOrThrow(intType.getIsSigned() == expectedSigned,
98 "Expecting bit width %s, actual width %s.", expectedWidth, intType.getBitWidth());
99 validateOrThrow(intType.getBitWidth() == expectedWidth, "Expecting bit width %s, actual bit width %s.",
100 expectedWidth, intType.getBitWidth());
101 }
102
103 private void validateFloatingPointVector(ValueVector vector, FloatingPointPrecision expectedPrecision) {
104 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.FloatingPoint,
105 "Vector %s is not a floating point vector.", vector.getClass());
106 ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vector.getField().getFieldType().getType();
107 validateOrThrow(floatType.getPrecision() == expectedPrecision, "Expecting precision %s, actual precision %s.",
108 expectedPrecision, floatType.getPrecision());
109 }
110
111 private void validateDateVector(ValueVector vector, DateUnit expectedDateUnit) {
112 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Date,
113 "Vector %s is not a date vector", vector.getClass());
114 ArrowType.Date dateType = (ArrowType.Date) vector.getField().getFieldType().getType();
115 validateOrThrow(dateType.getUnit() == expectedDateUnit,
116 "Expecting date unit %s, actual date unit %s.", expectedDateUnit, dateType.getUnit());
117 }
118
119 private void validateTimeVector(ValueVector vector, TimeUnit expectedTimeUnit, int expectedBitWidth) {
120 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Time,
121 "Vector %s is not a time vector.", vector.getClass());
122 ArrowType.Time timeType = (ArrowType.Time) vector.getField().getFieldType().getType();
123 validateOrThrow(timeType.getUnit() == expectedTimeUnit,
124 "Expecting time unit %s, actual time unit %s.", expectedTimeUnit, timeType.getUnit());
125 validateOrThrow(timeType.getBitWidth() == expectedBitWidth,
126 "Expecting bit width %s, actual bit width %s.", expectedBitWidth, timeType.getBitWidth());
127 }
128
129 private void validateIntervalVector(ValueVector vector, IntervalUnit expectedIntervalUnit) {
130 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Interval,
131 "Vector %s is not an interval vector.", vector.getClass());
132 ArrowType.Interval intervalType = (ArrowType.Interval) vector.getField().getFieldType().getType();
133 validateOrThrow(intervalType.getUnit() == expectedIntervalUnit,
134 "Expecting interval unit %s, actual date unit %s.", expectedIntervalUnit, intervalType.getUnit());
135 }
136
137 private void validateTimeStampVector(ValueVector vector, TimeUnit expectedTimeUnit, boolean expectTZ) {
138 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Timestamp,
139 "Vector %s is not a time stamp vector.", vector.getClass());
140 ArrowType.Timestamp timestampType = (ArrowType.Timestamp) vector.getField().getFieldType().getType();
141 validateOrThrow(timestampType.getUnit() == expectedTimeUnit,
142 "Expecting time stamp unit %s, actual time stamp unit %s.", expectedTimeUnit, timestampType.getUnit());
143 if (expectTZ) {
144 validateOrThrow(timestampType.getTimezone() != null, "The time zone should not be null");
145 } else {
146 validateOrThrow(timestampType.getTimezone() == null, "The time zone should be null");
147 }
148 }
149
150 private void validateExtensionTypeVector(ExtensionTypeVector<?> vector) {
151 validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.ExtensionType,
152 "Vector %s is not an extension type vector.", vector.getClass());
153 validateOrThrow(vector.getField().getMetadata().containsKey(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME),
154 "Field %s does not have proper extension type metadata: %s",
155 vector.getField().getName(),
156 vector.getField().getMetadata());
157 // Validate the storage vector type
158 vector.getUnderlyingVector().accept(this, null);
159 }
160
161 @Override
162 public Void visit(BaseFixedWidthVector vector, Void value) {
163 if (vector instanceof TinyIntVector) {
164 validateVectorCommon(vector, ArrowType.Int.class);
165 validateIntVector(vector, 8, true);
166 } else if (vector instanceof SmallIntVector) {
167 validateVectorCommon(vector, ArrowType.Int.class);
168 validateIntVector(vector, 16, true);
169 } else if (vector instanceof IntVector) {
170 validateVectorCommon(vector, ArrowType.Int.class);
171 validateIntVector(vector, 32, true);
172 } else if (vector instanceof BigIntVector) {
173 validateVectorCommon(vector, ArrowType.Int.class);
174 validateIntVector(vector, 64, true);
175 } else if (vector instanceof UInt1Vector) {
176 validateVectorCommon(vector, ArrowType.Int.class);
177 validateIntVector(vector, 8, false);
178 } else if (vector instanceof UInt2Vector) {
179 validateVectorCommon(vector, ArrowType.Int.class);
180 validateIntVector(vector, 16, false);
181 } else if (vector instanceof UInt4Vector) {
182 validateVectorCommon(vector, ArrowType.Int.class);
183 validateIntVector(vector, 32, false);
184 } else if (vector instanceof UInt8Vector) {
185 validateVectorCommon(vector, ArrowType.Int.class);
186 validateIntVector(vector, 64, false);
187 } else if (vector instanceof BitVector) {
188 validateVectorCommon(vector, ArrowType.Bool.class);
189 } else if (vector instanceof DecimalVector || vector instanceof Decimal256Vector) {
190 validateVectorCommon(vector, ArrowType.Decimal.class);
191 ArrowType.Decimal arrowType = (ArrowType.Decimal) vector.getField().getType();
192 validateOrThrow(arrowType.getScale() > 0, "The scale of decimal %s is not positive.", arrowType.getScale());
193 validateOrThrow(arrowType.getPrecision() > 0, "The precision of decimal %S is not positive.",
194 arrowType.getPrecision());
195 } else if (vector instanceof DateDayVector) {
196 validateVectorCommon(vector, ArrowType.Date.class);
197 validateDateVector(vector, DateUnit.DAY);
198 } else if (vector instanceof DateMilliVector) {
199 validateVectorCommon(vector, ArrowType.Date.class);
200 validateDateVector(vector, DateUnit.MILLISECOND);
201 } else if (vector instanceof DurationVector) {
202 validateVectorCommon(vector, ArrowType.Duration.class);
203 ArrowType.Duration arrowType = (ArrowType.Duration) vector.getField().getType();
204 validateOrThrow(((DurationVector) vector).getUnit() == arrowType.getUnit(),
205 "Different duration time unit for vector and arrow type. Vector time unit %s, type time unit %s.",
206 ((DurationVector) vector).getUnit(), arrowType.getUnit());
207 } else if (vector instanceof Float4Vector) {
208 validateVectorCommon(vector, ArrowType.FloatingPoint.class);
209 validateFloatingPointVector(vector, FloatingPointPrecision.SINGLE);
210 } else if (vector instanceof Float8Vector) {
211 validateVectorCommon(vector, ArrowType.FloatingPoint.class);
212 validateFloatingPointVector(vector, FloatingPointPrecision.DOUBLE);
213 } else if (vector instanceof IntervalDayVector) {
214 validateVectorCommon(vector, ArrowType.Interval.class);
215 validateIntervalVector(vector, IntervalUnit.DAY_TIME);
216 } else if (vector instanceof IntervalMonthDayNanoVector) {
217 validateVectorCommon(vector, ArrowType.Interval.class);
218 validateIntervalVector(vector, IntervalUnit.MONTH_DAY_NANO);
219 } else if (vector instanceof IntervalYearVector) {
220 validateVectorCommon(vector, ArrowType.Interval.class);
221 validateIntervalVector(vector, IntervalUnit.YEAR_MONTH);
222 } else if (vector instanceof TimeMicroVector) {
223 validateVectorCommon(vector, ArrowType.Time.class);
224 validateTimeVector(vector, TimeUnit.MICROSECOND, 64);
225 } else if (vector instanceof TimeMilliVector) {
226 validateVectorCommon(vector, ArrowType.Time.class);
227 validateTimeVector(vector, TimeUnit.MILLISECOND, 32);
228 } else if (vector instanceof TimeNanoVector) {
229 validateVectorCommon(vector, ArrowType.Time.class);
230 validateTimeVector(vector, TimeUnit.NANOSECOND, 64);
231 } else if (vector instanceof TimeSecVector) {
232 validateVectorCommon(vector, ArrowType.Time.class);
233 validateTimeVector(vector, TimeUnit.SECOND, 32);
234 } else if (vector instanceof TimeStampMicroTZVector) {
235 validateVectorCommon(vector, ArrowType.Timestamp.class);
236 validateTimeStampVector(vector, TimeUnit.MICROSECOND, true);
237 } else if (vector instanceof TimeStampMicroVector) {
238 validateVectorCommon(vector, ArrowType.Timestamp.class);
239 validateTimeStampVector(vector, TimeUnit.MICROSECOND, false);
240 } else if (vector instanceof TimeStampMilliTZVector) {
241 validateVectorCommon(vector, ArrowType.Timestamp.class);
242 validateTimeStampVector(vector, TimeUnit.MILLISECOND, true);
243 } else if (vector instanceof TimeStampMilliVector) {
244 validateVectorCommon(vector, ArrowType.Timestamp.class);
245 validateTimeStampVector(vector, TimeUnit.MILLISECOND, false);
246 } else if (vector instanceof TimeStampNanoTZVector) {
247 validateVectorCommon(vector, ArrowType.Timestamp.class);
248 validateTimeStampVector(vector, TimeUnit.NANOSECOND, true);
249 } else if (vector instanceof TimeStampNanoVector) {
250 validateVectorCommon(vector, ArrowType.Timestamp.class);
251 validateTimeStampVector(vector, TimeUnit.NANOSECOND, false);
252 } else if (vector instanceof TimeStampSecTZVector) {
253 validateVectorCommon(vector, ArrowType.Timestamp.class);
254 validateTimeStampVector(vector, TimeUnit.SECOND, true);
255 } else if (vector instanceof TimeStampSecVector) {
256 validateVectorCommon(vector, ArrowType.Timestamp.class);
257 validateTimeStampVector(vector, TimeUnit.SECOND, false);
258 } else if (vector instanceof FixedSizeBinaryVector) {
259 validateVectorCommon(vector, ArrowType.FixedSizeBinary.class);
260 ArrowType.FixedSizeBinary arrowType = (ArrowType.FixedSizeBinary) vector.getField().getType();
261 validateOrThrow(arrowType.getByteWidth() > 0, "The byte width of a FixedSizeBinaryVector %s is not positive.",
262 arrowType.getByteWidth());
263 validateOrThrow(arrowType.getByteWidth() == vector.getTypeWidth(),
264 "Type width mismatch for FixedSizeBinaryVector. Vector type width %s, arrow type type width %s.",
265 vector.getTypeWidth(), arrowType.getByteWidth());
266 } else {
267 throw new IllegalArgumentException("Unknown type for fixed width vector " + vector.getClass());
268 }
269 return null;
270 }
271
272 @Override
273 public Void visit(BaseVariableWidthVector vector, Void value) {
274 if (vector instanceof VarCharVector) {
275 validateVectorCommon(vector, ArrowType.Utf8.class);
276 } else if (vector instanceof VarBinaryVector) {
277 validateVectorCommon(vector, ArrowType.Binary.class);
278 }
279 return null;
280 }
281
282 @Override
283 public Void visit(BaseLargeVariableWidthVector vector, Void value) {
284 if (vector instanceof LargeVarCharVector) {
285 validateVectorCommon(vector, ArrowType.LargeUtf8.class);
286 } else if (vector instanceof LargeVarBinaryVector) {
287 validateVectorCommon(vector, ArrowType.LargeBinary.class);
288 }
289 return null;
290 }
291
292 @Override
293 public Void visit(ListVector vector, Void value) {
294 validateVectorCommon(vector, ArrowType.List.class);
295 ValueVector innerVector = vector.getDataVector();
296 if (innerVector != null) {
297 innerVector.accept(this, null);
298 }
299 return null;
300 }
301
302 @Override
303 public Void visit(FixedSizeListVector vector, Void value) {
304 validateVectorCommon(vector, ArrowType.FixedSizeList.class);
305 ArrowType.FixedSizeList arrowType = (ArrowType.FixedSizeList) vector.getField().getType();
306 validateOrThrow(arrowType.getListSize() == vector.getListSize(),
307 "Inconsistent list size for FixedSizeListVector. Vector list size %s, arrow type list size %s.",
308 vector.getListSize(), arrowType.getListSize());
309 validateOrThrow(arrowType.getListSize() > 0, "The list size %s is not positive.", arrowType.getListSize());
310 ValueVector innerVector = vector.getDataVector();
311 if (innerVector != null) {
312 innerVector.accept(this, null);
313 }
314 return null;
315 }
316
317 @Override
318 public Void visit(LargeListVector vector, Void value) {
319 validateVectorCommon(vector, ArrowType.LargeList.class);
320 ValueVector innerVector = vector.getDataVector();
321 if (innerVector != null) {
322 innerVector.accept(this, null);
323 }
324 return null;
325 }
326
327 @Override
328 public Void visit(NonNullableStructVector vector, Void value) {
329 validateVectorCommon(vector, ArrowType.Struct.class);
330 validateOrThrow(vector.getField().getChildren().size() == vector.getChildrenFromFields().size(),
331 "Child field count and child vector count mismatch. Vector child count %s, field child count %s",
332 vector.getChildrenFromFields().size(), vector.getField().getChildren().size());
333 for (int i = 0; i < vector.getChildrenFromFields().size(); i++) {
334 ValueVector subVector = vector.getChildByOrdinal(i);
335 FieldType subType = vector.getField().getChildren().get(i).getFieldType();
336
337 validateOrThrow(subType.equals(subVector.getField().getFieldType()),
338 "Struct vector's field type not equal to the child vector's field type. " +
339 "Struct field type %s, sub-vector field type %s", subType, subVector.getField().getFieldType());
340 subVector.accept(this, null);
341 }
342 return null;
343 }
344
345 @Override
346 public Void visit(UnionVector vector, Void value) {
347 validateVectorCommon(vector, ArrowType.Union.class);
348 ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
349 validateOrThrow(arrowType.getMode() == UnionMode.Sparse, "The union mode of UnionVector must be sparse");
350 for (ValueVector subVector : vector.getChildrenFromFields()) {
351 subVector.accept(this, null);
352 }
353 return null;
354 }
355
356 @Override
357 public Void visit(DenseUnionVector vector, Void value) {
358 validateVectorCommon(vector, ArrowType.Union.class);
359 ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
360 validateOrThrow(arrowType.getMode() == UnionMode.Dense, "The union mode of DenseUnionVector must be dense");
361 for (ValueVector subVector : vector.getChildrenFromFields()) {
362 subVector.accept(this, null);
363 }
364 return null;
365 }
366
367 @Override
368 public Void visit(NullVector vector, Void value) {
369 validateVectorCommon(vector, ArrowType.Null.class);
370 return null;
371 }
372
373 @Override
374 public Void visit(ExtensionTypeVector<?> vector, Void value) {
375 validateExtensionTypeVector(vector);
376 return null;
377 }
378 }