/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org
.apache
.arrow
.vector
.validate
;
20 import static org
.apache
.arrow
.vector
.validate
.ValidateUtil
.validateOrThrow
;
22 import org
.apache
.arrow
.memory
.ArrowBuf
;
23 import org
.apache
.arrow
.vector
.BaseFixedWidthVector
;
24 import org
.apache
.arrow
.vector
.BaseLargeVariableWidthVector
;
25 import org
.apache
.arrow
.vector
.BaseVariableWidthVector
;
26 import org
.apache
.arrow
.vector
.ExtensionTypeVector
;
27 import org
.apache
.arrow
.vector
.NullVector
;
28 import org
.apache
.arrow
.vector
.ValueVector
;
29 import org
.apache
.arrow
.vector
.compare
.VectorVisitor
;
30 import org
.apache
.arrow
.vector
.complex
.DenseUnionVector
;
31 import org
.apache
.arrow
.vector
.complex
.FixedSizeListVector
;
32 import org
.apache
.arrow
.vector
.complex
.LargeListVector
;
33 import org
.apache
.arrow
.vector
.complex
.ListVector
;
34 import org
.apache
.arrow
.vector
.complex
.NonNullableStructVector
;
35 import org
.apache
.arrow
.vector
.complex
.UnionVector
;
/**
 * Utility for validating vector data.
 */
40 public class ValidateVectorDataVisitor
implements VectorVisitor
<Void
, Void
> {
42 private void validateOffsetBuffer(ValueVector vector
, int valueCount
) {
43 if (valueCount
== 0) {
46 ArrowBuf offsetBuffer
= vector
.getOffsetBuffer();
48 // verify that the values in the offset buffer is non-decreasing
49 int prevValue
= offsetBuffer
.getInt(0);
50 for (int i
= 1; i
<= valueCount
; i
++) {
51 int curValue
= offsetBuffer
.getInt(i
* 4);
52 validateOrThrow(curValue
>= 0, "The value at position %s of the offset buffer is negative: %s.", i
, curValue
);
53 validateOrThrow(curValue
>= prevValue
,
54 "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.",
55 i
- 1, i
, prevValue
, curValue
);
60 private void validateLargeOffsetBuffer(ValueVector vector
, int valueCount
) {
61 if (valueCount
== 0) {
64 ArrowBuf offsetBuffer
= vector
.getOffsetBuffer();
66 // verify that the values in the large offset buffer is non-decreasing
67 long prevValue
= offsetBuffer
.getLong(0);
68 for (int i
= 1; i
<= valueCount
; i
++) {
69 long curValue
= offsetBuffer
.getLong((long) i
* 8);
70 validateOrThrow(curValue
>= 0L, "The value at position %s of the large offset buffer is negative: %s.",
72 validateOrThrow(curValue
>= prevValue
,
73 "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.",
74 i
- 1, i
, prevValue
, curValue
);
79 private void validateTypeBuffer(ArrowBuf typeBuf
, int valueCount
) {
80 for (int i
= 0; i
< valueCount
; i
++) {
81 validateOrThrow(typeBuf
.getByte(i
) >= 0, "The type id at position %s is negative: %s.",
82 i
, typeBuf
.getByte(i
));
87 public Void
visit(BaseFixedWidthVector vector
, Void value
) {
92 public Void
visit(BaseVariableWidthVector vector
, Void value
) {
93 validateOffsetBuffer(vector
, vector
.getValueCount());
98 public Void
visit(BaseLargeVariableWidthVector vector
, Void value
) {
99 validateLargeOffsetBuffer(vector
, vector
.getValueCount());
104 public Void
visit(ListVector vector
, Void value
) {
105 validateOffsetBuffer(vector
, vector
.getValueCount());
106 ValueVector innerVector
= vector
.getDataVector();
107 if (innerVector
!= null) {
108 innerVector
.accept(this, null);
114 public Void
visit(FixedSizeListVector vector
, Void value
) {
115 validateOffsetBuffer(vector
, vector
.getValueCount());
116 ValueVector innerVector
= vector
.getDataVector();
117 if (innerVector
!= null) {
118 innerVector
.accept(this, null);
124 public Void
visit(LargeListVector vector
, Void value
) {
125 validateLargeOffsetBuffer(vector
, vector
.getValueCount());
126 ValueVector innerVector
= vector
.getDataVector();
127 if (innerVector
!= null) {
128 innerVector
.accept(this, null);
134 public Void
visit(NonNullableStructVector vector
, Void value
) {
135 for (ValueVector subVector
: vector
.getChildrenFromFields()) {
136 subVector
.accept(this, null);
142 public Void
visit(UnionVector vector
, Void value
) {
143 validateTypeBuffer(vector
.getTypeBuffer(), vector
.getValueCount());
144 for (ValueVector subVector
: vector
.getChildrenFromFields()) {
145 subVector
.accept(this, null);
151 public Void
visit(DenseUnionVector vector
, Void value
) {
152 validateTypeBuffer(vector
.getTypeBuffer(), vector
.getValueCount());
154 // validate offset buffer
155 for (int i
= 0; i
< vector
.getValueCount(); i
++) {
156 int offset
= vector
.getOffset(i
);
157 byte typeId
= vector
.getTypeId(i
);
158 ValueVector subVector
= vector
.getVectorByType(typeId
);
159 validateOrThrow(offset
< subVector
.getValueCount(),
160 "Dense union vector offset exceeds sub-vector boundary. Vector offset %s, sub vector size %s",
161 offset
, subVector
.getValueCount());
164 for (ValueVector subVector
: vector
.getChildrenFromFields()) {
165 subVector
.accept(this, null);
171 public Void
visit(NullVector vector
, Void value
) {
176 public Void
visit(ExtensionTypeVector
<?
> vector
, Void value
) {
177 vector
.getUnderlyingVector().accept(this, value
);