git.proxmox.com Git - ceph.git/blobdiff - ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / main / java / org / apache / arrow / vector / validate / ValidateVectorDataVisitor.java
diff --git a/ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
new file mode 100644 (file)
index 0000000..cdeb4f1
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Utility for validating vector data.
+ */
+public class ValidateVectorDataVisitor implements VectorVisitor<Void, Void> {
+
+  private void validateOffsetBuffer(ValueVector vector, int valueCount) {
+    if (valueCount == 0) {
+      return;
+    }
+    ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+    // verify that the values in the offset buffer is non-decreasing
+    int prevValue = offsetBuffer.getInt(0);
+    for (int i = 1; i <= valueCount; i++) {
+      int curValue = offsetBuffer.getInt(i * 4);
+      validateOrThrow(curValue >= 0, "The value at position %s of the offset buffer is negative: %s.", i, curValue);
+      validateOrThrow(curValue >= prevValue,
+          "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.",
+          i - 1, i, prevValue, curValue);
+      prevValue = curValue;
+    }
+  }
+
+  private void validateLargeOffsetBuffer(ValueVector vector, int valueCount) {
+    if (valueCount == 0) {
+      return;
+    }
+    ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+    // verify that the values in the large offset buffer is non-decreasing
+    long prevValue = offsetBuffer.getLong(0);
+    for (int i = 1; i <= valueCount; i++) {
+      long curValue = offsetBuffer.getLong((long) i * 8);
+      validateOrThrow(curValue >= 0L, "The value at position %s of the large offset buffer is negative: %s.",
+          i, curValue);
+      validateOrThrow(curValue >= prevValue,
+          "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.",
+          i - 1, i, prevValue, curValue);
+      prevValue = curValue;
+    }
+  }
+
+  private void validateTypeBuffer(ArrowBuf typeBuf, int valueCount) {
+    for (int i = 0; i < valueCount; i++) {
+      validateOrThrow(typeBuf.getByte(i) >= 0, "The type id at position %s is negative: %s.",
+          i, typeBuf.getByte(i));
+    }
+  }
+
+  @Override
+  public Void visit(BaseFixedWidthVector vector, Void value) {
+    return null;
+  }
+
+  @Override
+  public Void visit(BaseVariableWidthVector vector, Void value) {
+    validateOffsetBuffer(vector, vector.getValueCount());
+    return null;
+  }
+
+  @Override
+  public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+    validateLargeOffsetBuffer(vector, vector.getValueCount());
+    return null;
+  }
+
+  @Override
+  public Void visit(ListVector vector, Void value) {
+    validateOffsetBuffer(vector, vector.getValueCount());
+    ValueVector innerVector = vector.getDataVector();
+    if (innerVector != null) {
+      innerVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(FixedSizeListVector vector, Void value) {
+    validateOffsetBuffer(vector, vector.getValueCount());
+    ValueVector innerVector = vector.getDataVector();
+    if (innerVector != null) {
+      innerVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(LargeListVector vector, Void value) {
+    validateLargeOffsetBuffer(vector, vector.getValueCount());
+    ValueVector innerVector = vector.getDataVector();
+    if (innerVector != null) {
+      innerVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(NonNullableStructVector vector, Void value) {
+    for (ValueVector subVector : vector.getChildrenFromFields()) {
+      subVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(UnionVector vector, Void value) {
+    validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+    for (ValueVector subVector : vector.getChildrenFromFields()) {
+      subVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(DenseUnionVector vector, Void value) {
+    validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+
+    // validate offset buffer
+    for (int i = 0; i < vector.getValueCount(); i++) {
+      int offset = vector.getOffset(i);
+      byte typeId = vector.getTypeId(i);
+      ValueVector subVector = vector.getVectorByType(typeId);
+      validateOrThrow(offset < subVector.getValueCount(),
+          "Dense union vector offset exceeds sub-vector boundary. Vector offset %s, sub vector size %s",
+          offset, subVector.getValueCount());
+    }
+
+    for (ValueVector subVector : vector.getChildrenFromFields()) {
+      subVector.accept(this, null);
+    }
+    return null;
+  }
+
+  @Override
+  public Void visit(NullVector vector, Void value) {
+    return null;
+  }
+
+  @Override
+  public Void visit(ExtensionTypeVector<?> vector, Void value) {
+    vector.getUnderlyingVector().accept(this, value);
+    return null;
+  }
+}