]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.arrow.vector; | |
19 | ||
20 | import java.util.ArrayList; | |
21 | import java.util.List; | |
22 | ||
23 | import org.apache.arrow.memory.ArrowBuf; | |
24 | import org.apache.arrow.vector.compression.CompressionCodec; | |
25 | import org.apache.arrow.vector.compression.CompressionUtil; | |
26 | import org.apache.arrow.vector.compression.NoCompressionCodec; | |
27 | import org.apache.arrow.vector.ipc.message.ArrowFieldNode; | |
28 | import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; | |
29 | ||
30 | /** | |
31 | * Helper class that handles converting a {@link VectorSchemaRoot} | |
32 | * to a {@link ArrowRecordBatch}. | |
33 | */ | |
34 | public class VectorUnloader { | |
35 | ||
36 | private final VectorSchemaRoot root; | |
37 | private final boolean includeNullCount; | |
38 | private final CompressionCodec codec; | |
39 | private final boolean alignBuffers; | |
40 | ||
41 | /** | |
42 | * Constructs a new instance of the given set of vectors. | |
43 | */ | |
44 | public VectorUnloader(VectorSchemaRoot root) { | |
45 | this(root, true, NoCompressionCodec.INSTANCE, true); | |
46 | } | |
47 | ||
48 | /** | |
49 | * Constructs a new instance. | |
50 | * | |
51 | * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}. | |
52 | * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} | |
53 | * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. | |
54 | */ | |
55 | public VectorUnloader( | |
56 | VectorSchemaRoot root, boolean includeNullCount, boolean alignBuffers) { | |
57 | this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers); | |
58 | } | |
59 | ||
60 | /** | |
61 | * Constructs a new instance. | |
62 | * | |
63 | * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}. | |
64 | * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} | |
65 | * @param codec the codec for compressing data. If it is null, then no compression is needed. | |
66 | * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. | |
67 | */ | |
68 | public VectorUnloader( | |
69 | VectorSchemaRoot root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) { | |
70 | this.root = root; | |
71 | this.includeNullCount = includeNullCount; | |
72 | this.codec = codec; | |
73 | this.alignBuffers = alignBuffers; | |
74 | } | |
75 | ||
76 | /** | |
77 | * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} suitable | |
78 | * for serialization. | |
79 | */ | |
80 | public ArrowRecordBatch getRecordBatch() { | |
81 | List<ArrowFieldNode> nodes = new ArrayList<>(); | |
82 | List<ArrowBuf> buffers = new ArrayList<>(); | |
83 | for (FieldVector vector : root.getFieldVectors()) { | |
84 | appendNodes(vector, nodes, buffers); | |
85 | } | |
86 | return new ArrowRecordBatch( | |
87 | root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers); | |
88 | } | |
89 | ||
90 | private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) { | |
91 | nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); | |
92 | List<ArrowBuf> fieldBuffers = vector.getFieldBuffers(); | |
93 | int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); | |
94 | if (fieldBuffers.size() != expectedBufferCount) { | |
95 | throw new IllegalArgumentException(String.format( | |
96 | "wrong number of buffers for field %s in vector %s. found: %s", | |
97 | vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); | |
98 | } | |
99 | for (ArrowBuf buf : fieldBuffers) { | |
100 | buffers.add(codec.compress(vector.getAllocator(), buf)); | |
101 | } | |
102 | for (FieldVector child : vector.getChildrenFromFields()) { | |
103 | appendNodes(child, nodes, buffers); | |
104 | } | |
105 | } | |
106 | ||
107 | } |