]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / vector / src / main / java / org / apache / arrow / vector / dictionary / ListSubfieldEncoder.java
CommitLineData
1d09f67e
TL
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18package org.apache.arrow.vector.dictionary;
19
20import java.util.Collections;
21
22import org.apache.arrow.memory.BufferAllocator;
23import org.apache.arrow.memory.util.hash.ArrowBufHasher;
24import org.apache.arrow.memory.util.hash.SimpleHasher;
25import org.apache.arrow.vector.BaseIntVector;
26import org.apache.arrow.vector.FieldVector;
27import org.apache.arrow.vector.ValueVector;
28import org.apache.arrow.vector.complex.BaseListVector;
29import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
30import org.apache.arrow.vector.types.pojo.Field;
31import org.apache.arrow.vector.types.pojo.FieldType;
32import org.apache.arrow.vector.util.TransferPair;
33
34/**
35 * Sub fields encoder/decoder for Dictionary encoded {@link BaseListVector}.
36 */
37public class ListSubfieldEncoder {
38
39 private final DictionaryHashTable hashTable;
40 private final Dictionary dictionary;
41 private final BufferAllocator allocator;
42
43 public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) {
44 this (dictionary, allocator, SimpleHasher.INSTANCE);
45 }
46
47 /**
48 * Construct an instance.
49 */
50 public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
51 this.dictionary = dictionary;
52 this.allocator = allocator;
53 BaseListVector dictVector = (BaseListVector) dictionary.getVector();
54 hashTable = new DictionaryHashTable(getDataVector(dictVector), hasher);
55 }
56
57 private FieldVector getDataVector(BaseListVector vector) {
58 return vector.getChildrenFromFields().get(0);
59 }
60
61 private BaseListVector cloneVector(BaseListVector vector) {
62
63 final FieldType fieldType = vector.getField().getFieldType();
64 BaseListVector cloned = (BaseListVector) fieldType.createNewSingleVector(vector.getField().getName(),
65 allocator, /*schemaCallBack=*/null);
66
67 final ArrowFieldNode fieldNode = new ArrowFieldNode(vector.getValueCount(), vector.getNullCount());
68 cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers());
69
70 return cloned;
71 }
72
73 /**
74 * Dictionary encodes subfields for complex vector with a provided dictionary.
75 * The dictionary must contain all values in the sub fields vector.
76 * @param vector vector to encode
77 * @return dictionary encoded vector
78 */
79 public BaseListVector encodeListSubField(BaseListVector vector) {
80 final int valueCount = vector.getValueCount();
81
82 FieldType indexFieldType = new FieldType(vector.getField().isNullable(),
83 dictionary.getEncoding().getIndexType(), dictionary.getEncoding(), vector.getField().getMetadata());
84 Field valueField = new Field(vector.getField().getName(), indexFieldType, null);
85
86 // clone list vector and initialize data vector
87 BaseListVector encoded = cloneVector(vector);
88 encoded.initializeChildrenFromFields(Collections.singletonList(valueField));
89 BaseIntVector indices = (BaseIntVector) getDataVector(encoded);
90
91 ValueVector dataVector = getDataVector(vector);
92 for (int i = 0; i < valueCount; i++) {
93 if (!vector.isNull(i)) {
94 int start = vector.getElementStartIndex(i);
95 int end = vector.getElementEndIndex(i);
96
97 DictionaryEncoder.buildIndexVector(dataVector, indices, hashTable, start, end);
98 }
99 }
100
101 return encoded;
102 }
103
104 /**
105 * Decodes a dictionary subfields encoded vector using the provided dictionary.
106 * @param vector dictionary encoded vector, its data vector must be int type
107 * @return vector with values restored from dictionary
108 */
109 public BaseListVector decodeListSubField(BaseListVector vector) {
110
111 int valueCount = vector.getValueCount();
112 BaseListVector dictionaryVector = (BaseListVector) dictionary.getVector();
113 int dictionaryValueCount = getDataVector(dictionaryVector).getValueCount();
114
115 // clone list vector and initialize data vector
116 BaseListVector decoded = cloneVector(vector);
117 Field dataVectorField = getDataVector(dictionaryVector).getField();
118 decoded.initializeChildrenFromFields(Collections.singletonList(dataVectorField));
119
120 // get data vector
121 ValueVector dataVector = getDataVector(decoded);
122
123 TransferPair transfer = getDataVector(dictionaryVector).makeTransferPair(dataVector);
124 BaseIntVector indices = (BaseIntVector) getDataVector(vector);
125
126 for (int i = 0; i < valueCount; i++) {
127
128 if (!vector.isNull(i)) {
129 int start = vector.getElementStartIndex(i);
130 int end = vector.getElementEndIndex(i);
131
132 DictionaryEncoder.retrieveIndexVector(indices, transfer, dictionaryValueCount, start, end);
133 }
134 }
135 return decoded;
136 }
137}