]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / java / gandiva / src / main / java / org / apache / arrow / gandiva / evaluator / Filter.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.arrow.gandiva.evaluator;
19
20 import java.util.ArrayList;
21 import java.util.List;
22
23 import org.apache.arrow.gandiva.exceptions.EvaluatorClosedException;
24 import org.apache.arrow.gandiva.exceptions.GandivaException;
25 import org.apache.arrow.gandiva.expression.ArrowTypeHelper;
26 import org.apache.arrow.gandiva.expression.Condition;
27 import org.apache.arrow.gandiva.ipc.GandivaTypes;
28 import org.apache.arrow.memory.ArrowBuf;
29 import org.apache.arrow.vector.ipc.message.ArrowBuffer;
30 import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
31 import org.apache.arrow.vector.types.pojo.Schema;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34
35 /**
36 * This class provides a mechanism to filter a RecordBatch by evaluating a condition expression.
37 * Follow these steps to use this class: 1) Use the static method make() to create an instance of
38 * this class that evaluates a condition. 2) Invoke the method evaluate() to evaluate the filter
39 * against a RecordBatch 3) Invoke close() to release resources
40 */
41 public class Filter {
42
43 private static final Logger logger = LoggerFactory.getLogger(Filter.class);
44
45 private final JniWrapper wrapper;
46 private final long moduleId;
47 private final Schema schema;
48 private boolean closed;
49
50 private Filter(JniWrapper wrapper, long moduleId, Schema schema) {
51 this.wrapper = wrapper;
52 this.moduleId = moduleId;
53 this.schema = schema;
54 this.closed = false;
55 }
56
57 /**
58 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
59 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
60 *
61 * @param schema Table schema. The field names in the schema should match the fields used to
62 * create the TreeNodes
63 * @param condition condition to be evaluated against data
64 * @return A native filter object that can be used to invoke on a RecordBatch
65 */
66 public static Filter make(Schema schema, Condition condition) throws GandivaException {
67 return make(schema, condition, JniLoader.getDefaultConfiguration());
68 }
69
70 /**
71 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
72 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
73 *
74 * @param schema Table schema. The field names in the schema should match the fields used to
75 * create the TreeNodes
76 * @param condition condition to be evaluated against data
77 * @param configOptions ConfigOptions parameter
78 * @return A native filter object that can be used to invoke on a RecordBatch
79 */
80 public static Filter make(Schema schema, Condition condition, ConfigurationBuilder.ConfigOptions configOptions)
81 throws GandivaException {
82 return make(schema, condition, JniLoader.getConfiguration(configOptions));
83 }
84
85 /**
86 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
87 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
88 *
89 * @param schema Table schema. The field names in the schema should match the fields used to
90 * create the TreeNodes
91 * @param condition condition to be evaluated against data
92 * @param optimize Flag to choose if the generated llvm code is to be optimized
93 * @return A native filter object that can be used to invoke on a RecordBatch
94 */
95 @Deprecated
96 public static Filter make(Schema schema, Condition condition, boolean optimize) throws GandivaException {
97 return make(schema, condition, JniLoader.getConfiguration((new ConfigurationBuilder.ConfigOptions())
98 .withOptimize(optimize)));
99 }
100
101 /**
102 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
103 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
104 *
105 * @param schema Table schema. The field names in the schema should match the fields used to
106 * create the TreeNodes
107 * @param condition condition to be evaluated against data
108 * @param configurationId Custom configuration created through config builder.
109 * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
110 */
111 public static Filter make(Schema schema, Condition condition, long configurationId)
112 throws GandivaException {
113 // Invoke the JNI layer to create the LLVM module representing the filter.
114 GandivaTypes.Condition conditionBuf = condition.toProtobuf();
115 GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema);
116 JniWrapper wrapper = JniLoader.getInstance().getWrapper();
117 long moduleId = wrapper.buildFilter(schemaBuf.toByteArray(),
118 conditionBuf.toByteArray(), configurationId);
119 logger.debug("Created module for the filter with id {}", moduleId);
120 return new Filter(wrapper, moduleId, schema);
121 }
122
123 /**
124 * Invoke this function to evaluate a filter against a recordBatch.
125 *
126 * @param recordBatch Record batch including the data
127 * @param selectionVector Result of applying the filter on the data
128 */
129 public void evaluate(ArrowRecordBatch recordBatch, SelectionVector selectionVector)
130 throws GandivaException {
131 evaluate(recordBatch.getLength(), recordBatch.getBuffers(), recordBatch.getBuffersLayout(),
132 selectionVector);
133 }
134
135 /**
136 * Invoke this function to evaluate filter against a set of arrow buffers. (this is an optimised
137 * version that skips taking references).
138 *
139 * @param numRows number of rows.
140 * @param buffers List of input arrow buffers
141 * @param selectionVector Result of applying the filter on the data
142 */
143 public void evaluate(int numRows, List<ArrowBuf> buffers,
144 SelectionVector selectionVector) throws GandivaException {
145 List<ArrowBuffer> buffersLayout = new ArrayList<>();
146 long offset = 0;
147 for (ArrowBuf arrowBuf : buffers) {
148 long size = arrowBuf.readableBytes();
149 buffersLayout.add(new ArrowBuffer(offset, size));
150 offset += size;
151 }
152 evaluate(numRows, buffers, buffersLayout, selectionVector);
153 }
154
155 private void evaluate(int numRows, List<ArrowBuf> buffers, List<ArrowBuffer> buffersLayout,
156 SelectionVector selectionVector) throws GandivaException {
157 if (this.closed) {
158 throw new EvaluatorClosedException();
159 }
160 if (selectionVector.getMaxRecords() < numRows) {
161 logger.error("selectionVector has capacity for " + selectionVector.getMaxRecords() +
162 " rows, minimum required " + numRows);
163 throw new GandivaException("SelectionVector too small");
164 }
165
166 long[] bufAddrs = new long[buffers.size()];
167 long[] bufSizes = new long[buffers.size()];
168
169 int idx = 0;
170 for (ArrowBuf buf : buffers) {
171 bufAddrs[idx++] = buf.memoryAddress();
172 }
173
174 idx = 0;
175 for (ArrowBuffer bufLayout : buffersLayout) {
176 bufSizes[idx++] = bufLayout.getSize();
177 }
178
179 int numRecords = wrapper.evaluateFilter(this.moduleId, numRows,
180 bufAddrs, bufSizes,
181 selectionVector.getType().getNumber(),
182 selectionVector.getBuffer().memoryAddress(), selectionVector.getBuffer().capacity());
183 if (numRecords >= 0) {
184 selectionVector.setRecordCount(numRecords);
185 }
186 }
187
188 /**
189 * Closes the LLVM module representing this filter.
190 */
191 public void close() throws GandivaException {
192 if (this.closed) {
193 return;
194 }
195
196 wrapper.closeFilter(this.moduleId);
197 this.closed = true;
198 }
199 }