2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org
.apache
.arrow
.gandiva
.evaluator
;
20 import java
.util
.ArrayList
;
21 import java
.util
.List
;
23 import org
.apache
.arrow
.gandiva
.exceptions
.EvaluatorClosedException
;
24 import org
.apache
.arrow
.gandiva
.exceptions
.GandivaException
;
25 import org
.apache
.arrow
.gandiva
.expression
.ArrowTypeHelper
;
26 import org
.apache
.arrow
.gandiva
.expression
.Condition
;
27 import org
.apache
.arrow
.gandiva
.ipc
.GandivaTypes
;
28 import org
.apache
.arrow
.memory
.ArrowBuf
;
29 import org
.apache
.arrow
.vector
.ipc
.message
.ArrowBuffer
;
30 import org
.apache
.arrow
.vector
.ipc
.message
.ArrowRecordBatch
;
31 import org
.apache
.arrow
.vector
.types
.pojo
.Schema
;
32 import org
.slf4j
.Logger
;
33 import org
.slf4j
.LoggerFactory
;
/**
 * This class provides a mechanism to filter a RecordBatch by evaluating a condition expression.
 * Follow these steps to use this class:
 * 1) Use the static method make() to create an instance of this class that evaluates a condition.
 * 2) Invoke the method evaluate() to evaluate the filter against a RecordBatch.
 * 3) Invoke close() to release resources.
 */
43 private static final Logger logger
= LoggerFactory
.getLogger(Filter
.class);
45 private final JniWrapper wrapper
;
46 private final long moduleId
;
47 private final Schema schema
;
48 private boolean closed
;
/**
 * Creates a filter bound to an already-built native module. Instances are obtained through the
 * static {@code make} factories, which build the module first.
 *
 * @param wrapper JNI wrapper through which the native filter is driven
 * @param moduleId identifier of the native module, as returned by {@code buildFilter}
 * @param schema schema the filter was built against
 */
private Filter(JniWrapper wrapper, long moduleId, Schema schema) {
  this.wrapper = wrapper;
  this.moduleId = moduleId;
  // schema is a final field, so it has to be assigned here for the class to compile.
  this.schema = schema;
  // A freshly built filter starts in the open state.
  this.closed = false;
}
58 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
59 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
61 * @param schema Table schema. The field names in the schema should match the fields used to
62 * create the TreeNodes
63 * @param condition condition to be evaluated against data
64 * @return A native filter object that can be used to invoke on a RecordBatch
66 public static Filter
make(Schema schema
, Condition condition
) throws GandivaException
{
67 return make(schema
, condition
, JniLoader
.getDefaultConfiguration());
71 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
72 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
74 * @param schema Table schema. The field names in the schema should match the fields used to
75 * create the TreeNodes
76 * @param condition condition to be evaluated against data
77 * @param configOptions ConfigOptions parameter
78 * @return A native filter object that can be used to invoke on a RecordBatch
80 public static Filter
make(Schema schema
, Condition condition
, ConfigurationBuilder
.ConfigOptions configOptions
)
81 throws GandivaException
{
82 return make(schema
, condition
, JniLoader
.getConfiguration(configOptions
));
86 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
87 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
89 * @param schema Table schema. The field names in the schema should match the fields used to
90 * create the TreeNodes
91 * @param condition condition to be evaluated against data
92 * @param optimize Flag to choose if the generated llvm code is to be optimized
93 * @return A native filter object that can be used to invoke on a RecordBatch
96 public static Filter
make(Schema schema
, Condition condition
, boolean optimize
) throws GandivaException
{
97 return make(schema
, condition
, JniLoader
.getConfiguration((new ConfigurationBuilder
.ConfigOptions())
98 .withOptimize(optimize
)));
102 * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke
103 * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch
105 * @param schema Table schema. The field names in the schema should match the fields used to
106 * create the TreeNodes
107 * @param condition condition to be evaluated against data
108 * @param configurationId Custom configuration created through config builder.
109 * @return A native evaluator object that can be used to invoke these projections on a RecordBatch
111 public static Filter
make(Schema schema
, Condition condition
, long configurationId
)
112 throws GandivaException
{
113 // Invoke the JNI layer to create the LLVM module representing the filter.
114 GandivaTypes
.Condition conditionBuf
= condition
.toProtobuf();
115 GandivaTypes
.Schema schemaBuf
= ArrowTypeHelper
.arrowSchemaToProtobuf(schema
);
116 JniWrapper wrapper
= JniLoader
.getInstance().getWrapper();
117 long moduleId
= wrapper
.buildFilter(schemaBuf
.toByteArray(),
118 conditionBuf
.toByteArray(), configurationId
);
119 logger
.debug("Created module for the filter with id {}", moduleId
);
120 return new Filter(wrapper
, moduleId
, schema
);
124 * Invoke this function to evaluate a filter against a recordBatch.
126 * @param recordBatch Record batch including the data
127 * @param selectionVector Result of applying the filter on the data
129 public void evaluate(ArrowRecordBatch recordBatch
, SelectionVector selectionVector
)
130 throws GandivaException
{
131 evaluate(recordBatch
.getLength(), recordBatch
.getBuffers(), recordBatch
.getBuffersLayout(),
136 * Invoke this function to evaluate filter against a set of arrow buffers. (this is an optimised
137 * version that skips taking references).
139 * @param numRows number of rows.
140 * @param buffers List of input arrow buffers
141 * @param selectionVector Result of applying the filter on the data
143 public void evaluate(int numRows
, List
<ArrowBuf
> buffers
,
144 SelectionVector selectionVector
) throws GandivaException
{
145 List
<ArrowBuffer
> buffersLayout
= new ArrayList
<>();
147 for (ArrowBuf arrowBuf
: buffers
) {
148 long size
= arrowBuf
.readableBytes();
149 buffersLayout
.add(new ArrowBuffer(offset
, size
));
152 evaluate(numRows
, buffers
, buffersLayout
, selectionVector
);
155 private void evaluate(int numRows
, List
<ArrowBuf
> buffers
, List
<ArrowBuffer
> buffersLayout
,
156 SelectionVector selectionVector
) throws GandivaException
{
158 throw new EvaluatorClosedException();
160 if (selectionVector
.getMaxRecords() < numRows
) {
161 logger
.error("selectionVector has capacity for " + selectionVector
.getMaxRecords() +
162 " rows, minimum required " + numRows
);
163 throw new GandivaException("SelectionVector too small");
166 long[] bufAddrs
= new long[buffers
.size()];
167 long[] bufSizes
= new long[buffers
.size()];
170 for (ArrowBuf buf
: buffers
) {
171 bufAddrs
[idx
++] = buf
.memoryAddress();
175 for (ArrowBuffer bufLayout
: buffersLayout
) {
176 bufSizes
[idx
++] = bufLayout
.getSize();
179 int numRecords
= wrapper
.evaluateFilter(this.moduleId
, numRows
,
181 selectionVector
.getType().getNumber(),
182 selectionVector
.getBuffer().memoryAddress(), selectionVector
.getBuffer().capacity());
183 if (numRecords
>= 0) {
184 selectionVector
.setRecordCount(numRecords
);
189 * Closes the LLVM module representing this filter.
191 public void close() throws GandivaException
{
196 wrapper
.closeFilter(this.moduleId
);