git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/gandiva/tests/in_expr_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / gandiva / tests / in_expr_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include <cmath>
20
21 #include "arrow/memory_pool.h"
22 #include "gandiva/filter.h"
23 #include "gandiva/tests/test_util.h"
24 #include "gandiva/tree_expr_builder.h"
25
26 namespace gandiva {
27
28 using arrow::boolean;
29 using arrow::float32;
30 using arrow::float64;
31 using arrow::int32;
32
33 class TestIn : public ::testing::Test {
34 public:
35 void SetUp() { pool_ = arrow::default_memory_pool(); }
36
37 protected:
38 arrow::MemoryPool* pool_;
39 };
40 std::vector<Decimal128> MakeDecimalVector(std::vector<std::string> values) {
41 std::vector<arrow::Decimal128> ret;
42 for (auto str : values) {
43 Decimal128 decimal_value;
44 int32_t decimal_precision;
45 int32_t decimal_scale;
46
47 DCHECK_OK(
48 Decimal128::FromString(str, &decimal_value, &decimal_precision, &decimal_scale));
49
50 ret.push_back(decimal_value);
51 }
52 return ret;
53 }
54
55 TEST_F(TestIn, TestInSimple) {
56 // schema for input fields
57 auto field0 = field("f0", int32());
58 auto field1 = field("f1", int32());
59 auto schema = arrow::schema({field0, field1});
60
61 // Build In f0 + f1 in (6, 11)
62 auto node_f0 = TreeExprBuilder::MakeField(field0);
63 auto node_f1 = TreeExprBuilder::MakeField(field1);
64 auto sum_func =
65 TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32());
66 std::unordered_set<int32_t> in_constants({6, 11});
67 auto in_expr = TreeExprBuilder::MakeInExpressionInt32(sum_func, in_constants);
68 auto condition = TreeExprBuilder::MakeCondition(in_expr);
69
70 std::shared_ptr<Filter> filter;
71 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
72 EXPECT_TRUE(status.ok());
73
74 // Create a row-batch with some sample data
75 int num_records = 5;
76 auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true});
77 auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 5}, {true, true, false, true, false});
78 // expected output (indices for which condition matches)
79 auto exp = MakeArrowArrayUint16({0, 1});
80
81 // prepare input record batch
82 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
83
84 std::shared_ptr<SelectionVector> selection_vector;
85 status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
86 EXPECT_TRUE(status.ok());
87
88 // Evaluate expression
89 status = filter->Evaluate(*in_batch, selection_vector);
90 EXPECT_TRUE(status.ok());
91
92 // Validate results
93 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
94 }
95
96 TEST_F(TestIn, TestInFloat) {
97 // schema for input fields
98 auto field0 = field("f0", float32());
99 auto schema = arrow::schema({field0});
100
101 // Build In f0 + f1 in (6, 11)
102 auto node_f0 = TreeExprBuilder::MakeField(field0);
103
104 std::unordered_set<float> in_constants({6.5f, 12.0f, 11.5f});
105 auto in_expr = TreeExprBuilder::MakeInExpressionFloat(node_f0, in_constants);
106 auto condition = TreeExprBuilder::MakeCondition(in_expr);
107
108 std::shared_ptr<Filter> filter;
109 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
110 EXPECT_TRUE(status.ok());
111
112 // Create a row-batch with some sample data
113 int num_records = 5;
114 auto array0 =
115 MakeArrowArrayFloat32({6.5f, 11.5f, 4, 3.15f, 6}, {true, true, false, true, true});
116 // expected output (indices for which condition matches)
117 auto exp = MakeArrowArrayUint16({0, 1});
118
119 // prepare input record batch
120 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
121
122 std::shared_ptr<SelectionVector> selection_vector;
123 status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
124 EXPECT_TRUE(status.ok());
125
126 // Evaluate expression
127 status = filter->Evaluate(*in_batch, selection_vector);
128 EXPECT_TRUE(status.ok());
129
130 // Validate results
131 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
132 }
133
134 TEST_F(TestIn, TestInDouble) {
135 // schema for input fields
136 auto field0 = field("double0", float64());
137 auto field1 = field("double1", float64());
138 auto schema = arrow::schema({field0, field1});
139
140 auto node_f0 = TreeExprBuilder::MakeField(field0);
141 auto node_f1 = TreeExprBuilder::MakeField(field1);
142 auto sum_func =
143 TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::float64());
144 std::unordered_set<double> in_constants({3.14159265359, 15.5555555});
145 auto in_expr = TreeExprBuilder::MakeInExpressionDouble(sum_func, in_constants);
146 auto condition = TreeExprBuilder::MakeCondition(in_expr);
147
148 std::shared_ptr<Filter> filter;
149 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
150 EXPECT_TRUE(status.ok());
151
152 // Create a row-batch with some sample data
153 int num_records = 5;
154 auto array0 = MakeArrowArrayFloat64({1, 2, 3, 4, 11}, {true, true, true, false, false});
155 auto array1 = MakeArrowArrayFloat64({5, 9, 0.14159265359, 17, 4.5555555},
156 {true, true, true, true, true});
157
158 // expected output (indices for which condition matches)
159 auto exp = MakeArrowArrayUint16({2});
160
161 // prepare input record batch
162 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
163
164 std::shared_ptr<SelectionVector> selection_vector;
165 status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
166 EXPECT_TRUE(status.ok());
167
168 // Evaluate expression
169 status = filter->Evaluate(*in_batch, selection_vector);
170 EXPECT_TRUE(status.ok());
171
172 // Validate results
173 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
174 }
175
176 TEST_F(TestIn, TestInDecimal) {
177 int32_t precision = 38;
178 int32_t scale = 5;
179 auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
180
181 // schema for input fields
182 auto field0 = field("f0", arrow::decimal(precision, scale));
183 auto schema = arrow::schema({field0});
184
185 // Build In f0 + f1 in (6, 11)
186 auto node_f0 = TreeExprBuilder::MakeField(field0);
187
188 gandiva::DecimalScalar128 d0("6", precision, scale);
189 gandiva::DecimalScalar128 d1("12", precision, scale);
190 gandiva::DecimalScalar128 d2("11", precision, scale);
191 std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2});
192 auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0, in_constants);
193 auto condition = TreeExprBuilder::MakeCondition(in_expr);
194
195 std::shared_ptr<Filter> filter;
196 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
197 EXPECT_TRUE(status.ok());
198
199 // Create a row-batch with some sample data
200 int num_records = 5;
201 auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"});
202 auto array0 =
203 MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false, true});
204 // expected output (indices for which condition matches)
205 auto exp = MakeArrowArrayUint16({4});
206
207 // prepare input record batch
208 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
209
210 std::shared_ptr<SelectionVector> selection_vector;
211 status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
212 EXPECT_TRUE(status.ok());
213
214 // Evaluate expression
215 status = filter->Evaluate(*in_batch, selection_vector);
216 EXPECT_TRUE(status.ok());
217
218 // Validate results
219 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
220 }
221
222 TEST_F(TestIn, TestInString) {
223 // schema for input fields
224 auto field0 = field("f0", arrow::utf8());
225 auto schema = arrow::schema({field0});
226
227 // Build f0 in ("test" ,"me")
228 auto node_f0 = TreeExprBuilder::MakeField(field0);
229 std::unordered_set<std::string> in_constants({"test", "me"});
230 auto in_expr = TreeExprBuilder::MakeInExpressionString(node_f0, in_constants);
231
232 auto condition = TreeExprBuilder::MakeCondition(in_expr);
233
234 std::shared_ptr<Filter> filter;
235 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
236 EXPECT_TRUE(status.ok());
237
238 // Create a row-batch with some sample data
239 int num_records = 5;
240 auto array_a = MakeArrowArrayUtf8({"test", "lol", "me", "arrow", "test"},
241 {true, true, true, true, false});
242 // expected output (indices for which condition matches)
243 auto exp = MakeArrowArrayUint16({0, 2});
244
245 // prepare input record batch
246 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
247
248 std::shared_ptr<SelectionVector> selection_vector;
249 status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
250 EXPECT_TRUE(status.ok());
251
252 // Evaluate expression
253 status = filter->Evaluate(*in_batch, selection_vector);
254 EXPECT_TRUE(status.ok());
255
256 // Validate results
257 EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
258 }
259
260 TEST_F(TestIn, TestInStringValidationError) {
261 // schema for input fields
262 auto field0 = field("f0", arrow::int32());
263 auto schema = arrow::schema({field0});
264
265 // Build f0 in ("test" ,"me")
266 auto node_f0 = TreeExprBuilder::MakeField(field0);
267 std::unordered_set<std::string> in_constants({"test", "me"});
268 auto in_expr = TreeExprBuilder::MakeInExpressionString(node_f0, in_constants);
269 auto condition = TreeExprBuilder::MakeCondition(in_expr);
270
271 std::shared_ptr<Filter> filter;
272 auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
273
274 EXPECT_TRUE(status.IsExpressionValidationError());
275 std::string expected_error = "Evaluation expression for IN clause returns ";
276 EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
277 }
278 } // namespace gandiva