]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <gtest/gtest.h> | |
19 | #include <cmath> | |
20 | ||
21 | #include "arrow/memory_pool.h" | |
22 | #include "gandiva/filter.h" | |
23 | #include "gandiva/tests/test_util.h" | |
24 | #include "gandiva/tree_expr_builder.h" | |
25 | ||
26 | namespace gandiva { | |
27 | ||
28 | using arrow::boolean; | |
29 | using arrow::float32; | |
30 | using arrow::float64; | |
31 | using arrow::int32; | |
32 | ||
33 | class TestIn : public ::testing::Test { | |
34 | public: | |
35 | void SetUp() { pool_ = arrow::default_memory_pool(); } | |
36 | ||
37 | protected: | |
38 | arrow::MemoryPool* pool_; | |
39 | }; | |
40 | std::vector<Decimal128> MakeDecimalVector(std::vector<std::string> values) { | |
41 | std::vector<arrow::Decimal128> ret; | |
42 | for (auto str : values) { | |
43 | Decimal128 decimal_value; | |
44 | int32_t decimal_precision; | |
45 | int32_t decimal_scale; | |
46 | ||
47 | DCHECK_OK( | |
48 | Decimal128::FromString(str, &decimal_value, &decimal_precision, &decimal_scale)); | |
49 | ||
50 | ret.push_back(decimal_value); | |
51 | } | |
52 | return ret; | |
53 | } | |
54 | ||
55 | TEST_F(TestIn, TestInSimple) { | |
56 | // schema for input fields | |
57 | auto field0 = field("f0", int32()); | |
58 | auto field1 = field("f1", int32()); | |
59 | auto schema = arrow::schema({field0, field1}); | |
60 | ||
61 | // Build In f0 + f1 in (6, 11) | |
62 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
63 | auto node_f1 = TreeExprBuilder::MakeField(field1); | |
64 | auto sum_func = | |
65 | TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::int32()); | |
66 | std::unordered_set<int32_t> in_constants({6, 11}); | |
67 | auto in_expr = TreeExprBuilder::MakeInExpressionInt32(sum_func, in_constants); | |
68 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
69 | ||
70 | std::shared_ptr<Filter> filter; | |
71 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
72 | EXPECT_TRUE(status.ok()); | |
73 | ||
74 | // Create a row-batch with some sample data | |
75 | int num_records = 5; | |
76 | auto array0 = MakeArrowArrayInt32({1, 2, 3, 4, 6}, {true, true, true, false, true}); | |
77 | auto array1 = MakeArrowArrayInt32({5, 9, 6, 17, 5}, {true, true, false, true, false}); | |
78 | // expected output (indices for which condition matches) | |
79 | auto exp = MakeArrowArrayUint16({0, 1}); | |
80 | ||
81 | // prepare input record batch | |
82 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); | |
83 | ||
84 | std::shared_ptr<SelectionVector> selection_vector; | |
85 | status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); | |
86 | EXPECT_TRUE(status.ok()); | |
87 | ||
88 | // Evaluate expression | |
89 | status = filter->Evaluate(*in_batch, selection_vector); | |
90 | EXPECT_TRUE(status.ok()); | |
91 | ||
92 | // Validate results | |
93 | EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); | |
94 | } | |
95 | ||
96 | TEST_F(TestIn, TestInFloat) { | |
97 | // schema for input fields | |
98 | auto field0 = field("f0", float32()); | |
99 | auto schema = arrow::schema({field0}); | |
100 | ||
101 | // Build In f0 + f1 in (6, 11) | |
102 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
103 | ||
104 | std::unordered_set<float> in_constants({6.5f, 12.0f, 11.5f}); | |
105 | auto in_expr = TreeExprBuilder::MakeInExpressionFloat(node_f0, in_constants); | |
106 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
107 | ||
108 | std::shared_ptr<Filter> filter; | |
109 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
110 | EXPECT_TRUE(status.ok()); | |
111 | ||
112 | // Create a row-batch with some sample data | |
113 | int num_records = 5; | |
114 | auto array0 = | |
115 | MakeArrowArrayFloat32({6.5f, 11.5f, 4, 3.15f, 6}, {true, true, false, true, true}); | |
116 | // expected output (indices for which condition matches) | |
117 | auto exp = MakeArrowArrayUint16({0, 1}); | |
118 | ||
119 | // prepare input record batch | |
120 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0}); | |
121 | ||
122 | std::shared_ptr<SelectionVector> selection_vector; | |
123 | status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); | |
124 | EXPECT_TRUE(status.ok()); | |
125 | ||
126 | // Evaluate expression | |
127 | status = filter->Evaluate(*in_batch, selection_vector); | |
128 | EXPECT_TRUE(status.ok()); | |
129 | ||
130 | // Validate results | |
131 | EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); | |
132 | } | |
133 | ||
134 | TEST_F(TestIn, TestInDouble) { | |
135 | // schema for input fields | |
136 | auto field0 = field("double0", float64()); | |
137 | auto field1 = field("double1", float64()); | |
138 | auto schema = arrow::schema({field0, field1}); | |
139 | ||
140 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
141 | auto node_f1 = TreeExprBuilder::MakeField(field1); | |
142 | auto sum_func = | |
143 | TreeExprBuilder::MakeFunction("add", {node_f0, node_f1}, arrow::float64()); | |
144 | std::unordered_set<double> in_constants({3.14159265359, 15.5555555}); | |
145 | auto in_expr = TreeExprBuilder::MakeInExpressionDouble(sum_func, in_constants); | |
146 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
147 | ||
148 | std::shared_ptr<Filter> filter; | |
149 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
150 | EXPECT_TRUE(status.ok()); | |
151 | ||
152 | // Create a row-batch with some sample data | |
153 | int num_records = 5; | |
154 | auto array0 = MakeArrowArrayFloat64({1, 2, 3, 4, 11}, {true, true, true, false, false}); | |
155 | auto array1 = MakeArrowArrayFloat64({5, 9, 0.14159265359, 17, 4.5555555}, | |
156 | {true, true, true, true, true}); | |
157 | ||
158 | // expected output (indices for which condition matches) | |
159 | auto exp = MakeArrowArrayUint16({2}); | |
160 | ||
161 | // prepare input record batch | |
162 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1}); | |
163 | ||
164 | std::shared_ptr<SelectionVector> selection_vector; | |
165 | status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); | |
166 | EXPECT_TRUE(status.ok()); | |
167 | ||
168 | // Evaluate expression | |
169 | status = filter->Evaluate(*in_batch, selection_vector); | |
170 | EXPECT_TRUE(status.ok()); | |
171 | ||
172 | // Validate results | |
173 | EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); | |
174 | } | |
175 | ||
176 | TEST_F(TestIn, TestInDecimal) { | |
177 | int32_t precision = 38; | |
178 | int32_t scale = 5; | |
179 | auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale); | |
180 | ||
181 | // schema for input fields | |
182 | auto field0 = field("f0", arrow::decimal(precision, scale)); | |
183 | auto schema = arrow::schema({field0}); | |
184 | ||
185 | // Build In f0 + f1 in (6, 11) | |
186 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
187 | ||
188 | gandiva::DecimalScalar128 d0("6", precision, scale); | |
189 | gandiva::DecimalScalar128 d1("12", precision, scale); | |
190 | gandiva::DecimalScalar128 d2("11", precision, scale); | |
191 | std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2}); | |
192 | auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0, in_constants); | |
193 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
194 | ||
195 | std::shared_ptr<Filter> filter; | |
196 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
197 | EXPECT_TRUE(status.ok()); | |
198 | ||
199 | // Create a row-batch with some sample data | |
200 | int num_records = 5; | |
201 | auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"}); | |
202 | auto array0 = | |
203 | MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false, true}); | |
204 | // expected output (indices for which condition matches) | |
205 | auto exp = MakeArrowArrayUint16({4}); | |
206 | ||
207 | // prepare input record batch | |
208 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0}); | |
209 | ||
210 | std::shared_ptr<SelectionVector> selection_vector; | |
211 | status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); | |
212 | EXPECT_TRUE(status.ok()); | |
213 | ||
214 | // Evaluate expression | |
215 | status = filter->Evaluate(*in_batch, selection_vector); | |
216 | EXPECT_TRUE(status.ok()); | |
217 | ||
218 | // Validate results | |
219 | EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); | |
220 | } | |
221 | ||
222 | TEST_F(TestIn, TestInString) { | |
223 | // schema for input fields | |
224 | auto field0 = field("f0", arrow::utf8()); | |
225 | auto schema = arrow::schema({field0}); | |
226 | ||
227 | // Build f0 in ("test" ,"me") | |
228 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
229 | std::unordered_set<std::string> in_constants({"test", "me"}); | |
230 | auto in_expr = TreeExprBuilder::MakeInExpressionString(node_f0, in_constants); | |
231 | ||
232 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
233 | ||
234 | std::shared_ptr<Filter> filter; | |
235 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
236 | EXPECT_TRUE(status.ok()); | |
237 | ||
238 | // Create a row-batch with some sample data | |
239 | int num_records = 5; | |
240 | auto array_a = MakeArrowArrayUtf8({"test", "lol", "me", "arrow", "test"}, | |
241 | {true, true, true, true, false}); | |
242 | // expected output (indices for which condition matches) | |
243 | auto exp = MakeArrowArrayUint16({0, 2}); | |
244 | ||
245 | // prepare input record batch | |
246 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); | |
247 | ||
248 | std::shared_ptr<SelectionVector> selection_vector; | |
249 | status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector); | |
250 | EXPECT_TRUE(status.ok()); | |
251 | ||
252 | // Evaluate expression | |
253 | status = filter->Evaluate(*in_batch, selection_vector); | |
254 | EXPECT_TRUE(status.ok()); | |
255 | ||
256 | // Validate results | |
257 | EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray()); | |
258 | } | |
259 | ||
260 | TEST_F(TestIn, TestInStringValidationError) { | |
261 | // schema for input fields | |
262 | auto field0 = field("f0", arrow::int32()); | |
263 | auto schema = arrow::schema({field0}); | |
264 | ||
265 | // Build f0 in ("test" ,"me") | |
266 | auto node_f0 = TreeExprBuilder::MakeField(field0); | |
267 | std::unordered_set<std::string> in_constants({"test", "me"}); | |
268 | auto in_expr = TreeExprBuilder::MakeInExpressionString(node_f0, in_constants); | |
269 | auto condition = TreeExprBuilder::MakeCondition(in_expr); | |
270 | ||
271 | std::shared_ptr<Filter> filter; | |
272 | auto status = Filter::Make(schema, condition, TestConfiguration(), &filter); | |
273 | ||
274 | EXPECT_TRUE(status.IsExpressionValidationError()); | |
275 | std::string expected_error = "Evaluation expression for IN clause returns "; | |
276 | EXPECT_TRUE(status.message().find(expected_error) != std::string::npos); | |
277 | } | |
278 | } // namespace gandiva |