]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <gtest/gtest.h> | |
19 | ||
20 | #include <sstream> | |
21 | ||
22 | #include "arrow/memory_pool.h" | |
23 | #include "arrow/status.h" | |
24 | #include "gandiva/projector.h" | |
25 | #include "gandiva/tests/test_util.h" | |
26 | #include "gandiva/tree_expr_builder.h" | |
27 | ||
28 | namespace gandiva { | |
29 | ||
30 | using arrow::boolean; | |
31 | using arrow::float32; | |
32 | using arrow::float64; | |
33 | using arrow::int32; | |
34 | using arrow::int64; | |
35 | using arrow::utf8; | |
36 | ||
37 | class TestHash : public ::testing::Test { | |
38 | public: | |
39 | void SetUp() { pool_ = arrow::default_memory_pool(); } | |
40 | ||
41 | protected: | |
42 | arrow::MemoryPool* pool_; | |
43 | }; | |
44 | ||
45 | TEST_F(TestHash, TestSimple) { | |
46 | // schema for input fields | |
47 | auto field_a = field("a", int32()); | |
48 | auto schema = arrow::schema({field_a}); | |
49 | ||
50 | // output fields | |
51 | auto res_0 = field("res0", int32()); | |
52 | auto res_1 = field("res1", int64()); | |
53 | ||
54 | // build expression. | |
55 | // hash32(a, 10) | |
56 | // hash64(a) | |
57 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
58 | auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10); | |
59 | auto hash32 = TreeExprBuilder::MakeFunction("hash32", {node_a, literal_10}, int32()); | |
60 | auto hash64 = TreeExprBuilder::MakeFunction("hash64", {node_a}, int64()); | |
61 | auto expr_0 = TreeExprBuilder::MakeExpression(hash32, res_0); | |
62 | auto expr_1 = TreeExprBuilder::MakeExpression(hash64, res_1); | |
63 | ||
64 | // Build a projector for the expression. | |
65 | std::shared_ptr<Projector> projector; | |
66 | auto status = | |
67 | Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector); | |
68 | EXPECT_TRUE(status.ok()) << status.message(); | |
69 | ||
70 | // Create a row-batch with some sample data | |
71 | int num_records = 4; | |
72 | auto array_a = MakeArrowArrayInt32({1, 2, 3, 4}, {false, true, true, true}); | |
73 | ||
74 | // prepare input record batch | |
75 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); | |
76 | ||
77 | // Evaluate expression | |
78 | arrow::ArrayVector outputs; | |
79 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
80 | EXPECT_TRUE(status.ok()); | |
81 | ||
82 | // Validate results | |
83 | auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0)); | |
84 | EXPECT_EQ(int32_arr->null_count(), 0); | |
85 | EXPECT_EQ(int32_arr->Value(0), 10); | |
86 | for (int i = 1; i < num_records; ++i) { | |
87 | EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1)); | |
88 | } | |
89 | ||
90 | auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1)); | |
91 | EXPECT_EQ(int64_arr->null_count(), 0); | |
92 | EXPECT_EQ(int64_arr->Value(0), 0); | |
93 | for (int i = 1; i < num_records; ++i) { | |
94 | EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1)); | |
95 | } | |
96 | } | |
97 | ||
98 | TEST_F(TestHash, TestBuf) { | |
99 | // schema for input fields | |
100 | auto field_a = field("a", utf8()); | |
101 | auto schema = arrow::schema({field_a}); | |
102 | ||
103 | // output fields | |
104 | auto res_0 = field("res0", int32()); | |
105 | auto res_1 = field("res1", int64()); | |
106 | ||
107 | // build expressions. | |
108 | // hash32(a) | |
109 | // hash64(a, 10) | |
110 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
111 | auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10)); | |
112 | auto hash32 = TreeExprBuilder::MakeFunction("hash32", {node_a}, int32()); | |
113 | auto hash64 = TreeExprBuilder::MakeFunction("hash64", {node_a, literal_10}, int64()); | |
114 | auto expr_0 = TreeExprBuilder::MakeExpression(hash32, res_0); | |
115 | auto expr_1 = TreeExprBuilder::MakeExpression(hash64, res_1); | |
116 | ||
117 | // Build a projector for the expressions. | |
118 | std::shared_ptr<Projector> projector; | |
119 | auto status = | |
120 | Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector); | |
121 | ASSERT_OK(status) << status.message(); | |
122 | ||
123 | // Create a row-batch with some sample data | |
124 | int num_records = 4; | |
125 | auto array_a = | |
126 | MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {false, true, true, true}); | |
127 | ||
128 | // prepare input record batch | |
129 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); | |
130 | ||
131 | // Evaluate expression | |
132 | arrow::ArrayVector outputs; | |
133 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
134 | ASSERT_OK(status); | |
135 | ||
136 | // Validate results | |
137 | auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0)); | |
138 | EXPECT_EQ(int32_arr->null_count(), 0); | |
139 | EXPECT_EQ(int32_arr->Value(0), 0); | |
140 | for (int i = 1; i < num_records; ++i) { | |
141 | EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1)); | |
142 | } | |
143 | ||
144 | auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1)); | |
145 | EXPECT_EQ(int64_arr->null_count(), 0); | |
146 | EXPECT_EQ(int64_arr->Value(0), 10); | |
147 | for (int i = 1; i < num_records; ++i) { | |
148 | EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1)); | |
149 | } | |
150 | } | |
151 | ||
152 | TEST_F(TestHash, TestSha256Simple) { | |
153 | // schema for input fields | |
154 | auto field_a = field("a", int32()); | |
155 | auto field_b = field("b", int64()); | |
156 | auto field_c = field("c", float32()); | |
157 | auto field_d = field("d", float64()); | |
158 | auto schema = arrow::schema({field_a, field_b, field_c, field_d}); | |
159 | ||
160 | // output fields | |
161 | auto res_0 = field("res0", utf8()); | |
162 | auto res_1 = field("res1", utf8()); | |
163 | auto res_2 = field("res2", utf8()); | |
164 | auto res_3 = field("res3", utf8()); | |
165 | ||
166 | // build expressions. | |
167 | // hashSHA256(a) | |
168 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
169 | auto hashSha256_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8()); | |
170 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256_1, res_0); | |
171 | ||
172 | auto node_b = TreeExprBuilder::MakeField(field_b); | |
173 | auto hashSha256_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8()); | |
174 | auto expr_1 = TreeExprBuilder::MakeExpression(hashSha256_2, res_1); | |
175 | ||
176 | auto node_c = TreeExprBuilder::MakeField(field_c); | |
177 | auto hashSha256_3 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8()); | |
178 | auto expr_2 = TreeExprBuilder::MakeExpression(hashSha256_3, res_2); | |
179 | ||
180 | auto node_d = TreeExprBuilder::MakeField(field_d); | |
181 | auto hashSha256_4 = TreeExprBuilder::MakeFunction("hashSHA256", {node_d}, utf8()); | |
182 | auto expr_3 = TreeExprBuilder::MakeExpression(hashSha256_4, res_3); | |
183 | ||
184 | // Build a projector for the expressions. | |
185 | std::shared_ptr<Projector> projector; | |
186 | auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3}, | |
187 | TestConfiguration(), &projector); | |
188 | ASSERT_OK(status) << status.message(); | |
189 | ||
190 | // Create a row-batch with some sample data | |
191 | int num_records = 2; | |
192 | auto validity_array = {false, true}; | |
193 | ||
194 | auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array); | |
195 | ||
196 | auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array); | |
197 | ||
198 | auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array); | |
199 | ||
200 | auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array); | |
201 | ||
202 | // prepare input record batch | |
203 | auto in_batch = arrow::RecordBatch::Make( | |
204 | schema, num_records, {array_int32, array_int64, array_float32, array_float64}); | |
205 | ||
206 | // Evaluate expression | |
207 | arrow::ArrayVector outputs; | |
208 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
209 | ASSERT_OK(status); | |
210 | ||
211 | auto response_int32 = outputs.at(0); | |
212 | auto response_int64 = outputs.at(1); | |
213 | auto response_float32 = outputs.at(2); | |
214 | auto response_float64 = outputs.at(3); | |
215 | ||
216 | // Checks if the null and zero representation for numeric values | |
217 | // are consistent between the types | |
218 | EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64); | |
219 | EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32); | |
220 | EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64); | |
221 | ||
222 | const int sha256_hash_size = 64; | |
223 | ||
224 | // Checks if the hash size in response is correct | |
225 | for (int i = 1; i < num_records; ++i) { | |
226 | const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString(); | |
227 | ||
228 | EXPECT_EQ(value_at_position.size(), sha256_hash_size); | |
229 | EXPECT_NE(value_at_position, | |
230 | response_int32->GetScalar(i - 1).ValueOrDie()->ToString()); | |
231 | } | |
232 | } | |
233 | ||
234 | TEST_F(TestHash, TestSha256Varlen) { | |
235 | // schema for input fields | |
236 | auto field_a = field("a", utf8()); | |
237 | auto schema = arrow::schema({field_a}); | |
238 | ||
239 | // output fields | |
240 | auto res_0 = field("res0", utf8()); | |
241 | ||
242 | // build expressions. | |
243 | // hashSHA256(a) | |
244 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
245 | auto hashSha256 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8()); | |
246 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256, res_0); | |
247 | ||
248 | // Build a projector for the expressions. | |
249 | std::shared_ptr<Projector> projector; | |
250 | auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector); | |
251 | EXPECT_TRUE(status.ok()) << status.message(); | |
252 | ||
253 | // Create a row-batch with some sample data | |
254 | int num_records = 3; | |
255 | ||
256 | std::string first_string = | |
257 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY " | |
258 | "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]"; | |
259 | std::string second_string = | |
260 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY " | |
261 | "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; | |
262 | ||
263 | auto array_a = | |
264 | MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true, true}); | |
265 | ||
266 | // prepare input record batch | |
267 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); | |
268 | ||
269 | // Evaluate expression | |
270 | arrow::ArrayVector outputs; | |
271 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
272 | ASSERT_OK(status); | |
273 | ||
274 | auto response = outputs.at(0); | |
275 | const int sha256_hash_size = 64; | |
276 | ||
277 | EXPECT_EQ(response->null_count(), 0); | |
278 | ||
279 | // Checks that the null value was hashed | |
280 | EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), ""); | |
281 | EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(), sha256_hash_size); | |
282 | ||
283 | // Check that all generated hashes were different | |
284 | for (int i = 1; i < num_records; ++i) { | |
285 | const auto& value_at_position = response->GetScalar(i).ValueOrDie()->ToString(); | |
286 | ||
287 | EXPECT_EQ(value_at_position.size(), sha256_hash_size); | |
288 | EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString()); | |
289 | } | |
290 | } | |
291 | ||
292 | TEST_F(TestHash, TestSha1Simple) { | |
293 | // schema for input fields | |
294 | auto field_a = field("a", int32()); | |
295 | auto field_b = field("b", int64()); | |
296 | auto field_c = field("c", float32()); | |
297 | auto field_d = field("d", float64()); | |
298 | auto schema = arrow::schema({field_a, field_b, field_c, field_d}); | |
299 | ||
300 | // output fields | |
301 | auto res_0 = field("res0", utf8()); | |
302 | auto res_1 = field("res1", utf8()); | |
303 | auto res_2 = field("res2", utf8()); | |
304 | auto res_3 = field("res3", utf8()); | |
305 | ||
306 | // build expressions. | |
307 | // hashSHA1(a) | |
308 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
309 | auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8()); | |
310 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1_1, res_0); | |
311 | ||
312 | auto node_b = TreeExprBuilder::MakeField(field_b); | |
313 | auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8()); | |
314 | auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_2, res_1); | |
315 | ||
316 | auto node_c = TreeExprBuilder::MakeField(field_c); | |
317 | auto hashSha1_3 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8()); | |
318 | auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_3, res_2); | |
319 | ||
320 | auto node_d = TreeExprBuilder::MakeField(field_d); | |
321 | auto hashSha1_4 = TreeExprBuilder::MakeFunction("hashSHA1", {node_d}, utf8()); | |
322 | auto expr_3 = TreeExprBuilder::MakeExpression(hashSha1_4, res_3); | |
323 | ||
324 | // Build a projector for the expressions. | |
325 | std::shared_ptr<Projector> projector; | |
326 | auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3}, | |
327 | TestConfiguration(), &projector); | |
328 | EXPECT_TRUE(status.ok()) << status.message(); | |
329 | ||
330 | // Create a row-batch with some sample data | |
331 | int num_records = 2; | |
332 | auto validity_array = {false, true}; | |
333 | ||
334 | auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array); | |
335 | ||
336 | auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array); | |
337 | ||
338 | auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array); | |
339 | ||
340 | auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array); | |
341 | ||
342 | // prepare input record batch | |
343 | auto in_batch = arrow::RecordBatch::Make( | |
344 | schema, num_records, {array_int32, array_int64, array_float32, array_float64}); | |
345 | ||
346 | // Evaluate expression | |
347 | arrow::ArrayVector outputs; | |
348 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
349 | ASSERT_OK(status); | |
350 | ||
351 | auto response_int32 = outputs.at(0); | |
352 | auto response_int64 = outputs.at(1); | |
353 | auto response_float32 = outputs.at(2); | |
354 | auto response_float64 = outputs.at(3); | |
355 | ||
356 | // Checks if the null and zero representation for numeric values | |
357 | // are consistent between the types | |
358 | EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64); | |
359 | EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32); | |
360 | EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64); | |
361 | ||
362 | const int sha1_hash_size = 40; | |
363 | ||
364 | // Checks if the hash size in response is correct | |
365 | for (int i = 1; i < num_records; ++i) { | |
366 | const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString(); | |
367 | ||
368 | EXPECT_EQ(value_at_position.size(), sha1_hash_size); | |
369 | EXPECT_NE(value_at_position, | |
370 | response_int32->GetScalar(i - 1).ValueOrDie()->ToString()); | |
371 | } | |
372 | } | |
373 | ||
374 | TEST_F(TestHash, TestSha1Varlen) { | |
375 | // schema for input fields | |
376 | auto field_a = field("a", utf8()); | |
377 | auto schema = arrow::schema({field_a}); | |
378 | ||
379 | // output fields | |
380 | auto res_0 = field("res0", utf8()); | |
381 | ||
382 | // build expressions. | |
383 | // hashSHA1(a) | |
384 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
385 | auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8()); | |
386 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0); | |
387 | ||
388 | // Build a projector for the expressions. | |
389 | std::shared_ptr<Projector> projector; | |
390 | auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector); | |
391 | ASSERT_OK(status) << status.message(); | |
392 | ||
393 | // Create a row-batch with some sample data | |
394 | int num_records = 3; | |
395 | ||
396 | std::string first_string = | |
397 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], " | |
398 | "Yen [jɛn], Yoga [ˈjoːgɑ]"; | |
399 | std::string second_string = | |
400 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], " | |
401 | "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; | |
402 | ||
403 | auto array_a = | |
404 | MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true}); | |
405 | ||
406 | // prepare input record batch | |
407 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a}); | |
408 | ||
409 | // Evaluate expression | |
410 | arrow::ArrayVector outputs; | |
411 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
412 | ASSERT_OK(status); | |
413 | ||
414 | auto response = outputs.at(0); | |
415 | const int sha1_hash_size = 40; | |
416 | ||
417 | EXPECT_EQ(response->null_count(), 0); | |
418 | ||
419 | // Checks that the null value was hashed | |
420 | EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), ""); | |
421 | EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(), sha1_hash_size); | |
422 | ||
423 | // Check that all generated hashes were different | |
424 | for (int i = 1; i < num_records; ++i) { | |
425 | const auto& value_at_position = response->GetScalar(i).ValueOrDie()->ToString(); | |
426 | ||
427 | EXPECT_EQ(value_at_position.size(), sha1_hash_size); | |
428 | EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString()); | |
429 | } | |
430 | } | |
431 | ||
432 | TEST_F(TestHash, TestSha1FunctionsAlias) { | |
433 | // schema for input fields | |
434 | auto field_a = field("a", utf8()); | |
435 | auto field_b = field("c", int64()); | |
436 | auto field_c = field("e", float64()); | |
437 | auto schema = arrow::schema({field_a, field_b, field_c}); | |
438 | ||
439 | // output fields | |
440 | auto res_0 = field("res0", utf8()); | |
441 | auto res_0_sha1 = field("res0sha1", utf8()); | |
442 | auto res_0_sha = field("res0sha", utf8()); | |
443 | ||
444 | auto res_1 = field("res1", utf8()); | |
445 | auto res_1_sha1 = field("res1sha1", utf8()); | |
446 | auto res_1_sha = field("res1sha", utf8()); | |
447 | ||
448 | auto res_2 = field("res2", utf8()); | |
449 | auto res_2_sha1 = field("res2_sha1", utf8()); | |
450 | auto res_2_sha = field("res2_sha", utf8()); | |
451 | ||
452 | // build expressions. | |
453 | // hashSHA1(a) | |
454 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
455 | auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8()); | |
456 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0); | |
457 | auto sha1 = TreeExprBuilder::MakeFunction("sha1", {node_a}, utf8()); | |
458 | auto expr_0_sha1 = TreeExprBuilder::MakeExpression(sha1, res_0_sha1); | |
459 | auto sha = TreeExprBuilder::MakeFunction("sha", {node_a}, utf8()); | |
460 | auto expr_0_sha = TreeExprBuilder::MakeExpression(sha, res_0_sha); | |
461 | ||
462 | auto node_b = TreeExprBuilder::MakeField(field_b); | |
463 | auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8()); | |
464 | auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_1, res_1); | |
465 | auto sha1_1 = TreeExprBuilder::MakeFunction("sha1", {node_b}, utf8()); | |
466 | auto expr_1_sha1 = TreeExprBuilder::MakeExpression(sha1_1, res_1_sha1); | |
467 | auto sha_1 = TreeExprBuilder::MakeFunction("sha", {node_b}, utf8()); | |
468 | auto expr_1_sha = TreeExprBuilder::MakeExpression(sha_1, res_1_sha); | |
469 | ||
470 | auto node_c = TreeExprBuilder::MakeField(field_c); | |
471 | auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8()); | |
472 | auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_2, res_2); | |
473 | auto sha1_2 = TreeExprBuilder::MakeFunction("sha1", {node_c}, utf8()); | |
474 | auto expr_2_sha1 = TreeExprBuilder::MakeExpression(sha1_2, res_2_sha1); | |
475 | auto sha_2 = TreeExprBuilder::MakeFunction("sha", {node_c}, utf8()); | |
476 | auto expr_2_sha = TreeExprBuilder::MakeExpression(sha_2, res_2_sha); | |
477 | ||
478 | // Build a projector for the expressions. | |
479 | std::shared_ptr<Projector> projector; | |
480 | auto status = Projector::Make(schema, | |
481 | {expr_0, expr_0_sha, expr_0_sha1, expr_1, expr_1_sha, | |
482 | expr_1_sha1, expr_2, expr_2_sha, expr_2_sha1}, | |
483 | TestConfiguration(), &projector); | |
484 | ASSERT_OK(status) << status.message(); | |
485 | ||
486 | // Create a row-batch with some sample data | |
487 | int32_t num_records = 3; | |
488 | ||
489 | std::string first_string = | |
490 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], " | |
491 | "Yen [jɛn], Yoga [ˈjoːgɑ]"; | |
492 | std::string second_string = | |
493 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], " | |
494 | "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; | |
495 | ||
496 | auto array_utf8 = | |
497 | MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true}); | |
498 | ||
499 | auto validity_array = {false, true, true}; | |
500 | ||
501 | auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array); | |
502 | ||
503 | auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array); | |
504 | ||
505 | // prepare input record batch | |
506 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, | |
507 | {array_utf8, array_int64, array_float64}); | |
508 | ||
509 | // Evaluate expression | |
510 | arrow::ArrayVector outputs; | |
511 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
512 | ASSERT_OK(status); | |
513 | ||
514 | // Checks that the response for the hashSHA1, sha and sha1 are equals for the first | |
515 | // field of utf8 type | |
516 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha1 and sha | |
517 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2)); // sha and sha1 | |
518 | ||
519 | // Checks that the response for the hashSHA1, sha and sha1 are equals for the second | |
520 | // field of int64 type | |
521 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(3), outputs.at(4)); // hashSha1 and sha | |
522 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4), outputs.at(5)); // sha and sha1 | |
523 | ||
524 | // Checks that the response for the hashSHA1, sha and sha1 are equals for the first | |
525 | // field of float64 type | |
526 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(6), outputs.at(7)); // hashSha1 and sha responses | |
527 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8)); // sha and sha1 responses | |
528 | } | |
529 | ||
530 | TEST_F(TestHash, TestSha256FunctionsAlias) { | |
531 | // schema for input fields | |
532 | auto field_a = field("a", utf8()); | |
533 | auto field_b = field("c", int64()); | |
534 | auto field_c = field("e", float64()); | |
535 | auto schema = arrow::schema({field_a, field_b, field_c}); | |
536 | ||
537 | // output fields | |
538 | auto res_0 = field("res0", utf8()); | |
539 | auto res_0_sha256 = field("res0sha256", utf8()); | |
540 | ||
541 | auto res_1 = field("res1", utf8()); | |
542 | auto res_1_sha256 = field("res1sha256", utf8()); | |
543 | ||
544 | auto res_2 = field("res2", utf8()); | |
545 | auto res_2_sha256 = field("res2_sha256", utf8()); | |
546 | ||
547 | // build expressions. | |
548 | // hashSHA1(a) | |
549 | auto node_a = TreeExprBuilder::MakeField(field_a); | |
550 | auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8()); | |
551 | auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0); | |
552 | auto sha256 = TreeExprBuilder::MakeFunction("sha256", {node_a}, utf8()); | |
553 | auto expr_0_sha256 = TreeExprBuilder::MakeExpression(sha256, res_0_sha256); | |
554 | ||
555 | auto node_b = TreeExprBuilder::MakeField(field_b); | |
556 | auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8()); | |
557 | auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1); | |
558 | auto sha256_1 = TreeExprBuilder::MakeFunction("sha256", {node_b}, utf8()); | |
559 | auto expr_1_sha256 = TreeExprBuilder::MakeExpression(sha256_1, res_1_sha256); | |
560 | ||
561 | auto node_c = TreeExprBuilder::MakeField(field_c); | |
562 | auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8()); | |
563 | auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2); | |
564 | auto sha256_2 = TreeExprBuilder::MakeFunction("sha256", {node_c}, utf8()); | |
565 | auto expr_2_sha256 = TreeExprBuilder::MakeExpression(sha256_2, res_2_sha256); | |
566 | ||
567 | // Build a projector for the expressions. | |
568 | std::shared_ptr<Projector> projector; | |
569 | auto status = Projector::Make( | |
570 | schema, {expr_0, expr_0_sha256, expr_1, expr_1_sha256, expr_2, expr_2_sha256}, | |
571 | TestConfiguration(), &projector); | |
572 | ASSERT_OK(status) << status.message(); | |
573 | ||
574 | // Create a row-batch with some sample data | |
575 | int32_t num_records = 3; | |
576 | ||
577 | std::string first_string = | |
578 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], " | |
579 | "Yen [jɛn], Yoga [ˈjoːgɑ]"; | |
580 | std::string second_string = | |
581 | "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], " | |
582 | "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ"; | |
583 | ||
584 | auto array_utf8 = | |
585 | MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true}); | |
586 | ||
587 | auto validity_array = {false, true, true}; | |
588 | ||
589 | auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array); | |
590 | ||
591 | auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array); | |
592 | ||
593 | // prepare input record batch | |
594 | auto in_batch = arrow::RecordBatch::Make(schema, num_records, | |
595 | {array_utf8, array_int64, array_float64}); | |
596 | ||
597 | // Evaluate expression | |
598 | arrow::ArrayVector outputs; | |
599 | status = projector->Evaluate(*in_batch, pool_, &outputs); | |
600 | ASSERT_OK(status); | |
601 | ||
602 | // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first | |
603 | // field of utf8 type | |
604 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha2 and sha256 | |
605 | ||
606 | // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the second | |
607 | // field of int64 type | |
608 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3)); // hashSha2 and sha256 | |
609 | ||
610 | // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first | |
611 | // field of float64 type | |
612 | EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4), | |
613 | outputs.at(5)); // hashSha2 and sha256 responses | |
614 | } | |
615 | } // namespace gandiva |