]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/gandiva/tests/hash_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / gandiva / tests / hash_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19
20 #include <sstream>
21
22 #include "arrow/memory_pool.h"
23 #include "arrow/status.h"
24 #include "gandiva/projector.h"
25 #include "gandiva/tests/test_util.h"
26 #include "gandiva/tree_expr_builder.h"
27
28 namespace gandiva {
29
30 using arrow::boolean;
31 using arrow::float32;
32 using arrow::float64;
33 using arrow::int32;
34 using arrow::int64;
35 using arrow::utf8;
36
37 class TestHash : public ::testing::Test {
38 public:
39 void SetUp() { pool_ = arrow::default_memory_pool(); }
40
41 protected:
42 arrow::MemoryPool* pool_;
43 };
44
45 TEST_F(TestHash, TestSimple) {
46 // schema for input fields
47 auto field_a = field("a", int32());
48 auto schema = arrow::schema({field_a});
49
50 // output fields
51 auto res_0 = field("res0", int32());
52 auto res_1 = field("res1", int64());
53
54 // build expression.
55 // hash32(a, 10)
56 // hash64(a)
57 auto node_a = TreeExprBuilder::MakeField(field_a);
58 auto literal_10 = TreeExprBuilder::MakeLiteral((int32_t)10);
59 auto hash32 = TreeExprBuilder::MakeFunction("hash32", {node_a, literal_10}, int32());
60 auto hash64 = TreeExprBuilder::MakeFunction("hash64", {node_a}, int64());
61 auto expr_0 = TreeExprBuilder::MakeExpression(hash32, res_0);
62 auto expr_1 = TreeExprBuilder::MakeExpression(hash64, res_1);
63
64 // Build a projector for the expression.
65 std::shared_ptr<Projector> projector;
66 auto status =
67 Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector);
68 EXPECT_TRUE(status.ok()) << status.message();
69
70 // Create a row-batch with some sample data
71 int num_records = 4;
72 auto array_a = MakeArrowArrayInt32({1, 2, 3, 4}, {false, true, true, true});
73
74 // prepare input record batch
75 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
76
77 // Evaluate expression
78 arrow::ArrayVector outputs;
79 status = projector->Evaluate(*in_batch, pool_, &outputs);
80 EXPECT_TRUE(status.ok());
81
82 // Validate results
83 auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
84 EXPECT_EQ(int32_arr->null_count(), 0);
85 EXPECT_EQ(int32_arr->Value(0), 10);
86 for (int i = 1; i < num_records; ++i) {
87 EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1));
88 }
89
90 auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
91 EXPECT_EQ(int64_arr->null_count(), 0);
92 EXPECT_EQ(int64_arr->Value(0), 0);
93 for (int i = 1; i < num_records; ++i) {
94 EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1));
95 }
96 }
97
98 TEST_F(TestHash, TestBuf) {
99 // schema for input fields
100 auto field_a = field("a", utf8());
101 auto schema = arrow::schema({field_a});
102
103 // output fields
104 auto res_0 = field("res0", int32());
105 auto res_1 = field("res1", int64());
106
107 // build expressions.
108 // hash32(a)
109 // hash64(a, 10)
110 auto node_a = TreeExprBuilder::MakeField(field_a);
111 auto literal_10 = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
112 auto hash32 = TreeExprBuilder::MakeFunction("hash32", {node_a}, int32());
113 auto hash64 = TreeExprBuilder::MakeFunction("hash64", {node_a, literal_10}, int64());
114 auto expr_0 = TreeExprBuilder::MakeExpression(hash32, res_0);
115 auto expr_1 = TreeExprBuilder::MakeExpression(hash64, res_1);
116
117 // Build a projector for the expressions.
118 std::shared_ptr<Projector> projector;
119 auto status =
120 Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector);
121 ASSERT_OK(status) << status.message();
122
123 // Create a row-batch with some sample data
124 int num_records = 4;
125 auto array_a =
126 MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {false, true, true, true});
127
128 // prepare input record batch
129 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
130
131 // Evaluate expression
132 arrow::ArrayVector outputs;
133 status = projector->Evaluate(*in_batch, pool_, &outputs);
134 ASSERT_OK(status);
135
136 // Validate results
137 auto int32_arr = std::dynamic_pointer_cast<arrow::Int32Array>(outputs.at(0));
138 EXPECT_EQ(int32_arr->null_count(), 0);
139 EXPECT_EQ(int32_arr->Value(0), 0);
140 for (int i = 1; i < num_records; ++i) {
141 EXPECT_NE(int32_arr->Value(i), int32_arr->Value(i - 1));
142 }
143
144 auto int64_arr = std::dynamic_pointer_cast<arrow::Int64Array>(outputs.at(1));
145 EXPECT_EQ(int64_arr->null_count(), 0);
146 EXPECT_EQ(int64_arr->Value(0), 10);
147 for (int i = 1; i < num_records; ++i) {
148 EXPECT_NE(int64_arr->Value(i), int64_arr->Value(i - 1));
149 }
150 }
151
152 TEST_F(TestHash, TestSha256Simple) {
153 // schema for input fields
154 auto field_a = field("a", int32());
155 auto field_b = field("b", int64());
156 auto field_c = field("c", float32());
157 auto field_d = field("d", float64());
158 auto schema = arrow::schema({field_a, field_b, field_c, field_d});
159
160 // output fields
161 auto res_0 = field("res0", utf8());
162 auto res_1 = field("res1", utf8());
163 auto res_2 = field("res2", utf8());
164 auto res_3 = field("res3", utf8());
165
166 // build expressions.
167 // hashSHA256(a)
168 auto node_a = TreeExprBuilder::MakeField(field_a);
169 auto hashSha256_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8());
170 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256_1, res_0);
171
172 auto node_b = TreeExprBuilder::MakeField(field_b);
173 auto hashSha256_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8());
174 auto expr_1 = TreeExprBuilder::MakeExpression(hashSha256_2, res_1);
175
176 auto node_c = TreeExprBuilder::MakeField(field_c);
177 auto hashSha256_3 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8());
178 auto expr_2 = TreeExprBuilder::MakeExpression(hashSha256_3, res_2);
179
180 auto node_d = TreeExprBuilder::MakeField(field_d);
181 auto hashSha256_4 = TreeExprBuilder::MakeFunction("hashSHA256", {node_d}, utf8());
182 auto expr_3 = TreeExprBuilder::MakeExpression(hashSha256_4, res_3);
183
184 // Build a projector for the expressions.
185 std::shared_ptr<Projector> projector;
186 auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
187 TestConfiguration(), &projector);
188 ASSERT_OK(status) << status.message();
189
190 // Create a row-batch with some sample data
191 int num_records = 2;
192 auto validity_array = {false, true};
193
194 auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array);
195
196 auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array);
197
198 auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
199
200 auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
201
202 // prepare input record batch
203 auto in_batch = arrow::RecordBatch::Make(
204 schema, num_records, {array_int32, array_int64, array_float32, array_float64});
205
206 // Evaluate expression
207 arrow::ArrayVector outputs;
208 status = projector->Evaluate(*in_batch, pool_, &outputs);
209 ASSERT_OK(status);
210
211 auto response_int32 = outputs.at(0);
212 auto response_int64 = outputs.at(1);
213 auto response_float32 = outputs.at(2);
214 auto response_float64 = outputs.at(3);
215
216 // Checks if the null and zero representation for numeric values
217 // are consistent between the types
218 EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64);
219 EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32);
220 EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64);
221
222 const int sha256_hash_size = 64;
223
224 // Checks if the hash size in response is correct
225 for (int i = 1; i < num_records; ++i) {
226 const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString();
227
228 EXPECT_EQ(value_at_position.size(), sha256_hash_size);
229 EXPECT_NE(value_at_position,
230 response_int32->GetScalar(i - 1).ValueOrDie()->ToString());
231 }
232 }
233
234 TEST_F(TestHash, TestSha256Varlen) {
235 // schema for input fields
236 auto field_a = field("a", utf8());
237 auto schema = arrow::schema({field_a});
238
239 // output fields
240 auto res_0 = field("res0", utf8());
241
242 // build expressions.
243 // hashSHA256(a)
244 auto node_a = TreeExprBuilder::MakeField(field_a);
245 auto hashSha256 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8());
246 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha256, res_0);
247
248 // Build a projector for the expressions.
249 std::shared_ptr<Projector> projector;
250 auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector);
251 EXPECT_TRUE(status.ok()) << status.message();
252
253 // Create a row-batch with some sample data
254 int num_records = 3;
255
256 std::string first_string =
257 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY "
258 "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]";
259 std::string second_string =
260 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY "
261 "[ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
262
263 auto array_a =
264 MakeArrowArrayUtf8({"foo", first_string, second_string}, {false, true, true});
265
266 // prepare input record batch
267 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
268
269 // Evaluate expression
270 arrow::ArrayVector outputs;
271 status = projector->Evaluate(*in_batch, pool_, &outputs);
272 ASSERT_OK(status);
273
274 auto response = outputs.at(0);
275 const int sha256_hash_size = 64;
276
277 EXPECT_EQ(response->null_count(), 0);
278
279 // Checks that the null value was hashed
280 EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), "");
281 EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(), sha256_hash_size);
282
283 // Check that all generated hashes were different
284 for (int i = 1; i < num_records; ++i) {
285 const auto& value_at_position = response->GetScalar(i).ValueOrDie()->ToString();
286
287 EXPECT_EQ(value_at_position.size(), sha256_hash_size);
288 EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString());
289 }
290 }
291
292 TEST_F(TestHash, TestSha1Simple) {
293 // schema for input fields
294 auto field_a = field("a", int32());
295 auto field_b = field("b", int64());
296 auto field_c = field("c", float32());
297 auto field_d = field("d", float64());
298 auto schema = arrow::schema({field_a, field_b, field_c, field_d});
299
300 // output fields
301 auto res_0 = field("res0", utf8());
302 auto res_1 = field("res1", utf8());
303 auto res_2 = field("res2", utf8());
304 auto res_3 = field("res3", utf8());
305
306 // build expressions.
307 // hashSHA1(a)
308 auto node_a = TreeExprBuilder::MakeField(field_a);
309 auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8());
310 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1_1, res_0);
311
312 auto node_b = TreeExprBuilder::MakeField(field_b);
313 auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8());
314 auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_2, res_1);
315
316 auto node_c = TreeExprBuilder::MakeField(field_c);
317 auto hashSha1_3 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8());
318 auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_3, res_2);
319
320 auto node_d = TreeExprBuilder::MakeField(field_d);
321 auto hashSha1_4 = TreeExprBuilder::MakeFunction("hashSHA1", {node_d}, utf8());
322 auto expr_3 = TreeExprBuilder::MakeExpression(hashSha1_4, res_3);
323
324 // Build a projector for the expressions.
325 std::shared_ptr<Projector> projector;
326 auto status = Projector::Make(schema, {expr_0, expr_1, expr_2, expr_3},
327 TestConfiguration(), &projector);
328 EXPECT_TRUE(status.ok()) << status.message();
329
330 // Create a row-batch with some sample data
331 int num_records = 2;
332 auto validity_array = {false, true};
333
334 auto array_int32 = MakeArrowArrayInt32({1, 0}, validity_array);
335
336 auto array_int64 = MakeArrowArrayInt64({1, 0}, validity_array);
337
338 auto array_float32 = MakeArrowArrayFloat32({1.0, 0.0}, validity_array);
339
340 auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0}, validity_array);
341
342 // prepare input record batch
343 auto in_batch = arrow::RecordBatch::Make(
344 schema, num_records, {array_int32, array_int64, array_float32, array_float64});
345
346 // Evaluate expression
347 arrow::ArrayVector outputs;
348 status = projector->Evaluate(*in_batch, pool_, &outputs);
349 ASSERT_OK(status);
350
351 auto response_int32 = outputs.at(0);
352 auto response_int64 = outputs.at(1);
353 auto response_float32 = outputs.at(2);
354 auto response_float64 = outputs.at(3);
355
356 // Checks if the null and zero representation for numeric values
357 // are consistent between the types
358 EXPECT_ARROW_ARRAY_EQUALS(response_int32, response_int64);
359 EXPECT_ARROW_ARRAY_EQUALS(response_int64, response_float32);
360 EXPECT_ARROW_ARRAY_EQUALS(response_float32, response_float64);
361
362 const int sha1_hash_size = 40;
363
364 // Checks if the hash size in response is correct
365 for (int i = 1; i < num_records; ++i) {
366 const auto& value_at_position = response_int32->GetScalar(i).ValueOrDie()->ToString();
367
368 EXPECT_EQ(value_at_position.size(), sha1_hash_size);
369 EXPECT_NE(value_at_position,
370 response_int32->GetScalar(i - 1).ValueOrDie()->ToString());
371 }
372 }
373
374 TEST_F(TestHash, TestSha1Varlen) {
375 // schema for input fields
376 auto field_a = field("a", utf8());
377 auto schema = arrow::schema({field_a});
378
379 // output fields
380 auto res_0 = field("res0", utf8());
381
382 // build expressions.
383 // hashSHA1(a)
384 auto node_a = TreeExprBuilder::MakeField(field_a);
385 auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8());
386 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0);
387
388 // Build a projector for the expressions.
389 std::shared_ptr<Projector> projector;
390 auto status = Projector::Make(schema, {expr_0}, TestConfiguration(), &projector);
391 ASSERT_OK(status) << status.message();
392
393 // Create a row-batch with some sample data
394 int num_records = 3;
395
396 std::string first_string =
397 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
398 "Yen [jɛn], Yoga [ˈjoːgɑ]";
399 std::string second_string =
400 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
401 "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
402
403 auto array_a =
404 MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
405
406 // prepare input record batch
407 auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
408
409 // Evaluate expression
410 arrow::ArrayVector outputs;
411 status = projector->Evaluate(*in_batch, pool_, &outputs);
412 ASSERT_OK(status);
413
414 auto response = outputs.at(0);
415 const int sha1_hash_size = 40;
416
417 EXPECT_EQ(response->null_count(), 0);
418
419 // Checks that the null value was hashed
420 EXPECT_NE(response->GetScalar(0).ValueOrDie()->ToString(), "");
421 EXPECT_EQ(response->GetScalar(0).ValueOrDie()->ToString().size(), sha1_hash_size);
422
423 // Check that all generated hashes were different
424 for (int i = 1; i < num_records; ++i) {
425 const auto& value_at_position = response->GetScalar(i).ValueOrDie()->ToString();
426
427 EXPECT_EQ(value_at_position.size(), sha1_hash_size);
428 EXPECT_NE(value_at_position, response->GetScalar(i - 1).ValueOrDie()->ToString());
429 }
430 }
431
432 TEST_F(TestHash, TestSha1FunctionsAlias) {
433 // schema for input fields
434 auto field_a = field("a", utf8());
435 auto field_b = field("c", int64());
436 auto field_c = field("e", float64());
437 auto schema = arrow::schema({field_a, field_b, field_c});
438
439 // output fields
440 auto res_0 = field("res0", utf8());
441 auto res_0_sha1 = field("res0sha1", utf8());
442 auto res_0_sha = field("res0sha", utf8());
443
444 auto res_1 = field("res1", utf8());
445 auto res_1_sha1 = field("res1sha1", utf8());
446 auto res_1_sha = field("res1sha", utf8());
447
448 auto res_2 = field("res2", utf8());
449 auto res_2_sha1 = field("res2_sha1", utf8());
450 auto res_2_sha = field("res2_sha", utf8());
451
452 // build expressions.
453 // hashSHA1(a)
454 auto node_a = TreeExprBuilder::MakeField(field_a);
455 auto hashSha1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_a}, utf8());
456 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha1, res_0);
457 auto sha1 = TreeExprBuilder::MakeFunction("sha1", {node_a}, utf8());
458 auto expr_0_sha1 = TreeExprBuilder::MakeExpression(sha1, res_0_sha1);
459 auto sha = TreeExprBuilder::MakeFunction("sha", {node_a}, utf8());
460 auto expr_0_sha = TreeExprBuilder::MakeExpression(sha, res_0_sha);
461
462 auto node_b = TreeExprBuilder::MakeField(field_b);
463 auto hashSha1_1 = TreeExprBuilder::MakeFunction("hashSHA1", {node_b}, utf8());
464 auto expr_1 = TreeExprBuilder::MakeExpression(hashSha1_1, res_1);
465 auto sha1_1 = TreeExprBuilder::MakeFunction("sha1", {node_b}, utf8());
466 auto expr_1_sha1 = TreeExprBuilder::MakeExpression(sha1_1, res_1_sha1);
467 auto sha_1 = TreeExprBuilder::MakeFunction("sha", {node_b}, utf8());
468 auto expr_1_sha = TreeExprBuilder::MakeExpression(sha_1, res_1_sha);
469
470 auto node_c = TreeExprBuilder::MakeField(field_c);
471 auto hashSha1_2 = TreeExprBuilder::MakeFunction("hashSHA1", {node_c}, utf8());
472 auto expr_2 = TreeExprBuilder::MakeExpression(hashSha1_2, res_2);
473 auto sha1_2 = TreeExprBuilder::MakeFunction("sha1", {node_c}, utf8());
474 auto expr_2_sha1 = TreeExprBuilder::MakeExpression(sha1_2, res_2_sha1);
475 auto sha_2 = TreeExprBuilder::MakeFunction("sha", {node_c}, utf8());
476 auto expr_2_sha = TreeExprBuilder::MakeExpression(sha_2, res_2_sha);
477
478 // Build a projector for the expressions.
479 std::shared_ptr<Projector> projector;
480 auto status = Projector::Make(schema,
481 {expr_0, expr_0_sha, expr_0_sha1, expr_1, expr_1_sha,
482 expr_1_sha1, expr_2, expr_2_sha, expr_2_sha1},
483 TestConfiguration(), &projector);
484 ASSERT_OK(status) << status.message();
485
486 // Create a row-batch with some sample data
487 int32_t num_records = 3;
488
489 std::string first_string =
490 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
491 "Yen [jɛn], Yoga [ˈjoːgɑ]";
492 std::string second_string =
493 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
494 "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
495
496 auto array_utf8 =
497 MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
498
499 auto validity_array = {false, true, true};
500
501 auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
502
503 auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
504
505 // prepare input record batch
506 auto in_batch = arrow::RecordBatch::Make(schema, num_records,
507 {array_utf8, array_int64, array_float64});
508
509 // Evaluate expression
510 arrow::ArrayVector outputs;
511 status = projector->Evaluate(*in_batch, pool_, &outputs);
512 ASSERT_OK(status);
513
514 // Checks that the response for the hashSHA1, sha and sha1 are equals for the first
515 // field of utf8 type
516 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha1 and sha
517 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(1), outputs.at(2)); // sha and sha1
518
519 // Checks that the response for the hashSHA1, sha and sha1 are equals for the second
520 // field of int64 type
521 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(3), outputs.at(4)); // hashSha1 and sha
522 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4), outputs.at(5)); // sha and sha1
523
524 // Checks that the response for the hashSHA1, sha and sha1 are equals for the first
525 // field of float64 type
526 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(6), outputs.at(7)); // hashSha1 and sha responses
527 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(7), outputs.at(8)); // sha and sha1 responses
528 }
529
530 TEST_F(TestHash, TestSha256FunctionsAlias) {
531 // schema for input fields
532 auto field_a = field("a", utf8());
533 auto field_b = field("c", int64());
534 auto field_c = field("e", float64());
535 auto schema = arrow::schema({field_a, field_b, field_c});
536
537 // output fields
538 auto res_0 = field("res0", utf8());
539 auto res_0_sha256 = field("res0sha256", utf8());
540
541 auto res_1 = field("res1", utf8());
542 auto res_1_sha256 = field("res1sha256", utf8());
543
544 auto res_2 = field("res2", utf8());
545 auto res_2_sha256 = field("res2_sha256", utf8());
546
547 // build expressions.
548 // hashSHA1(a)
549 auto node_a = TreeExprBuilder::MakeField(field_a);
550 auto hashSha2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_a}, utf8());
551 auto expr_0 = TreeExprBuilder::MakeExpression(hashSha2, res_0);
552 auto sha256 = TreeExprBuilder::MakeFunction("sha256", {node_a}, utf8());
553 auto expr_0_sha256 = TreeExprBuilder::MakeExpression(sha256, res_0_sha256);
554
555 auto node_b = TreeExprBuilder::MakeField(field_b);
556 auto hashSha2_1 = TreeExprBuilder::MakeFunction("hashSHA256", {node_b}, utf8());
557 auto expr_1 = TreeExprBuilder::MakeExpression(hashSha2_1, res_1);
558 auto sha256_1 = TreeExprBuilder::MakeFunction("sha256", {node_b}, utf8());
559 auto expr_1_sha256 = TreeExprBuilder::MakeExpression(sha256_1, res_1_sha256);
560
561 auto node_c = TreeExprBuilder::MakeField(field_c);
562 auto hashSha2_2 = TreeExprBuilder::MakeFunction("hashSHA256", {node_c}, utf8());
563 auto expr_2 = TreeExprBuilder::MakeExpression(hashSha2_2, res_2);
564 auto sha256_2 = TreeExprBuilder::MakeFunction("sha256", {node_c}, utf8());
565 auto expr_2_sha256 = TreeExprBuilder::MakeExpression(sha256_2, res_2_sha256);
566
567 // Build a projector for the expressions.
568 std::shared_ptr<Projector> projector;
569 auto status = Projector::Make(
570 schema, {expr_0, expr_0_sha256, expr_1, expr_1_sha256, expr_2, expr_2_sha256},
571 TestConfiguration(), &projector);
572 ASSERT_OK(status) << status.message();
573
574 // Create a row-batch with some sample data
575 int32_t num_records = 3;
576
577 std::string first_string =
578 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
579 "Yen [jɛn], Yoga [ˈjoːgɑ]";
580 std::string second_string =
581 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
582 "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
583
584 auto array_utf8 =
585 MakeArrowArrayUtf8({"", first_string, second_string}, {false, true, true});
586
587 auto validity_array = {false, true, true};
588
589 auto array_int64 = MakeArrowArrayInt64({1, 0, 32423}, validity_array);
590
591 auto array_float64 = MakeArrowArrayFloat64({1.0, 0.0, 324893.3849}, validity_array);
592
593 // prepare input record batch
594 auto in_batch = arrow::RecordBatch::Make(schema, num_records,
595 {array_utf8, array_int64, array_float64});
596
597 // Evaluate expression
598 arrow::ArrayVector outputs;
599 status = projector->Evaluate(*in_batch, pool_, &outputs);
600 ASSERT_OK(status);
601
602 // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first
603 // field of utf8 type
604 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(0), outputs.at(1)); // hashSha2 and sha256
605
606 // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the second
607 // field of int64 type
608 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(2), outputs.at(3)); // hashSha2 and sha256
609
610 // Checks that the response for the hashSHA2, sha256 and sha2 are equals for the first
611 // field of float64 type
612 EXPECT_ARROW_ARRAY_EQUALS(outputs.at(4),
613 outputs.at(5)); // hashSha2 and sha256 responses
614 }
615 } // namespace gandiva