]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/gandiva/hash_utils_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / gandiva / hash_utils_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include <unordered_set>
20
21 #include "gandiva/execution_context.h"
22 #include "gandiva/hash_utils.h"
23
24 TEST(TestShaHashUtils, TestSha1Numeric) {
25 gandiva::ExecutionContext ctx;
26
27 auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
28
29 std::vector<uint64_t> values_to_be_hashed;
30
31 // Generate a list of values to obtains the SHA1 hash
32 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.0));
33 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.1));
34 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.2));
35 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.10000001));
36 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000001));
37 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(1.000000));
38 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000002));
39 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.999999));
40
41 // Checks if the hash value is different for each one of the values
42 std::unordered_set<std::string> sha_values;
43
44 int sha1_size = 40;
45
46 for (auto value : values_to_be_hashed) {
47 int out_length;
48 const char* sha_1 =
49 gandiva::gdv_hash_using_sha1(ctx_ptr, &value, sizeof(value), &out_length);
50 std::string sha1_as_str(sha_1, out_length);
51 EXPECT_EQ(sha1_as_str.size(), sha1_size);
52
53 // The value can not exists inside the set with the hash results
54 EXPECT_EQ(sha_values.find(sha1_as_str), sha_values.end());
55 sha_values.insert(sha1_as_str);
56 }
57 }
58
59 TEST(TestShaHashUtils, TestSha256Numeric) {
60 gandiva::ExecutionContext ctx;
61
62 auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
63
64 std::vector<uint64_t> values_to_be_hashed;
65
66 // Generate a list of values to obtains the SHA1 hash
67 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.0));
68 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.1));
69 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.2));
70 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.10000001));
71 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000001));
72 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(1.000000));
73 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(-0.0000002));
74 values_to_be_hashed.push_back(gandiva::gdv_double_to_long(0.999999));
75
76 // Checks if the hash value is different for each one of the values
77 std::unordered_set<std::string> sha_values;
78
79 int sha256_size = 64;
80
81 for (auto value : values_to_be_hashed) {
82 int out_length;
83 const char* sha_256 =
84 gandiva::gdv_hash_using_sha256(ctx_ptr, &value, sizeof(value), &out_length);
85 std::string sha256_as_str(sha_256, out_length);
86 EXPECT_EQ(sha256_as_str.size(), sha256_size);
87
88 // The value can not exists inside the set with the hash results
89 EXPECT_EQ(sha_values.find(sha256_as_str), sha_values.end());
90 sha_values.insert(sha256_as_str);
91 }
92 }
93
94 TEST(TestShaHashUtils, TestSha1Varlen) {
95 gandiva::ExecutionContext ctx;
96
97 auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
98
99 std::string first_string =
100 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
101 "Yen [jɛn], Yoga [ˈjoːgɑ]";
102
103 std::string second_string =
104 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
105 "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
106
107 // The strings expected hashes are obtained from shell executing the following command:
108 // echo -n <output-string> | openssl dgst sha1
109 std::string expected_first_result = "160fcdbc2fa694d884868f5fae7a4bae82706185";
110 std::string expected_second_result = "a456b3e0f88669d2482170a42fade226a815bee1";
111
112 // Generate the hashes and compare with expected outputs
113 const int sha1_size = 40;
114 int out_length;
115
116 const char* sha_1 = gandiva::gdv_hash_using_sha1(ctx_ptr, first_string.c_str(),
117 first_string.size(), &out_length);
118 std::string sha1_as_str(sha_1, out_length);
119 EXPECT_EQ(sha1_as_str.size(), sha1_size);
120 EXPECT_EQ(sha1_as_str, expected_first_result);
121
122 const char* sha_2 = gandiva::gdv_hash_using_sha1(ctx_ptr, second_string.c_str(),
123 second_string.size(), &out_length);
124 std::string sha2_as_str(sha_2, out_length);
125 EXPECT_EQ(sha2_as_str.size(), sha1_size);
126 EXPECT_EQ(sha2_as_str, expected_second_result);
127 }
128
129 TEST(TestShaHashUtils, TestSha256Varlen) {
130 gandiva::ExecutionContext ctx;
131
132 auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
133
134 std::string first_string =
135 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn\nY [ˈʏpsilɔn], "
136 "Yen [jɛn], Yoga [ˈjoːgɑ]";
137
138 std::string second_string =
139 "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeın\nY [ˈʏpsilɔn], "
140 "Yen [jɛn], Yoga [ˈjoːgɑ] コンニチハ";
141
142 // The strings expected hashes are obtained from shell executing the following command:
143 // echo -n <output-string> | openssl dgst sha1
144 std::string expected_first_result =
145 "55aeb2e789871dbd289edae94d4c1c82a1c25ca0bcd5a873924da2fefdd57acb";
146 std::string expected_second_result =
147 "86b29c13d0d0e26ea8f85bfa649dc9b8622ae59a4da2409d7d9b463e86e796f2";
148
149 // Generate the hashes and compare with expected outputs
150 const int sha256_size = 64;
151 int out_length;
152
153 const char* sha_1 = gandiva::gdv_hash_using_sha256(ctx_ptr, first_string.c_str(),
154 first_string.size(), &out_length);
155 std::string sha1_as_str(sha_1, out_length);
156 EXPECT_EQ(sha1_as_str.size(), sha256_size);
157 EXPECT_EQ(sha1_as_str, expected_first_result);
158
159 const char* sha_2 = gandiva::gdv_hash_using_sha256(ctx_ptr, second_string.c_str(),
160 second_string.size(), &out_length);
161 std::string sha2_as_str(sha_2, out_length);
162 EXPECT_EQ(sha2_as_str.size(), sha256_size);
163 EXPECT_EQ(sha2_as_str, expected_second_result);
164 }