]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "benchmark/benchmark.h" | |
19 | ||
20 | #include <cstdint> | |
21 | #include <string> | |
22 | #include <vector> | |
23 | ||
24 | #include "arrow/status.h" | |
25 | #include "arrow/testing/gtest_util.h" | |
26 | #include "arrow/util/trie.h" | |
27 | ||
28 | namespace arrow { | |
29 | namespace internal { | |
30 | ||
31 | std::vector<std::string> AllNulls() { | |
32 | return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", | |
33 | "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"}; | |
34 | } | |
35 | ||
36 | Trie MakeNullsTrie() { | |
37 | auto nulls = AllNulls(); | |
38 | ||
39 | TrieBuilder builder; | |
40 | for (const auto& str : AllNulls()) { | |
41 | ABORT_NOT_OK(builder.Append(str)); | |
42 | } | |
43 | return builder.Finish(); | |
44 | } | |
45 | ||
46 | std::vector<std::string> Expand(const std::vector<std::string>& base, size_t n) { | |
47 | std::vector<std::string> result; | |
48 | result.reserve(n); | |
49 | ||
50 | while (true) { | |
51 | for (const auto& v : base) { | |
52 | result.push_back(v); | |
53 | if (result.size() == n) { | |
54 | return result; | |
55 | } | |
56 | } | |
57 | } | |
58 | } | |
59 | ||
60 | static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference | |
61 | const std::vector<std::string>& strings) { | |
62 | Trie trie = MakeNullsTrie(); | |
63 | int32_t total = 0; | |
64 | ||
65 | auto lookups = Expand(strings, 100); | |
66 | ||
67 | for (auto _ : state) { | |
68 | for (const auto& s : lookups) { | |
69 | total += trie.Find(s); | |
70 | } | |
71 | } | |
72 | benchmark::DoNotOptimize(total); | |
73 | state.SetItemsProcessed(state.iterations() * lookups.size()); | |
74 | } | |
75 | ||
76 | static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference | |
77 | BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); | |
78 | } | |
79 | ||
80 | static void TrieLookupNotFound(benchmark::State& state) { // NOLINT non-const reference | |
81 | BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); | |
82 | } | |
83 | ||
84 | BENCHMARK(TrieLookupFound); | |
85 | BENCHMARK(TrieLookupNotFound); | |
86 | ||
87 | #ifdef ARROW_WITH_BENCHMARKS_REFERENCE | |
88 | ||
89 | static inline bool InlinedNullLookup(util::string_view s) { | |
90 | // An inlined version of trie lookup for a specific set of strings | |
91 | // (see AllNulls()) | |
92 | auto size = s.length(); | |
93 | auto data = s.data(); | |
94 | if (size == 0) { | |
95 | return false; | |
96 | } | |
97 | if (size == 1) { | |
98 | return false; | |
99 | } | |
100 | ||
101 | auto chars = reinterpret_cast<const char*>(data); | |
102 | auto first = chars[0]; | |
103 | auto second = chars[1]; | |
104 | switch (first) { | |
105 | case 'N': { | |
106 | // "NA", "N/A", "NaN", "NULL" | |
107 | if (size == 2) { | |
108 | return second == 'A'; | |
109 | } | |
110 | auto third = chars[2]; | |
111 | if (size == 3) { | |
112 | return (second == '/' && third == 'A') || (second == 'a' && third == 'N'); | |
113 | } | |
114 | if (size == 4) { | |
115 | return (second == 'U' && third == 'L' && chars[3] == 'L'); | |
116 | } | |
117 | return false; | |
118 | } | |
119 | case 'n': { | |
120 | // "n/a", "nan", "null" | |
121 | if (size == 2) { | |
122 | return false; | |
123 | } | |
124 | auto third = chars[2]; | |
125 | if (size == 3) { | |
126 | return (second == '/' && third == 'a') || (second == 'a' && third == 'n'); | |
127 | } | |
128 | if (size == 4) { | |
129 | return (second == 'u' && third == 'l' && chars[3] == 'l'); | |
130 | } | |
131 | return false; | |
132 | } | |
133 | case '1': { | |
134 | // '1.#IND', '1.#QNAN' | |
135 | if (size == 6) { | |
136 | // '#' is the most unlikely char here, check it first | |
137 | return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'I' && | |
138 | chars[4] == 'N' && chars[5] == 'D'); | |
139 | } | |
140 | if (size == 7) { | |
141 | return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'Q' && | |
142 | chars[4] == 'N' && chars[5] == 'A' && chars[6] == 'N'); | |
143 | } | |
144 | return false; | |
145 | } | |
146 | case '-': { | |
147 | switch (second) { | |
148 | case 'N': | |
149 | // "-NaN" | |
150 | return (size == 4 && chars[2] == 'a' && chars[3] == 'N'); | |
151 | case 'n': | |
152 | // "-nan" | |
153 | return (size == 4 && chars[2] == 'a' && chars[3] == 'n'); | |
154 | case '1': | |
155 | // "-1.#IND", "-1.#QNAN" | |
156 | if (size == 7) { | |
157 | return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'I' && | |
158 | chars[5] == 'N' && chars[6] == 'D'); | |
159 | } | |
160 | if (size == 8) { | |
161 | return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'Q' && | |
162 | chars[5] == 'N' && chars[6] == 'A' && chars[7] == 'N'); | |
163 | } | |
164 | return false; | |
165 | default: | |
166 | return false; | |
167 | } | |
168 | } | |
169 | case '#': { | |
170 | // "#N/A", "#N/A N/A", "#NA" | |
171 | if (size < 3 || chars[1] != 'N') { | |
172 | return false; | |
173 | } | |
174 | auto third = chars[2]; | |
175 | if (size == 3) { | |
176 | return third == 'A'; | |
177 | } | |
178 | if (size == 4) { | |
179 | return third == '/' && chars[3] == 'A'; | |
180 | } | |
181 | if (size == 8) { | |
182 | return std::memcmp(data + 2, "/A N/A", 5) == 0; | |
183 | } | |
184 | return false; | |
185 | } | |
186 | default: | |
187 | return false; | |
188 | } | |
189 | } | |
190 | ||
191 | static void BenchmarkInlinedTrieLookups( | |
192 | benchmark::State& state, // NOLINT non-const reference | |
193 | const std::vector<std::string>& strings) { | |
194 | int32_t total = 0; | |
195 | ||
196 | auto lookups = Expand(strings, 100); | |
197 | ||
198 | for (auto _ : state) { | |
199 | for (const auto& s : lookups) { | |
200 | total += InlinedNullLookup(s); | |
201 | } | |
202 | } | |
203 | benchmark::DoNotOptimize(total); | |
204 | state.SetItemsProcessed(state.iterations() * lookups.size()); | |
205 | } | |
206 | static void InlinedTrieLookupFound( | |
207 | benchmark::State& state) { // NOLINT non-const reference | |
208 | BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); | |
209 | } | |
210 | ||
211 | static void InlinedTrieLookupNotFound( | |
212 | benchmark::State& state) { // NOLINT non-const reference | |
213 | BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"}); | |
214 | } | |
215 | ||
216 | BENCHMARK(InlinedTrieLookupFound); | |
217 | BENCHMARK(InlinedTrieLookupNotFound); | |
218 | ||
219 | #endif | |
220 | ||
221 | } // namespace internal | |
222 | } // namespace arrow |