]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/util/trie_benchmark.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / util / trie_benchmark.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "benchmark/benchmark.h"
19
20#include <cstdint>
21#include <string>
22#include <vector>
23
24#include "arrow/status.h"
25#include "arrow/testing/gtest_util.h"
26#include "arrow/util/trie.h"
27
28namespace arrow {
29namespace internal {
30
31std::vector<std::string> AllNulls() {
32 return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
33 "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"};
34}
35
36Trie MakeNullsTrie() {
37 auto nulls = AllNulls();
38
39 TrieBuilder builder;
40 for (const auto& str : AllNulls()) {
41 ABORT_NOT_OK(builder.Append(str));
42 }
43 return builder.Finish();
44}
45
46std::vector<std::string> Expand(const std::vector<std::string>& base, size_t n) {
47 std::vector<std::string> result;
48 result.reserve(n);
49
50 while (true) {
51 for (const auto& v : base) {
52 result.push_back(v);
53 if (result.size() == n) {
54 return result;
55 }
56 }
57 }
58}
59
60static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference
61 const std::vector<std::string>& strings) {
62 Trie trie = MakeNullsTrie();
63 int32_t total = 0;
64
65 auto lookups = Expand(strings, 100);
66
67 for (auto _ : state) {
68 for (const auto& s : lookups) {
69 total += trie.Find(s);
70 }
71 }
72 benchmark::DoNotOptimize(total);
73 state.SetItemsProcessed(state.iterations() * lookups.size());
74}
75
76static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference
77 BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"});
78}
79
80static void TrieLookupNotFound(benchmark::State& state) { // NOLINT non-const reference
81 BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"});
82}
83
84BENCHMARK(TrieLookupFound);
85BENCHMARK(TrieLookupNotFound);
86
87#ifdef ARROW_WITH_BENCHMARKS_REFERENCE
88
89static inline bool InlinedNullLookup(util::string_view s) {
90 // An inlined version of trie lookup for a specific set of strings
91 // (see AllNulls())
92 auto size = s.length();
93 auto data = s.data();
94 if (size == 0) {
95 return false;
96 }
97 if (size == 1) {
98 return false;
99 }
100
101 auto chars = reinterpret_cast<const char*>(data);
102 auto first = chars[0];
103 auto second = chars[1];
104 switch (first) {
105 case 'N': {
106 // "NA", "N/A", "NaN", "NULL"
107 if (size == 2) {
108 return second == 'A';
109 }
110 auto third = chars[2];
111 if (size == 3) {
112 return (second == '/' && third == 'A') || (second == 'a' && third == 'N');
113 }
114 if (size == 4) {
115 return (second == 'U' && third == 'L' && chars[3] == 'L');
116 }
117 return false;
118 }
119 case 'n': {
120 // "n/a", "nan", "null"
121 if (size == 2) {
122 return false;
123 }
124 auto third = chars[2];
125 if (size == 3) {
126 return (second == '/' && third == 'a') || (second == 'a' && third == 'n');
127 }
128 if (size == 4) {
129 return (second == 'u' && third == 'l' && chars[3] == 'l');
130 }
131 return false;
132 }
133 case '1': {
134 // '1.#IND', '1.#QNAN'
135 if (size == 6) {
136 // '#' is the most unlikely char here, check it first
137 return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'I' &&
138 chars[4] == 'N' && chars[5] == 'D');
139 }
140 if (size == 7) {
141 return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'Q' &&
142 chars[4] == 'N' && chars[5] == 'A' && chars[6] == 'N');
143 }
144 return false;
145 }
146 case '-': {
147 switch (second) {
148 case 'N':
149 // "-NaN"
150 return (size == 4 && chars[2] == 'a' && chars[3] == 'N');
151 case 'n':
152 // "-nan"
153 return (size == 4 && chars[2] == 'a' && chars[3] == 'n');
154 case '1':
155 // "-1.#IND", "-1.#QNAN"
156 if (size == 7) {
157 return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'I' &&
158 chars[5] == 'N' && chars[6] == 'D');
159 }
160 if (size == 8) {
161 return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'Q' &&
162 chars[5] == 'N' && chars[6] == 'A' && chars[7] == 'N');
163 }
164 return false;
165 default:
166 return false;
167 }
168 }
169 case '#': {
170 // "#N/A", "#N/A N/A", "#NA"
171 if (size < 3 || chars[1] != 'N') {
172 return false;
173 }
174 auto third = chars[2];
175 if (size == 3) {
176 return third == 'A';
177 }
178 if (size == 4) {
179 return third == '/' && chars[3] == 'A';
180 }
181 if (size == 8) {
182 return std::memcmp(data + 2, "/A N/A", 5) == 0;
183 }
184 return false;
185 }
186 default:
187 return false;
188 }
189}
190
191static void BenchmarkInlinedTrieLookups(
192 benchmark::State& state, // NOLINT non-const reference
193 const std::vector<std::string>& strings) {
194 int32_t total = 0;
195
196 auto lookups = Expand(strings, 100);
197
198 for (auto _ : state) {
199 for (const auto& s : lookups) {
200 total += InlinedNullLookup(s);
201 }
202 }
203 benchmark::DoNotOptimize(total);
204 state.SetItemsProcessed(state.iterations() * lookups.size());
205}
206static void InlinedTrieLookupFound(
207 benchmark::State& state) { // NOLINT non-const reference
208 BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"});
209}
210
211static void InlinedTrieLookupNotFound(
212 benchmark::State& state) { // NOLINT non-const reference
213 BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"});
214}
215
216BENCHMARK(InlinedTrieLookupFound);
217BENCHMARK(InlinedTrieLookupNotFound);
218
219#endif
220
221} // namespace internal
222} // namespace arrow