// Source: git.proxmox.com, ceph.git (import quincy 17.2.0)
// Path: ceph/src/arrow/cpp/src/arrow/util/variant_benchmark.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
#include "benchmark/benchmark.h"

#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

#include "arrow/array.h"
#include "arrow/chunked_array.h"
#include "arrow/datum.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/variant.h"
35
36 namespace arrow {
37
38 using internal::checked_pointer_cast;
39
40 namespace util {
41
42 using TrivialVariant = arrow::util::Variant<int32_t, float>;
43
44 using NonTrivialVariant = arrow::util::Variant<int32_t, std::string>;
45
46 std::vector<int32_t> MakeInts(int64_t nitems) {
47 auto rng = arrow::random::RandomArrayGenerator(42);
48 auto array = checked_pointer_cast<Int32Array>(rng.Int32(nitems, 0, 1 << 30));
49 std::vector<int32_t> items(nitems);
50 for (int64_t i = 0; i < nitems; ++i) {
51 items[i] = array->Value(i);
52 }
53 return items;
54 }
55
56 std::vector<float> MakeFloats(int64_t nitems) {
57 auto rng = arrow::random::RandomArrayGenerator(42);
58 auto array = checked_pointer_cast<FloatArray>(rng.Float32(nitems, 0.0, 1.0));
59 std::vector<float> items(nitems);
60 for (int64_t i = 0; i < nitems; ++i) {
61 items[i] = array->Value(i);
62 }
63 return items;
64 }
65
66 std::vector<std::string> MakeStrings(int64_t nitems) {
67 auto rng = arrow::random::RandomArrayGenerator(42);
68 // Some std::string's will use short string optimization, but not all...
69 auto array = checked_pointer_cast<StringArray>(rng.String(nitems, 5, 40));
70 std::vector<std::string> items(nitems);
71 for (int64_t i = 0; i < nitems; ++i) {
72 items[i] = array->GetString(i);
73 }
74 return items;
75 }
76
77 static void ConstructTrivialVariant(benchmark::State& state) {
78 const int64_t N = 10000;
79 const auto ints = MakeInts(N);
80 const auto floats = MakeFloats(N);
81
82 for (auto _ : state) {
83 for (int64_t i = 0; i < N; ++i) {
84 // About type selection: we ensure 50% of each type, but try to avoid
85 // branch mispredictions by creating runs of the same type.
86 if (i & 0x10) {
87 TrivialVariant v{ints[i]};
88 const int32_t* val = &arrow::util::get<int32_t>(v);
89 benchmark::DoNotOptimize(val);
90 } else {
91 TrivialVariant v{floats[i]};
92 const float* val = &arrow::util::get<float>(v);
93 benchmark::DoNotOptimize(val);
94 }
95 }
96 }
97
98 state.SetItemsProcessed(state.iterations() * N);
99 }
100
101 static void ConstructNonTrivialVariant(benchmark::State& state) {
102 const int64_t N = 10000;
103 const auto ints = MakeInts(N);
104 const auto strings = MakeStrings(N);
105
106 for (auto _ : state) {
107 for (int64_t i = 0; i < N; ++i) {
108 if (i & 0x10) {
109 NonTrivialVariant v{ints[i]};
110 const int32_t* val = &arrow::util::get<int32_t>(v);
111 benchmark::DoNotOptimize(val);
112 } else {
113 NonTrivialVariant v{strings[i]};
114 const std::string* val = &arrow::util::get<std::string>(v);
115 benchmark::DoNotOptimize(val);
116 }
117 }
118 }
119
120 state.SetItemsProcessed(state.iterations() * N);
121 }
122
// Visitor that folds every visited value into a running 64-bit total.
//
// Each overload handles one alternative type used by the variants in this file:
//   - int32_t:     adds the value itself;
//   - float:       adds the value's bit pattern reinterpreted as an int32_t;
//   - std::string: adds the string's length.
struct VariantVisitor {
  // Running sum over all values visited so far.
  int64_t total = 0;

  void operator()(const int32_t& v) { total += v; }

  void operator()(const float& v) {
    // Avoid potentially costly float-to-int conversion: reuse the raw bits
    // of the float instead of converting its numeric value.
    static_assert(sizeof(float) == sizeof(int32_t),
                  "float bit pattern must fit exactly in an int32_t");
    int32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));
    total += bits;
  }

  void operator()(const std::string& v) { total += static_cast<int64_t>(v.length()); }
};
135
136 template <typename VariantType>
137 static void VisitVariant(benchmark::State& state,
138 const std::vector<VariantType>& variants) {
139 for (auto _ : state) {
140 VariantVisitor visitor;
141 for (const auto& v : variants) {
142 visit(visitor, v);
143 }
144 benchmark::DoNotOptimize(visitor.total);
145 }
146
147 state.SetItemsProcessed(state.iterations() * variants.size());
148 }
149
150 static void VisitTrivialVariant(benchmark::State& state) {
151 const int64_t N = 10000;
152 const auto ints = MakeInts(N);
153 const auto floats = MakeFloats(N);
154
155 std::vector<TrivialVariant> variants;
156 variants.reserve(N);
157 for (int64_t i = 0; i < N; ++i) {
158 if (i & 0x10) {
159 variants.emplace_back(ints[i]);
160 } else {
161 variants.emplace_back(floats[i]);
162 }
163 }
164
165 VisitVariant(state, variants);
166 }
167
168 static void VisitNonTrivialVariant(benchmark::State& state) {
169 const int64_t N = 10000;
170 const auto ints = MakeInts(N);
171 const auto strings = MakeStrings(N);
172
173 std::vector<NonTrivialVariant> variants;
174 variants.reserve(N);
175 for (int64_t i = 0; i < N; ++i) {
176 if (i & 0x10) {
177 variants.emplace_back(ints[i]);
178 } else {
179 variants.emplace_back(strings[i]);
180 }
181 }
182
183 VisitVariant(state, variants);
184 }
185
186 static void ConstructDatum(benchmark::State& state) {
187 const int64_t N = 10000;
188 auto array = *MakeArrayOfNull(int8(), 100);
189 auto chunked_array = std::make_shared<ChunkedArray>(ArrayVector{array, array});
190
191 for (auto _ : state) {
192 for (int64_t i = 0; i < N; ++i) {
193 if (i & 0x10) {
194 Datum datum{array};
195 const ArrayData* val = datum.array().get();
196 benchmark::DoNotOptimize(val);
197 } else {
198 Datum datum{chunked_array};
199 const ChunkedArray* val = datum.chunked_array().get();
200 benchmark::DoNotOptimize(val);
201 }
202 }
203 }
204
205 state.SetItemsProcessed(state.iterations() * N);
206 }
207
208 static void VisitDatum(benchmark::State& state) {
209 const int64_t N = 10000;
210 auto array = *MakeArrayOfNull(int8(), 100);
211 auto chunked_array = std::make_shared<ChunkedArray>(ArrayVector{array, array});
212
213 std::vector<Datum> datums;
214 datums.reserve(N);
215 for (int64_t i = 0; i < N; ++i) {
216 if (i & 0x10) {
217 datums.emplace_back(array);
218 } else {
219 datums.emplace_back(chunked_array);
220 }
221 }
222
223 for (auto _ : state) {
224 int64_t total = 0;
225 for (const auto& datum : datums) {
226 // The .is_XXX() methods are the usual idiom when visiting a Datum,
227 // rather than the visit() function.
228 if (datum.is_array()) {
229 total += datum.array()->length;
230 } else {
231 total += datum.chunked_array()->length();
232 }
233 }
234 benchmark::DoNotOptimize(total);
235 }
236
237 state.SetItemsProcessed(state.iterations() * datums.size());
238 }
239
240 BENCHMARK(ConstructTrivialVariant);
241 BENCHMARK(ConstructNonTrivialVariant);
242 BENCHMARK(VisitTrivialVariant);
243 BENCHMARK(VisitNonTrivialVariant);
244 BENCHMARK(ConstructDatum);
245 BENCHMARK(VisitDatum);
246
247 } // namespace util
248 } // namespace arrow