]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / kernels / scalar_if_else_benchmark.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <benchmark/benchmark.h>
19
20 #include "arrow/array/concatenate.h"
21 #include "arrow/compute/api_scalar.h"
22 #include "arrow/testing/gtest_util.h"
23 #include "arrow/testing/random.h"
24 #include "arrow/util/key_value_metadata.h"
25
26 namespace arrow {
27 namespace compute {
28
// Number of elements used by most benchmarks in this file (1024 * 1024).
constexpr int64_t kNumItems = int64_t{1} << 20;
// Smaller element count (64 * 1024) used by the list and string CaseWhen
// benchmarks.
constexpr int64_t kFewItems = int64_t{1} << 16;
31
// Trait that estimates how many bytes an array of a given Arrow type holds.
//
// The primary template is deliberately empty: only the specializations below
// provide a static Get(). The second parameter exists solely so that those
// specializations can be selected through enable_if-style constraints.
template <typename ArrowType, typename Enable = void>
struct GetBytesProcessed {
};
34
35 template <>
36 struct GetBytesProcessed<BooleanType> {
37 static int64_t Get(const std::shared_ptr<Array>& arr) { return arr->length() / 8; }
38 };
39
40 template <typename Type>
41 struct GetBytesProcessed<Type, enable_if_number<Type>> {
42 static int64_t Get(const std::shared_ptr<Array>& arr) {
43 using CType = typename Type::c_type;
44 return arr->length() * sizeof(CType);
45 }
46 };
47
48 template <typename Type>
49 struct GetBytesProcessed<Type, enable_if_base_binary<Type>> {
50 static int64_t Get(const std::shared_ptr<Array>& arr) {
51 using ArrayType = typename TypeTraits<Type>::ArrayType;
52 using OffsetType = typename TypeTraits<Type>::OffsetType::c_type;
53 return arr->length() * sizeof(OffsetType) +
54 std::static_pointer_cast<ArrayType>(arr)->total_values_length();
55 }
56 };
57
58 template <typename Type>
59 static void IfElseBench(benchmark::State& state) {
60 auto type = TypeTraits<Type>::type_singleton();
61 using ArrayType = typename TypeTraits<Type>::ArrayType;
62
63 int64_t len = state.range(0);
64 int64_t offset = state.range(1);
65
66 random::RandomArrayGenerator rand(/*seed=*/0);
67
68 auto cond = std::static_pointer_cast<BooleanArray>(
69 rand.ArrayOf(boolean(), len, /*null_probability=*/0.01))
70 ->Slice(offset);
71 auto left = std::static_pointer_cast<ArrayType>(
72 rand.ArrayOf(type, len, /*null_probability=*/0.01))
73 ->Slice(offset);
74 auto right = std::static_pointer_cast<ArrayType>(
75 rand.ArrayOf(type, len, /*null_probability=*/0.01))
76 ->Slice(offset);
77
78 for (auto _ : state) {
79 ABORT_NOT_OK(IfElse(cond, left, right));
80 }
81
82 state.SetBytesProcessed(state.iterations() *
83 (GetBytesProcessed<BooleanType>::Get(cond) +
84 GetBytesProcessed<Type>::Get(left) +
85 GetBytesProcessed<Type>::Get(right)));
86 }
87
88 template <typename Type>
89 static void IfElseBenchContiguous(benchmark::State& state) {
90 auto type = TypeTraits<Type>::type_singleton();
91 using ArrayType = typename TypeTraits<Type>::ArrayType;
92
93 int64_t len = state.range(0);
94 int64_t offset = state.range(1);
95
96 ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), len / 2));
97 ASSERT_OK_AND_ASSIGN(auto temp2,
98 MakeArrayFromScalar(BooleanScalar(false), len - len / 2));
99 ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2}));
100 auto cond = std::static_pointer_cast<BooleanArray>(concat)->Slice(offset);
101
102 random::RandomArrayGenerator rand(/*seed=*/0);
103 auto left = std::static_pointer_cast<ArrayType>(
104 rand.ArrayOf(type, len, /*null_probability=*/0.01))
105 ->Slice(offset);
106 auto right = std::static_pointer_cast<ArrayType>(
107 rand.ArrayOf(type, len, /*null_probability=*/0.01))
108 ->Slice(offset);
109
110 for (auto _ : state) {
111 ABORT_NOT_OK(IfElse(cond, left, right));
112 }
113
114 state.SetBytesProcessed(state.iterations() *
115 (GetBytesProcessed<BooleanType>::Get(cond) +
116 GetBytesProcessed<Type>::Get(left) +
117 GetBytesProcessed<Type>::Get(right)));
118 }
119
120 static void IfElseBench64(benchmark::State& state) {
121 return IfElseBench<UInt64Type>(state);
122 }
123
124 static void IfElseBench32(benchmark::State& state) {
125 return IfElseBench<UInt32Type>(state);
126 }
127
128 static void IfElseBenchString32(benchmark::State& state) {
129 return IfElseBench<StringType>(state);
130 }
131
132 static void IfElseBenchString64(benchmark::State& state) {
133 return IfElseBench<LargeStringType>(state);
134 }
135
136 static void IfElseBench64Contiguous(benchmark::State& state) {
137 return IfElseBenchContiguous<UInt64Type>(state);
138 }
139
140 static void IfElseBench32Contiguous(benchmark::State& state) {
141 return IfElseBenchContiguous<UInt32Type>(state);
142 }
143
144 static void IfElseBenchString64Contiguous(benchmark::State& state) {
145 return IfElseBenchContiguous<UInt64Type>(state);
146 }
147
148 static void IfElseBenchString32Contiguous(benchmark::State& state) {
149 return IfElseBenchContiguous<UInt32Type>(state);
150 }
151
152 template <typename Type>
153 static void CaseWhenBench(benchmark::State& state) {
154 auto type = TypeTraits<Type>::type_singleton();
155 using ArrayType = typename TypeTraits<Type>::ArrayType;
156
157 int64_t len = state.range(0);
158 int64_t offset = state.range(1);
159
160 random::RandomArrayGenerator rand(/*seed=*/0);
161
162 auto cond_field =
163 field("cond", boolean(), key_value_metadata({{"null_probability", "0.01"}}));
164 auto cond = rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}),
165 key_value_metadata({{"null_probability", "0.0"}})),
166 len)
167 ->Slice(offset);
168 auto val1 = std::static_pointer_cast<ArrayType>(
169 rand.ArrayOf(type, len, /*null_probability=*/0.01))
170 ->Slice(offset);
171 auto val2 = std::static_pointer_cast<ArrayType>(
172 rand.ArrayOf(type, len, /*null_probability=*/0.01))
173 ->Slice(offset);
174 auto val3 = std::static_pointer_cast<ArrayType>(
175 rand.ArrayOf(type, len, /*null_probability=*/0.01))
176 ->Slice(offset);
177 auto val4 = std::static_pointer_cast<ArrayType>(
178 rand.ArrayOf(type, len, /*null_probability=*/0.01))
179 ->Slice(offset);
180 for (auto _ : state) {
181 ABORT_NOT_OK(CaseWhen(cond, {val1, val2, val3, val4}));
182 }
183
184 // Set bytes processed to ~length of output
185 state.SetBytesProcessed(state.iterations() * GetBytesProcessed<Type>::Get(val1));
186 state.SetItemsProcessed(state.iterations() * (len - offset));
187 }
188
189 static void CaseWhenBenchList(benchmark::State& state) {
190 auto type = list(int64());
191 auto fld = field("", type);
192
193 int64_t len = state.range(0);
194 int64_t offset = state.range(1);
195
196 random::RandomArrayGenerator rand(/*seed=*/0);
197
198 auto cond_field =
199 field("cond", boolean(), key_value_metadata({{"null_probability", "0.01"}}));
200 auto cond = rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}),
201 key_value_metadata({{"null_probability", "0.0"}})),
202 len);
203 auto val1 = rand.ArrayOf(*fld, len);
204 auto val2 = rand.ArrayOf(*fld, len);
205 auto val3 = rand.ArrayOf(*fld, len);
206 auto val4 = rand.ArrayOf(*fld, len);
207 for (auto _ : state) {
208 ABORT_NOT_OK(
209 CaseWhen(cond->Slice(offset), {val1->Slice(offset), val2->Slice(offset),
210 val3->Slice(offset), val4->Slice(offset)}));
211 }
212
213 // Set bytes processed to ~length of output
214 state.SetBytesProcessed(state.iterations() *
215 GetBytesProcessed<Int64Type>::Get(
216 std::static_pointer_cast<ListArray>(val1)->values()));
217 state.SetItemsProcessed(state.iterations() * (len - offset));
218 }
219
220 template <typename Type>
221 static void CaseWhenBenchContiguous(benchmark::State& state) {
222 auto type = TypeTraits<Type>::type_singleton();
223 using ArrayType = typename TypeTraits<Type>::ArrayType;
224
225 int64_t len = state.range(0);
226 int64_t offset = state.range(1);
227
228 ASSERT_OK_AND_ASSIGN(auto trues, MakeArrayFromScalar(BooleanScalar(true), len / 3));
229 ASSERT_OK_AND_ASSIGN(auto falses, MakeArrayFromScalar(BooleanScalar(false), len / 3));
230 ASSERT_OK_AND_ASSIGN(auto nulls, MakeArrayOfNull(boolean(), len - 2 * (len / 3)));
231 ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({trues, falses, nulls}));
232 auto cond1 = std::static_pointer_cast<BooleanArray>(concat);
233
234 random::RandomArrayGenerator rand(/*seed=*/0);
235 auto cond2 = std::static_pointer_cast<BooleanArray>(
236 rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
237 auto val1 = std::static_pointer_cast<ArrayType>(
238 rand.ArrayOf(type, len, /*null_probability=*/0.01))
239 ->Slice(offset);
240 auto val2 = std::static_pointer_cast<ArrayType>(
241 rand.ArrayOf(type, len, /*null_probability=*/0.01))
242 ->Slice(offset);
243 auto val3 = std::static_pointer_cast<ArrayType>(
244 rand.ArrayOf(type, len, /*null_probability=*/0.01))
245 ->Slice(offset);
246 ASSERT_OK_AND_ASSIGN(
247 std::shared_ptr<Array> cond,
248 StructArray::Make({cond1, cond2}, std::vector<std::string>{"a", "b"}, nullptr,
249 /*null_count=*/0));
250 cond = cond->Slice(offset);
251
252 for (auto _ : state) {
253 ABORT_NOT_OK(CaseWhen(cond, {val1, val2, val3}));
254 }
255
256 // Set bytes processed to ~length of output
257 state.SetBytesProcessed(state.iterations() * GetBytesProcessed<Type>::Get(val1));
258 state.SetItemsProcessed(state.iterations() * (len - offset));
259 }
260
261 static void CaseWhenBench64(benchmark::State& state) {
262 return CaseWhenBench<UInt64Type>(state);
263 }
264
265 static void CaseWhenBench64Contiguous(benchmark::State& state) {
266 return CaseWhenBenchContiguous<UInt64Type>(state);
267 }
268
269 static void CaseWhenBenchString(benchmark::State& state) {
270 return CaseWhenBench<StringType>(state);
271 }
272
273 static void CaseWhenBenchStringContiguous(benchmark::State& state) {
274 return CaseWhenBenchContiguous<StringType>(state);
275 }
276
// One configuration of the coalesce benchmarks.
struct CoalesceParams {
  // Length of each generated input array.
  int64_t length;
  // Number of input arrays generated by CoalesceBench.
  int64_t num_arguments;
  // Null probability passed to the random array generator (0.0 - 1.0).
  double null_probability;
};
282
283 std::vector<CoalesceParams> g_coalesce_params = {
284 {kNumItems, 2, 0.01}, {kNumItems, 4, 0.01}, {kNumItems, 2, 0.25},
285 {kNumItems, 4, 0.25}, {kNumItems, 2, 0.50}, {kNumItems, 4, 0.50},
286 {kNumItems, 2, 0.99}, {kNumItems, 4, 0.99},
287 };
288
289 struct CoalesceArgs : public CoalesceParams {
290 explicit CoalesceArgs(benchmark::State& state) : state_(state) {
291 const auto& params = g_coalesce_params[state.range(0)];
292 length = params.length;
293 num_arguments = params.num_arguments;
294 null_probability = params.null_probability;
295 }
296
297 ~CoalesceArgs() {
298 state_.counters["length"] = static_cast<double>(length);
299 state_.counters["null%"] = null_probability * 100;
300 state_.counters["num_args"] = static_cast<double>(num_arguments);
301 }
302
303 private:
304 benchmark::State& state_;
305 };
306
307 template <typename Type>
308 static void CoalesceBench(benchmark::State& state) {
309 auto type = TypeTraits<Type>::type_singleton();
310 CoalesceArgs params(state);
311 random::RandomArrayGenerator rand(/*seed=*/0);
312
313 std::vector<Datum> arguments;
314 for (int i = 0; i < params.num_arguments; i++) {
315 arguments.emplace_back(rand.ArrayOf(type, params.length, params.null_probability));
316 }
317
318 for (auto _ : state) {
319 ABORT_NOT_OK(CallFunction("coalesce", arguments));
320 }
321
322 state.SetBytesProcessed(state.iterations() *
323 GetBytesProcessed<Type>::Get(arguments.front().make_array()));
324 state.SetItemsProcessed(state.iterations() * params.length);
325 }
326
327 template <typename Type>
328 static void CoalesceScalarBench(benchmark::State& state) {
329 using CType = typename Type::c_type;
330 auto type = TypeTraits<Type>::type_singleton();
331 CoalesceArgs params(state);
332 random::RandomArrayGenerator rand(/*seed=*/0);
333
334 std::vector<Datum> arguments = {
335 rand.ArrayOf(type, params.length, params.null_probability),
336 Datum(CType(42)),
337 };
338
339 for (auto _ : state) {
340 ABORT_NOT_OK(CallFunction("coalesce", arguments));
341 }
342
343 state.SetBytesProcessed(state.iterations() *
344 GetBytesProcessed<Type>::Get(arguments.front().make_array()));
345 state.SetItemsProcessed(state.iterations() * params.length);
346 }
347
348 static void CoalesceScalarStringBench(benchmark::State& state) {
349 CoalesceArgs params(state);
350 random::RandomArrayGenerator rand(/*seed=*/0);
351
352 auto arr = rand.ArrayOf(utf8(), params.length, params.null_probability);
353 std::vector<Datum> arguments = {arr, Datum("foobar")};
354
355 for (auto _ : state) {
356 ABORT_NOT_OK(CallFunction("coalesce", arguments));
357 }
358
359 state.SetBytesProcessed(state.iterations() * GetBytesProcessed<StringType>::Get(
360 arguments.front().make_array()));
361 state.SetItemsProcessed(state.iterations() * params.length);
362 }
363
364 static void CoalesceBench64(benchmark::State& state) {
365 return CoalesceBench<Int64Type>(state);
366 }
367
368 static void CoalesceScalarBench64(benchmark::State& state) {
369 return CoalesceScalarBench<Int64Type>(state);
370 }
371
372 template <typename Type>
373 static void ChooseBench(benchmark::State& state) {
374 constexpr int kNumChoices = 5;
375 auto type = TypeTraits<Type>::type_singleton();
376
377 int64_t len = state.range(0);
378 int64_t offset = state.range(1);
379
380 random::RandomArrayGenerator rand(/*seed=*/0);
381
382 std::vector<Datum> arguments;
383 arguments.emplace_back(
384 rand.Int64(len, /*min=*/0, /*max=*/kNumChoices - 1, /*null_probability=*/0.1)
385 ->Slice(offset));
386 for (int i = 0; i < kNumChoices; i++) {
387 arguments.emplace_back(
388 rand.ArrayOf(type, len, /*null_probability=*/0.25)->Slice(offset));
389 }
390
391 for (auto _ : state) {
392 ABORT_NOT_OK(CallFunction("choose", arguments));
393 }
394
395 state.SetBytesProcessed(state.iterations() *
396 GetBytesProcessed<Type>::Get(arguments[1].make_array()));
397 state.SetItemsProcessed(state.iterations() * (len - offset));
398 }
399
400 static void ChooseBench64(benchmark::State& state) {
401 return ChooseBench<Int64Type>(state);
402 }
403
404 BENCHMARK(IfElseBench32)->Args({kNumItems, 0});
405 BENCHMARK(IfElseBench64)->Args({kNumItems, 0});
406
407 BENCHMARK(IfElseBench32)->Args({kNumItems, 99});
408 BENCHMARK(IfElseBench64)->Args({kNumItems, 99});
409
410 BENCHMARK(IfElseBench32Contiguous)->Args({kNumItems, 0});
411 BENCHMARK(IfElseBench64Contiguous)->Args({kNumItems, 0});
412
413 BENCHMARK(IfElseBench32Contiguous)->Args({kNumItems, 99});
414 BENCHMARK(IfElseBench64Contiguous)->Args({kNumItems, 99});
415
416 BENCHMARK(IfElseBenchString32)->Args({kNumItems, 0});
417 BENCHMARK(IfElseBenchString64)->Args({kNumItems, 0});
418
419 BENCHMARK(IfElseBenchString32Contiguous)->Args({kNumItems, 99});
420 BENCHMARK(IfElseBenchString64Contiguous)->Args({kNumItems, 99});
421
422 BENCHMARK(CaseWhenBench64)->Args({kNumItems, 0});
423 BENCHMARK(CaseWhenBench64)->Args({kNumItems, 99});
424
425 BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 0});
426 BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 99});
427
428 BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 0});
429 BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 99});
430
431 BENCHMARK(CaseWhenBenchString)->Args({kFewItems, 0});
432 BENCHMARK(CaseWhenBenchString)->Args({kFewItems, 99});
433
434 BENCHMARK(CaseWhenBenchStringContiguous)->Args({kFewItems, 0});
435 BENCHMARK(CaseWhenBenchStringContiguous)->Args({kFewItems, 99});
436
437 void CoalesceSetArgs(benchmark::internal::Benchmark* bench) {
438 for (size_t i = 0; i < g_coalesce_params.size(); i++) {
439 bench->Args({static_cast<int64_t>(i)});
440 }
441 }
442 void CoalesceSetBinaryArgs(benchmark::internal::Benchmark* bench) {
443 for (size_t i = 0; i < g_coalesce_params.size(); i++) {
444 if (g_coalesce_params[i].num_arguments == 2) {
445 bench->Args({static_cast<int64_t>(i)});
446 }
447 }
448 }
449 BENCHMARK(CoalesceBench64)->Apply(CoalesceSetArgs);
450 BENCHMARK(CoalesceScalarBench64)->Apply(CoalesceSetBinaryArgs);
451 BENCHMARK(CoalesceScalarStringBench)->Apply(CoalesceSetBinaryArgs);
452
453 BENCHMARK(ChooseBench64)->Args({kNumItems, 0});
454 BENCHMARK(ChooseBench64)->Args({kNumItems, 99});
455
456 } // namespace compute
457 } // namespace arrow