]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / compute / kernels / scalar_set_lookup_benchmark.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "benchmark/benchmark.h"
19
20 #include "arrow/compute/api_scalar.h"
21 #include "arrow/compute/kernels/common.h"
22 #include "arrow/compute/kernels/test_util.h"
23 #include "arrow/testing/gtest_util.h"
24 #include "arrow/testing/random.h"
25 #include "arrow/util/benchmark_util.h"
26
27 namespace arrow {
28 namespace compute {
29
// Seed handed to every random::RandomArrayGenerator created in this file.
// Using one fixed value presumably keeps the generated inputs identical from
// run to run, so that benchmark results stay comparable.
constexpr auto kSeed = 0x94378165;
31
32 static void SetLookupBenchmarkString(benchmark::State& state,
33 const std::string& func_name,
34 const int64_t value_set_length) {
35 // As the set lookup functions don't support duplicate values in the value_set,
36 // we need to choose random generation parameters that minimize the risk of
37 // duplicates (including nulls).
38 const int64_t array_length = 1 << 18;
39 const int32_t value_min_size = (value_set_length < 64) ? 2 : 10;
40 const int32_t value_max_size = 32;
41 const double null_probability = 0.2 / value_set_length;
42 random::RandomArrayGenerator rng(kSeed);
43
44 auto values =
45 rng.String(array_length, value_min_size, value_max_size, null_probability);
46 auto value_set =
47 rng.String(value_set_length, value_min_size, value_max_size, null_probability);
48 ABORT_NOT_OK(CallFunction(func_name, {values, value_set}));
49 for (auto _ : state) {
50 ABORT_NOT_OK(CallFunction(func_name, {values, value_set}));
51 }
52 state.SetItemsProcessed(state.iterations() * array_length);
53 state.SetBytesProcessed(state.iterations() * values->data()->buffers[2]->size());
54 }
55
56 template <typename Type>
57 static void SetLookupBenchmarkNumeric(benchmark::State& state,
58 const std::string& func_name,
59 const int64_t value_set_length) {
60 const int64_t array_length = 1 << 18;
61 const int64_t value_min = 0;
62 const int64_t value_max = std::numeric_limits<typename Type::c_type>::max();
63 const double null_probability = 0.1 / value_set_length;
64 random::RandomArrayGenerator rng(kSeed);
65
66 auto values = rng.Numeric<Type>(array_length, value_min, value_max, null_probability);
67 auto value_set =
68 rng.Numeric<Type>(value_set_length, value_min, value_max, null_probability);
69 ABORT_NOT_OK(CallFunction(func_name, {values, value_set}));
70 for (auto _ : state) {
71 ABORT_NOT_OK(CallFunction(func_name, {values, value_set}));
72 }
73 state.SetItemsProcessed(state.iterations() * array_length);
74 state.SetBytesProcessed(state.iterations() * values->data()->buffers[1]->size());
75 }
76
77 static void IndexInStringSmallSet(benchmark::State& state) {
78 SetLookupBenchmarkString(state, "index_in_meta_binary", state.range(0));
79 }
80
81 static void IsInStringSmallSet(benchmark::State& state) {
82 SetLookupBenchmarkString(state, "is_in_meta_binary", state.range(0));
83 }
84
85 static void IndexInStringLargeSet(benchmark::State& state) {
86 SetLookupBenchmarkString(state, "index_in_meta_binary", 1 << 10);
87 }
88
89 static void IsInStringLargeSet(benchmark::State& state) {
90 SetLookupBenchmarkString(state, "is_in_meta_binary", 1 << 10);
91 }
92
93 static void IndexInInt8SmallSet(benchmark::State& state) {
94 SetLookupBenchmarkNumeric<Int8Type>(state, "index_in_meta_binary", state.range(0));
95 }
96
97 static void IndexInInt16SmallSet(benchmark::State& state) {
98 SetLookupBenchmarkNumeric<Int16Type>(state, "index_in_meta_binary", state.range(0));
99 }
100
101 static void IndexInInt32SmallSet(benchmark::State& state) {
102 SetLookupBenchmarkNumeric<Int32Type>(state, "index_in_meta_binary", state.range(0));
103 }
104
105 static void IndexInInt64SmallSet(benchmark::State& state) {
106 SetLookupBenchmarkNumeric<Int64Type>(state, "index_in_meta_binary", state.range(0));
107 }
108
109 static void IsInInt8SmallSet(benchmark::State& state) {
110 SetLookupBenchmarkNumeric<Int8Type>(state, "is_in_meta_binary", state.range(0));
111 }
112
113 static void IsInInt16SmallSet(benchmark::State& state) {
114 SetLookupBenchmarkNumeric<Int16Type>(state, "is_in_meta_binary", state.range(0));
115 }
116
117 static void IsInInt32SmallSet(benchmark::State& state) {
118 SetLookupBenchmarkNumeric<Int32Type>(state, "is_in_meta_binary", state.range(0));
119 }
120
121 static void IsInInt64SmallSet(benchmark::State& state) {
122 SetLookupBenchmarkNumeric<Int64Type>(state, "is_in_meta_binary", state.range(0));
123 }
124
125 BENCHMARK(IndexInStringSmallSet)->RangeMultiplier(4)->Range(2, 64);
126 BENCHMARK(IsInStringSmallSet)->RangeMultiplier(4)->Range(2, 64);
127
128 BENCHMARK(IndexInStringLargeSet);
129 BENCHMARK(IsInStringLargeSet);
130
131 // XXX For Int8, the value_set length has to be capped at a lower value
132 // in order to avoid duplicates.
133 BENCHMARK(IndexInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8);
134 BENCHMARK(IndexInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64);
135 BENCHMARK(IndexInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64);
136 BENCHMARK(IndexInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64);
137 BENCHMARK(IsInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8);
138 BENCHMARK(IsInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64);
139 BENCHMARK(IsInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64);
140 BENCHMARK(IsInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64);
141
142 } // namespace compute
143 } // namespace arrow