1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "benchmark/benchmark.h"
20 #include "arrow/compute/api_scalar.h"
21 #include "arrow/compute/kernels/common.h"
22 #include "arrow/compute/kernels/test_util.h"
23 #include "arrow/testing/gtest_util.h"
24 #include "arrow/testing/random.h"
25 #include "arrow/util/benchmark_util.h"
// Fixed seed handed to every RandomArrayGenerator in this file, so each
// benchmark run operates on the same pseudo-random input data.
constexpr auto kSeed = 0x94378165;
32 static void SetLookupBenchmarkString(benchmark::State
& state
,
33 const std::string
& func_name
,
34 const int64_t value_set_length
) {
35 // As the set lookup functions don't support duplicate values in the value_set,
36 // we need to choose random generation parameters that minimize the risk of
37 // duplicates (including nulls).
38 const int64_t array_length
= 1 << 18;
39 const int32_t value_min_size
= (value_set_length
< 64) ? 2 : 10;
40 const int32_t value_max_size
= 32;
41 const double null_probability
= 0.2 / value_set_length
;
42 random::RandomArrayGenerator
rng(kSeed
);
45 rng
.String(array_length
, value_min_size
, value_max_size
, null_probability
);
47 rng
.String(value_set_length
, value_min_size
, value_max_size
, null_probability
);
48 ABORT_NOT_OK(CallFunction(func_name
, {values
, value_set
}));
49 for (auto _
: state
) {
50 ABORT_NOT_OK(CallFunction(func_name
, {values
, value_set
}));
52 state
.SetItemsProcessed(state
.iterations() * array_length
);
53 state
.SetBytesProcessed(state
.iterations() * values
->data()->buffers
[2]->size());
56 template <typename Type
>
57 static void SetLookupBenchmarkNumeric(benchmark::State
& state
,
58 const std::string
& func_name
,
59 const int64_t value_set_length
) {
60 const int64_t array_length
= 1 << 18;
61 const int64_t value_min
= 0;
62 const int64_t value_max
= std::numeric_limits
<typename
Type::c_type
>::max();
63 const double null_probability
= 0.1 / value_set_length
;
64 random::RandomArrayGenerator
rng(kSeed
);
66 auto values
= rng
.Numeric
<Type
>(array_length
, value_min
, value_max
, null_probability
);
68 rng
.Numeric
<Type
>(value_set_length
, value_min
, value_max
, null_probability
);
69 ABORT_NOT_OK(CallFunction(func_name
, {values
, value_set
}));
70 for (auto _
: state
) {
71 ABORT_NOT_OK(CallFunction(func_name
, {values
, value_set
}));
73 state
.SetItemsProcessed(state
.iterations() * array_length
);
74 state
.SetBytesProcessed(state
.iterations() * values
->data()->buffers
[1]->size());
77 static void IndexInStringSmallSet(benchmark::State
& state
) {
78 SetLookupBenchmarkString(state
, "index_in_meta_binary", state
.range(0));
81 static void IsInStringSmallSet(benchmark::State
& state
) {
82 SetLookupBenchmarkString(state
, "is_in_meta_binary", state
.range(0));
85 static void IndexInStringLargeSet(benchmark::State
& state
) {
86 SetLookupBenchmarkString(state
, "index_in_meta_binary", 1 << 10);
89 static void IsInStringLargeSet(benchmark::State
& state
) {
90 SetLookupBenchmarkString(state
, "is_in_meta_binary", 1 << 10);
93 static void IndexInInt8SmallSet(benchmark::State
& state
) {
94 SetLookupBenchmarkNumeric
<Int8Type
>(state
, "index_in_meta_binary", state
.range(0));
97 static void IndexInInt16SmallSet(benchmark::State
& state
) {
98 SetLookupBenchmarkNumeric
<Int16Type
>(state
, "index_in_meta_binary", state
.range(0));
101 static void IndexInInt32SmallSet(benchmark::State
& state
) {
102 SetLookupBenchmarkNumeric
<Int32Type
>(state
, "index_in_meta_binary", state
.range(0));
105 static void IndexInInt64SmallSet(benchmark::State
& state
) {
106 SetLookupBenchmarkNumeric
<Int64Type
>(state
, "index_in_meta_binary", state
.range(0));
109 static void IsInInt8SmallSet(benchmark::State
& state
) {
110 SetLookupBenchmarkNumeric
<Int8Type
>(state
, "is_in_meta_binary", state
.range(0));
113 static void IsInInt16SmallSet(benchmark::State
& state
) {
114 SetLookupBenchmarkNumeric
<Int16Type
>(state
, "is_in_meta_binary", state
.range(0));
117 static void IsInInt32SmallSet(benchmark::State
& state
) {
118 SetLookupBenchmarkNumeric
<Int32Type
>(state
, "is_in_meta_binary", state
.range(0));
121 static void IsInInt64SmallSet(benchmark::State
& state
) {
122 SetLookupBenchmarkNumeric
<Int64Type
>(state
, "is_in_meta_binary", state
.range(0));
125 BENCHMARK(IndexInStringSmallSet
)->RangeMultiplier(4)->Range(2, 64);
126 BENCHMARK(IsInStringSmallSet
)->RangeMultiplier(4)->Range(2, 64);
128 BENCHMARK(IndexInStringLargeSet
);
129 BENCHMARK(IsInStringLargeSet
);
131 // XXX For Int8, the value_set length has to be capped at a lower value
132 // in order to avoid duplicates.
133 BENCHMARK(IndexInInt8SmallSet
)->RangeMultiplier(4)->Range(2, 8);
134 BENCHMARK(IndexInInt16SmallSet
)->RangeMultiplier(4)->Range(2, 64);
135 BENCHMARK(IndexInInt32SmallSet
)->RangeMultiplier(4)->Range(2, 64);
136 BENCHMARK(IndexInInt64SmallSet
)->RangeMultiplier(4)->Range(2, 64);
137 BENCHMARK(IsInInt8SmallSet
)->RangeMultiplier(4)->Range(2, 8);
138 BENCHMARK(IsInInt16SmallSet
)->RangeMultiplier(4)->Range(2, 64);
139 BENCHMARK(IsInInt32SmallSet
)->RangeMultiplier(4)->Range(2, 64);
140 BENCHMARK(IsInInt64SmallSet
)->RangeMultiplier(4)->Range(2, 64);
142 } // namespace compute