1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
26 #include "benchmark/benchmark.h"
28 #include "arrow/builder.h"
29 #include "arrow/memory_pool.h"
30 #include "arrow/testing/gtest_util.h"
31 #include "arrow/util/bit_util.h"
32 #include "arrow/util/decimal.h"
33 #include "arrow/util/string_view.h"
37 using ValueType
= int64_t;
38 using VectorType
= std::vector
<ValueType
>;
39 constexpr int64_t kNumberOfElements
= 256 * 512;
41 static VectorType
AlmostU8CompressibleVector() {
42 VectorType
data(kNumberOfElements
, 64);
44 // Insert an element late in the game that does not fit in the 8bit
45 // representation. This forces AdaptiveIntBuilder's to resize.
46 data
[kNumberOfElements
- 2] = 1L << 13;
51 constexpr int64_t kRounds
= 256;
52 static VectorType kData
= AlmostU8CompressibleVector();
53 constexpr int64_t kBytesProcessPerRound
= kNumberOfElements
* sizeof(ValueType
);
54 constexpr int64_t kBytesProcessed
= kRounds
* kBytesProcessPerRound
;
56 static const char* kBinaryString
= "12345678";
57 static arrow::util::string_view
kBinaryView(kBinaryString
);
59 static void BuildIntArrayNoNulls(benchmark::State
& state
) { // NOLINT non-const reference
60 for (auto _
: state
) {
63 for (int i
= 0; i
< kRounds
; i
++) {
64 ABORT_NOT_OK(builder
.AppendValues(kData
.data(), kData
.size(), nullptr));
67 std::shared_ptr
<Array
> out
;
68 ABORT_NOT_OK(builder
.Finish(&out
));
71 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
74 static void BuildAdaptiveIntNoNulls(
75 benchmark::State
& state
) { // NOLINT non-const reference
76 for (auto _
: state
) {
77 AdaptiveIntBuilder builder
;
79 for (int i
= 0; i
< kRounds
; i
++) {
80 ABORT_NOT_OK(builder
.AppendValues(kData
.data(), kData
.size(), nullptr));
83 std::shared_ptr
<Array
> out
;
84 ABORT_NOT_OK(builder
.Finish(&out
));
87 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
90 static void BuildAdaptiveIntNoNullsScalarAppend(
91 benchmark::State
& state
) { // NOLINT non-const reference
92 for (auto _
: state
) {
93 AdaptiveIntBuilder builder
;
95 for (int i
= 0; i
< kRounds
; i
++) {
96 for (size_t j
= 0; j
< kData
.size(); j
++) {
97 ABORT_NOT_OK(builder
.Append(kData
[i
]))
101 std::shared_ptr
<Array
> out
;
102 ABORT_NOT_OK(builder
.Finish(&out
));
105 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
108 static void BuildBooleanArrayNoNulls(
109 benchmark::State
& state
) { // NOLINT non-const reference
111 size_t n_bytes
= kBytesProcessPerRound
;
112 const uint8_t* data
= reinterpret_cast<const uint8_t*>(kData
.data());
114 for (auto _
: state
) {
115 BooleanBuilder builder
;
117 for (int i
= 0; i
< kRounds
; i
++) {
118 ABORT_NOT_OK(builder
.AppendValues(data
, n_bytes
));
121 std::shared_ptr
<Array
> out
;
122 ABORT_NOT_OK(builder
.Finish(&out
));
125 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
128 static void BuildBinaryArray(benchmark::State
& state
) { // NOLINT non-const reference
129 for (auto _
: state
) {
130 BinaryBuilder builder
;
132 for (int64_t i
= 0; i
< kRounds
* kNumberOfElements
; i
++) {
133 ABORT_NOT_OK(builder
.Append(kBinaryView
));
136 std::shared_ptr
<Array
> out
;
137 ABORT_NOT_OK(builder
.Finish(&out
));
140 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
143 static void BuildChunkedBinaryArray(
144 benchmark::State
& state
) { // NOLINT non-const reference
146 const int32_t kChunkSize
= 1 << 20;
148 for (auto _
: state
) {
149 internal::ChunkedBinaryBuilder
builder(kChunkSize
);
151 for (int64_t i
= 0; i
< kRounds
* kNumberOfElements
; i
++) {
152 ABORT_NOT_OK(builder
.Append(kBinaryView
));
156 ABORT_NOT_OK(builder
.Finish(&out
));
159 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
162 static void BuildFixedSizeBinaryArray(
163 benchmark::State
& state
) { // NOLINT non-const reference
164 auto type
= fixed_size_binary(static_cast<int32_t>(kBinaryView
.size()));
166 for (auto _
: state
) {
167 FixedSizeBinaryBuilder
builder(type
);
169 for (int64_t i
= 0; i
< kRounds
* kNumberOfElements
; i
++) {
170 ABORT_NOT_OK(builder
.Append(kBinaryView
));
173 std::shared_ptr
<Array
> out
;
174 ABORT_NOT_OK(builder
.Finish(&out
));
177 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
180 static void BuildDecimalArray(benchmark::State
& state
) { // NOLINT non-const reference
181 auto type
= decimal(10, 5);
183 int32_t precision
= 0;
185 ABORT_NOT_OK(Decimal128::FromString("1234.1234", &value
, &precision
, &scale
));
186 for (auto _
: state
) {
187 Decimal128Builder
builder(type
);
189 for (int64_t i
= 0; i
< kRounds
* kNumberOfElements
; i
++) {
190 ABORT_NOT_OK(builder
.Append(value
));
193 std::shared_ptr
<Array
> out
;
194 ABORT_NOT_OK(builder
.Finish(&out
));
197 state
.SetBytesProcessed(state
.iterations() * kRounds
* kNumberOfElements
* 16);
200 // ----------------------------------------------------------------------
201 // DictionaryBuilder benchmarks
203 size_t kDistinctElements
= kNumberOfElements
/ 100;
205 // Testing with different distributions of integer values helps stress
206 // the hash table's robustness.
208 // Make a vector out of `n_distinct` sequential int values
209 template <class Integer
= ValueType
>
210 static std::vector
<Integer
> MakeSequentialIntDictFodder() {
211 std::default_random_engine
gen(42);
212 std::vector
<Integer
> values(kNumberOfElements
);
214 std::uniform_int_distribution
<Integer
> values_dist(0, kDistinctElements
- 1);
215 std::generate(values
.begin(), values
.end(), [&]() { return values_dist(gen
); });
220 // Make a vector out of `n_distinct` int values with potentially colliding hash
221 // entries as only their highest bits differ.
222 template <class Integer
= ValueType
>
223 static std::vector
<Integer
> MakeSimilarIntDictFodder() {
224 std::default_random_engine
gen(42);
225 std::vector
<Integer
> values(kNumberOfElements
);
227 std::uniform_int_distribution
<Integer
> values_dist(0, kDistinctElements
- 1);
228 auto max_int
= std::numeric_limits
<Integer
>::max();
230 static_cast<Integer
>(BitUtil::NextPower2(max_int
/ kDistinctElements
/ 2));
231 std::generate(values
.begin(), values
.end(),
232 [&]() { return multiplier
* values_dist(gen
); });
237 // Make a vector out of `n_distinct` random int values
238 template <class Integer
= ValueType
>
239 static std::vector
<Integer
> MakeRandomIntDictFodder() {
240 std::default_random_engine
gen(42);
241 std::vector
<Integer
> values_dict(kDistinctElements
);
242 std::vector
<Integer
> values(kNumberOfElements
);
245 std::uniform_int_distribution
<Integer
> values_dist(
246 0, std::numeric_limits
<Integer
>::max());
247 std::generate(values_dict
.begin(), values_dict
.end(),
248 [&]() { return static_cast<Integer
>(values_dist(gen
)); });
251 std::uniform_int_distribution
<int32_t> indices_dist(
252 0, static_cast<int32_t>(kDistinctElements
- 1));
253 std::generate(values
.begin(), values
.end(),
254 [&]() { return values_dict
[indices_dist(gen
)]; });
259 // Make a vector out of `kDistinctElements` string values
260 static std::vector
<std::string
> MakeStringDictFodder() {
261 std::default_random_engine
gen(42);
262 std::vector
<std::string
> values_dict(kDistinctElements
);
263 std::vector
<std::string
> values(kNumberOfElements
);
266 auto it
= values_dict
.begin();
269 // Add a few similar strings
273 // Add random strings
274 std::uniform_int_distribution
<int32_t> length_dist(2, 20);
275 std::independent_bits_engine
<std::default_random_engine
, 8, uint16_t> bytes_gen(42);
277 std::generate(it
, values_dict
.end(), [&] {
278 auto length
= length_dist(gen
);
279 std::string
s(length
, 'X');
280 for (int32_t i
= 0; i
< length
; ++i
) {
281 s
[i
] = static_cast<char>(bytes_gen());
287 std::uniform_int_distribution
<int32_t> indices_dist(
288 0, static_cast<int32_t>(kDistinctElements
- 1));
289 std::generate(values
.begin(), values
.end(),
290 [&] { return values_dict
[indices_dist(gen
)]; });
295 template <class DictionaryBuilderType
, class Scalar
>
296 static void BenchmarkDictionaryArray(
297 benchmark::State
& state
, // NOLINT non-const reference
298 const std::vector
<Scalar
>& fodder
, size_t fodder_nbytes
= 0) {
299 for (auto _
: state
) {
300 DictionaryBuilderType
builder(default_memory_pool());
302 for (int64_t i
= 0; i
< kRounds
; i
++) {
303 for (const auto& value
: fodder
) {
304 ABORT_NOT_OK(builder
.Append(value
));
308 std::shared_ptr
<Array
> out
;
309 ABORT_NOT_OK(builder
.Finish(&out
));
312 if (fodder_nbytes
== 0) {
313 fodder_nbytes
= fodder
.size() * sizeof(Scalar
);
315 state
.SetBytesProcessed(state
.iterations() * fodder_nbytes
* kRounds
);
318 static void BuildInt64DictionaryArrayRandom(
319 benchmark::State
& state
) { // NOLINT non-const reference
320 const auto fodder
= MakeRandomIntDictFodder();
321 BenchmarkDictionaryArray
<DictionaryBuilder
<Int64Type
>>(state
, fodder
);
324 static void BuildInt64DictionaryArraySequential(
325 benchmark::State
& state
) { // NOLINT non-const reference
326 const auto fodder
= MakeSequentialIntDictFodder();
327 BenchmarkDictionaryArray
<DictionaryBuilder
<Int64Type
>>(state
, fodder
);
330 static void BuildInt64DictionaryArraySimilar(
331 benchmark::State
& state
) { // NOLINT non-const reference
332 const auto fodder
= MakeSimilarIntDictFodder();
333 BenchmarkDictionaryArray
<DictionaryBuilder
<Int64Type
>>(state
, fodder
);
336 static void BuildStringDictionaryArray(
337 benchmark::State
& state
) { // NOLINT non-const reference
338 const auto fodder
= MakeStringDictFodder();
340 std::accumulate(fodder
.begin(), fodder
.end(), 0ULL,
341 [&](size_t acc
, const std::string
& s
) { return acc
+ s
.size(); });
342 BenchmarkDictionaryArray
<BinaryDictionaryBuilder
>(state
, fodder
, fodder_nbytes
);
345 static void ArrayDataConstructDestruct(
346 benchmark::State
& state
) { // NOLINT non-const reference
347 std::vector
<std::shared_ptr
<ArrayData
>> arrays
;
349 const int kNumArrays
= 1000;
350 auto InitArrays
= [&]() {
351 for (int i
= 0; i
< kNumArrays
; ++i
) {
352 arrays
.emplace_back(new ArrayData
);
356 for (auto _
: state
) {
362 // ----------------------------------------------------------------------
363 // BufferBuilder benchmarks
365 static void BenchmarkBufferBuilder(
366 const std::string
& datum
,
367 benchmark::State
& state
) { // NOLINT non-const reference
368 const void* raw_data
= datum
.data();
369 int64_t raw_nbytes
= static_cast<int64_t>(datum
.size());
370 // Write approx. 256 MB to BufferBuilder
371 int64_t num_raw_values
= (1 << 28) / raw_nbytes
;
372 for (auto _
: state
) {
373 BufferBuilder builder
;
374 std::shared_ptr
<Buffer
> buf
;
375 for (int64_t i
= 0; i
< num_raw_values
; ++i
) {
376 ABORT_NOT_OK(builder
.Append(raw_data
, raw_nbytes
));
378 ABORT_NOT_OK(builder
.Finish(&buf
));
380 state
.SetBytesProcessed(int64_t(state
.iterations()) * num_raw_values
* raw_nbytes
);
383 static void BufferBuilderTinyWrites(
384 benchmark::State
& state
) { // NOLINT non-const reference
386 return BenchmarkBufferBuilder("abdefghi", state
);
389 static void BufferBuilderSmallWrites(
390 benchmark::State
& state
) { // NOLINT non-const reference
393 for (int i
= 0; i
< 100; ++i
) {
396 return BenchmarkBufferBuilder(datum
, state
);
399 static void BufferBuilderLargeWrites(
400 benchmark::State
& state
) { // NOLINT non-const reference
402 std::string
datum(1500000, 'x');
403 return BenchmarkBufferBuilder(datum
, state
);
406 BENCHMARK(BufferBuilderTinyWrites
)->UseRealTime();
407 BENCHMARK(BufferBuilderSmallWrites
)->UseRealTime();
408 BENCHMARK(BufferBuilderLargeWrites
)->UseRealTime();
410 // ----------------------------------------------------------------------
411 // Benchmark declarations
414 #ifdef ARROW_WITH_BENCHMARKS_REFERENCE
416 // This benchmarks acts as a reference to the native std::vector
417 // implementation. It appends kRounds chunks into a vector.
418 static void ReferenceBuildVectorNoNulls(
419 benchmark::State
& state
) { // NOLINT non-const reference
420 for (auto _
: state
) {
421 std::vector
<int64_t> builder
;
423 for (int i
= 0; i
< kRounds
; i
++) {
424 builder
.insert(builder
.end(), kData
.cbegin(), kData
.cend());
428 state
.SetBytesProcessed(state
.iterations() * kBytesProcessed
);
431 BENCHMARK(ReferenceBuildVectorNoNulls
);
435 BENCHMARK(BuildBooleanArrayNoNulls
);
437 BENCHMARK(BuildIntArrayNoNulls
);
438 BENCHMARK(BuildAdaptiveIntNoNulls
);
439 BENCHMARK(BuildAdaptiveIntNoNullsScalarAppend
);
441 BENCHMARK(BuildBinaryArray
);
442 BENCHMARK(BuildChunkedBinaryArray
);
443 BENCHMARK(BuildFixedSizeBinaryArray
);
444 BENCHMARK(BuildDecimalArray
);
446 BENCHMARK(BuildInt64DictionaryArrayRandom
);
447 BENCHMARK(BuildInt64DictionaryArraySequential
);
448 BENCHMARK(BuildInt64DictionaryArraySimilar
);
449 BENCHMARK(BuildStringDictionaryArray
);
451 BENCHMARK(ArrayDataConstructDestruct
);