1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
18 #include "benchmark/benchmark.h"
24 #include "parquet/arrow/reader.h"
25 #include "parquet/arrow/writer.h"
26 #include "parquet/column_reader.h"
27 #include "parquet/column_writer.h"
28 #include "parquet/file_reader.h"
29 #include "parquet/file_writer.h"
30 #include "parquet/platform.h"
32 #include "arrow/array.h"
33 #include "arrow/array/builder_primitive.h"
34 #include "arrow/io/memory.h"
35 #include "arrow/table.h"
36 #include "arrow/testing/gtest_util.h"
37 #include "arrow/testing/random.h"
38 #include "arrow/util/async_generator.h"
39 #include "arrow/util/bitmap_ops.h"
40 #include "arrow/util/logging.h"
43 using arrow::ArrayVector
;
44 using arrow::BooleanBuilder
;
45 using arrow::FieldVector
;
46 using arrow::NumericBuilder
;
48 #define EXIT_NOT_OK(s) \
50 ::arrow::Status _s = (s); \
51 if (ARROW_PREDICT_FALSE(!_s.ok())) { \
52 std::cout << "Exiting: " << _s.ToString() << std::endl; \
59 using arrow::FileReader
;
60 using arrow::WriteTable
;
61 using schema::PrimitiveNode
;
// Number of values generated per benchmark column (10 Mi). This should
// result in multiple pages for most primitive types.
constexpr int64_t BENCHMARK_SIZE = int64_t{10} << 20;
68 template <typename ParquetType
>
69 struct benchmark_traits
{};
72 struct benchmark_traits
<Int32Type
> {
73 using arrow_type
= ::arrow::Int32Type
;
77 struct benchmark_traits
<Int64Type
> {
78 using arrow_type
= ::arrow::Int64Type
;
82 struct benchmark_traits
<DoubleType
> {
83 using arrow_type
= ::arrow::DoubleType
;
87 struct benchmark_traits
<BooleanType
> {
88 using arrow_type
= ::arrow::BooleanType
;
91 template <typename ParquetType
>
92 using ArrowType
= typename benchmark_traits
<ParquetType
>::arrow_type
;
94 template <typename ParquetType
>
95 std::shared_ptr
<ColumnDescriptor
> MakeSchema(Repetition::type repetition
) {
96 auto node
= PrimitiveNode::Make("int64", repetition
, ParquetType::type_num
);
97 return std::make_shared
<ColumnDescriptor
>(node
, repetition
!= Repetition::REQUIRED
,
98 repetition
== Repetition::REPEATED
);
101 template <bool nullable
, typename ParquetType
>
102 void SetBytesProcessed(::benchmark::State
& state
, int64_t num_values
= BENCHMARK_SIZE
) {
103 const int64_t items_processed
= state
.iterations() * num_values
;
104 const int64_t bytes_processed
= items_processed
* sizeof(typename
ParquetType::c_type
);
106 state
.SetItemsProcessed(bytes_processed
);
107 state
.SetBytesProcessed(bytes_processed
);
// Sentinel percentage: requests the alternating pattern in RandomVector, and
// is the default `null_percentage` of TableFromVector.
constexpr int64_t kAlternatingOrNa = -1;

// Produces `vector_size` elements of type T.
//
// - true_percentage == kAlternatingOrNa: elements are 0, 1, 0, 1, ... (the
//   result of `n % 2` converted to T); `sample_values` and `seed` are unused.
// - otherwise: each element is sample_values[b], where b is drawn from a
//   Bernoulli distribution with probability true_percentage / 100, using a
//   std::default_random_engine seeded with `seed`. A "true" draw therefore
//   selects sample_values[1].
template <typename T>
std::vector<T> RandomVector(int64_t true_percentage, int64_t vector_size,
                            const std::array<T, 2>& sample_values, int seed = 500) {
  std::vector<T> result(vector_size, T{});

  if (true_percentage == kAlternatingOrNa) {
    int counter = 0;
    // `auto&&` also binds to the proxy reference of std::vector<bool>.
    for (auto&& slot : result) {
      slot = counter++ % 2;
    }
    return result;
  }

  std::default_random_engine engine(seed);
  const double probability_of_true = static_cast<double>(true_percentage) / 100.0;
  std::bernoulli_distribution pick_second(probability_of_true);
  for (auto&& slot : result) {
    slot = sample_values[pick_second(engine)];
  }
  return result;
}
128 template <typename ParquetType
>
129 std::shared_ptr
<::arrow::Table
> TableFromVector(
130 const std::vector
<typename
ParquetType::c_type
>& vec
, bool nullable
,
131 int64_t null_percentage
= kAlternatingOrNa
) {
133 ARROW_CHECK_EQ(null_percentage
, kAlternatingOrNa
);
135 std::shared_ptr
<::arrow::DataType
> type
= std::make_shared
<ArrowType
<ParquetType
>>();
136 NumericBuilder
<ArrowType
<ParquetType
>> builder
;
138 // Note true values select index 1 of sample_values
139 auto valid_bytes
= RandomVector
<uint8_t>(/*true_percentage=*/null_percentage
,
140 vec
.size(), /*sample_values=*/{1, 0});
141 EXIT_NOT_OK(builder
.AppendValues(vec
.data(), vec
.size(), valid_bytes
.data()));
143 EXIT_NOT_OK(builder
.AppendValues(vec
.data(), vec
.size(), nullptr));
145 std::shared_ptr
<::arrow::Array
> array
;
146 EXIT_NOT_OK(builder
.Finish(&array
));
148 auto field
= ::arrow::field("column", type
, nullable
);
149 auto schema
= ::arrow::schema({field
});
150 return ::arrow::Table::Make(schema
, {array
});
154 std::shared_ptr
<::arrow::Table
> TableFromVector
<BooleanType
>(const std::vector
<bool>& vec
,
156 int64_t null_percentage
) {
157 BooleanBuilder builder
;
159 auto valid_bytes
= RandomVector
<bool>(/*true_percentage=*/null_percentage
, vec
.size(),
161 EXIT_NOT_OK(builder
.AppendValues(vec
, valid_bytes
));
163 EXIT_NOT_OK(builder
.AppendValues(vec
));
165 std::shared_ptr
<::arrow::Array
> array
;
166 EXIT_NOT_OK(builder
.Finish(&array
));
168 auto field
= ::arrow::field("column", ::arrow::boolean(), nullable
);
169 auto schema
= std::make_shared
<::arrow::Schema
>(
170 std::vector
<std::shared_ptr
<::arrow::Field
>>({field
}));
171 return ::arrow::Table::Make(schema
, {array
});
174 template <bool nullable
, typename ParquetType
>
175 static void BM_WriteColumn(::benchmark::State
& state
) {
176 using T
= typename
ParquetType::c_type
;
177 std::vector
<T
> values(BENCHMARK_SIZE
, static_cast<T
>(128));
178 std::shared_ptr
<::arrow::Table
> table
= TableFromVector
<ParquetType
>(values
, nullable
);
180 while (state
.KeepRunning()) {
181 auto output
= CreateOutputStream();
183 WriteTable(*table
, ::arrow::default_memory_pool(), output
, BENCHMARK_SIZE
));
185 SetBytesProcessed
<nullable
, ParquetType
>(state
);
188 BENCHMARK_TEMPLATE2(BM_WriteColumn
, false, Int32Type
);
189 BENCHMARK_TEMPLATE2(BM_WriteColumn
, true, Int32Type
);
191 BENCHMARK_TEMPLATE2(BM_WriteColumn
, false, Int64Type
);
192 BENCHMARK_TEMPLATE2(BM_WriteColumn
, true, Int64Type
);
194 BENCHMARK_TEMPLATE2(BM_WriteColumn
, false, DoubleType
);
195 BENCHMARK_TEMPLATE2(BM_WriteColumn
, true, DoubleType
);
197 BENCHMARK_TEMPLATE2(BM_WriteColumn
, false, BooleanType
);
198 BENCHMARK_TEMPLATE2(BM_WriteColumn
, true, BooleanType
);
// Supplies the two sample values that RandomVector picks between when
// generating column data: {127, 128} for the generic case.
template <typename T>
struct Examples {
  static constexpr std::array<T, 2> values() { return std::array<T, 2>{{127, 128}}; }
};

// Boolean columns pick between {false, true}.
template <>
struct Examples<bool> {
  static constexpr std::array<bool, 2> values() {
    return std::array<bool, 2>{{false, true}};
  }
};
210 static void BenchmarkReadTable(::benchmark::State
& state
, const ::arrow::Table
& table
,
211 int64_t num_values
= -1, int64_t bytes_per_value
= -1) {
212 auto output
= CreateOutputStream();
214 WriteTable(table
, ::arrow::default_memory_pool(), output
, table
.num_rows()));
215 PARQUET_ASSIGN_OR_THROW(auto buffer
, output
->Finish());
217 while (state
.KeepRunning()) {
219 ParquetFileReader::Open(std::make_shared
<::arrow::io::BufferReader
>(buffer
));
220 std::unique_ptr
<FileReader
> arrow_reader
;
221 EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader
),
223 std::shared_ptr
<::arrow::Table
> table
;
224 EXIT_NOT_OK(arrow_reader
->ReadTable(&table
));
227 if (num_values
== -1) {
228 num_values
= table
.num_rows();
230 state
.SetItemsProcessed(num_values
* state
.iterations());
231 if (bytes_per_value
!= -1) {
232 state
.SetBytesProcessed(num_values
* state
.iterations() * bytes_per_value
);
236 static void BenchmarkReadArray(::benchmark::State
& state
,
237 const std::shared_ptr
<Array
>& array
, bool nullable
,
238 int64_t num_values
= -1, int64_t bytes_per_value
= -1) {
239 auto schema
= ::arrow::schema({field("s", array
->type(), nullable
)});
240 auto table
= ::arrow::Table::Make(schema
, {array
}, array
->length());
242 EXIT_NOT_OK(table
->Validate());
244 BenchmarkReadTable(state
, *table
, num_values
, bytes_per_value
);
248 // Benchmark reading a primitive column
251 template <bool nullable
, typename ParquetType
>
252 static void BM_ReadColumn(::benchmark::State
& state
) {
253 using T
= typename
ParquetType::c_type
;
255 auto values
= RandomVector
<T
>(/*percentage=*/state
.range(1), BENCHMARK_SIZE
,
256 Examples
<T
>::values());
258 std::shared_ptr
<::arrow::Table
> table
=
259 TableFromVector
<ParquetType
>(values
, nullable
, state
.range(0));
261 BenchmarkReadTable(state
, *table
, table
->num_rows(),
262 sizeof(typename
ParquetType::c_type
));
265 // There are two parameters here that cover different data distributions.
266 // null_percentage governs distribution and therefore runs of null values.
267 // first_value_percentage governs distribution of values (we select from 1 of 2)
268 // so when 0 or 100 RLE is triggered all the time. When a value in the range (0, 100)
269 // there will be some percentage of RLE encoded values and some percentage of literal
270 // encoded values (RLE is much less likely with percentages close to 50).
271 BENCHMARK_TEMPLATE2(BM_ReadColumn
, false, Int32Type
)
272 ->Args({/*null_percentage=*/kAlternatingOrNa
, 1})
273 ->Args({/*null_percentage=*/kAlternatingOrNa
, 10})
274 ->Args({/*null_percentage=*/kAlternatingOrNa
, 50});
276 BENCHMARK_TEMPLATE2(BM_ReadColumn
, true, Int32Type
)
277 ->Args({/*null_percentage=*/kAlternatingOrNa
, /*first_value_percentage=*/0})
278 ->Args({/*null_percentage=*/1, /*first_value_percentage=*/1})
279 ->Args({/*null_percentage=*/10, /*first_value_percentage=*/10})
280 ->Args({/*null_percentage=*/25, /*first_value_percentage=*/5})
281 ->Args({/*null_percentage=*/50, /*first_value_percentage=*/50})
282 ->Args({/*null_percentage=*/50, /*first_value_percentage=*/0})
283 ->Args({/*null_percentage=*/99, /*first_value_percentage=*/50})
284 ->Args({/*null_percentage=*/99, /*first_value_percentage=*/0});
286 BENCHMARK_TEMPLATE2(BM_ReadColumn
, false, Int64Type
)
287 ->Args({/*null_percentage=*/kAlternatingOrNa
, 1})
288 ->Args({/*null_percentage=*/kAlternatingOrNa
, 10})
289 ->Args({/*null_percentage=*/kAlternatingOrNa
, 50});
290 BENCHMARK_TEMPLATE2(BM_ReadColumn
, true, Int64Type
)
291 ->Args({/*null_percentage=*/kAlternatingOrNa
, /*first_value_percentage=*/0})
292 ->Args({/*null_percentage=*/1, /*first_value_percentage=*/1})
293 ->Args({/*null_percentage=*/5, /*first_value_percentage=*/5})
294 ->Args({/*null_percentage=*/10, /*first_value_percentage=*/5})
295 ->Args({/*null_percentage=*/25, /*first_value_percentage=*/10})
296 ->Args({/*null_percentage=*/30, /*first_value_percentage=*/10})
297 ->Args({/*null_percentage=*/35, /*first_value_percentage=*/10})
298 ->Args({/*null_percentage=*/45, /*first_value_percentage=*/25})
299 ->Args({/*null_percentage=*/50, /*first_value_percentage=*/50})
300 ->Args({/*null_percentage=*/50, /*first_value_percentage=*/1})
301 ->Args({/*null_percentage=*/75, /*first_value_percentage=*/1})
302 ->Args({/*null_percentage=*/99, /*first_value_percentage=*/50})
303 ->Args({/*null_percentage=*/99, /*first_value_percentage=*/0});
305 BENCHMARK_TEMPLATE2(BM_ReadColumn
, false, DoubleType
)
306 ->Args({kAlternatingOrNa
, 0})
307 ->Args({kAlternatingOrNa
, 20});
308 // Less coverage because int64_t should be pretty good representation for nullability and
310 BENCHMARK_TEMPLATE2(BM_ReadColumn
, true, DoubleType
)
311 ->Args({/*null_percentage=*/kAlternatingOrNa
, /*first_value_percentage=*/0})
312 ->Args({/*null_percentage=*/10, /*first_value_percentage=*/50})
313 ->Args({/*null_percentage=*/25, /*first_value_percentage=*/25});
315 BENCHMARK_TEMPLATE2(BM_ReadColumn
, false, BooleanType
)
316 ->Args({kAlternatingOrNa
, 0})
318 BENCHMARK_TEMPLATE2(BM_ReadColumn
, true, BooleanType
)
319 ->Args({kAlternatingOrNa
, 1})
323 // Benchmark reading a nested column
326 const std::vector
<int64_t> kNestedNullPercents
= {0, 1, 50, 99};
328 // XXX We can use ArgsProduct() starting from Benchmark 1.5.2
329 static void NestedReadArguments(::benchmark::internal::Benchmark
* b
) {
330 for (const auto null_percentage
: kNestedNullPercents
) {
331 b
->Arg(null_percentage
);
335 static std::shared_ptr
<Array
> MakeStructArray(::arrow::random::RandomArrayGenerator
* rng
,
336 const ArrayVector
& children
,
337 double null_probability
,
338 bool propagate_validity
= false) {
339 ARROW_CHECK_GT(children
.size(), 0);
340 const int64_t length
= children
[0]->length();
342 std::shared_ptr
<::arrow::Buffer
> null_bitmap
;
343 if (null_probability
> 0.0) {
344 null_bitmap
= rng
->NullBitmap(length
, null_probability
);
345 if (propagate_validity
) {
346 // HACK: the Parquet writer currently doesn't allow non-empty list
347 // entries where a parent node is null (for instance, a struct-of-list
348 // where the outer struct is marked null but the inner list value is
350 for (const auto& child
: children
) {
351 null_bitmap
= *::arrow::internal::BitmapOr(
352 ::arrow::default_memory_pool(), null_bitmap
->data(), 0,
353 child
->null_bitmap_data(), 0, length
, 0);
357 FieldVector
fields(children
.size());
358 char field_name
= 'a';
359 for (size_t i
= 0; i
< children
.size(); ++i
) {
360 fields
[i
] = field(std::string
{field_name
++}, children
[i
]->type(),
361 /*nullable=*/null_probability
> 0.0);
363 return *::arrow::StructArray::Make(children
, std::move(fields
), null_bitmap
);
366 // Make a (int32, int64) struct array
367 static std::shared_ptr
<Array
> MakeStructArray(::arrow::random::RandomArrayGenerator
* rng
,
368 int64_t size
, double null_probability
) {
369 auto values1
= rng
->Int32(size
, -5, 5, null_probability
);
370 auto values2
= rng
->Int64(size
, -12345678912345LL, 12345678912345LL, null_probability
);
371 return MakeStructArray(rng
, {values1
, values2
}, null_probability
);
374 static void BM_ReadStructColumn(::benchmark::State
& state
) {
375 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
376 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
377 const bool nullable
= (null_probability
!= 0.0);
379 ARROW_CHECK_GE(null_probability
, 0.0);
381 const int64_t kBytesPerValue
= sizeof(int32_t) + sizeof(int64_t);
383 ::arrow::random::RandomArrayGenerator
rng(42);
384 auto array
= MakeStructArray(&rng
, kNumValues
, null_probability
);
386 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
389 BENCHMARK(BM_ReadStructColumn
)->Apply(NestedReadArguments
);
391 static void BM_ReadStructOfStructColumn(::benchmark::State
& state
) {
392 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
393 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
394 const bool nullable
= (null_probability
!= 0.0);
396 ARROW_CHECK_GE(null_probability
, 0.0);
398 const int64_t kBytesPerValue
= 2 * (sizeof(int32_t) + sizeof(int64_t));
400 ::arrow::random::RandomArrayGenerator
rng(42);
401 auto values1
= MakeStructArray(&rng
, kNumValues
, null_probability
);
402 auto values2
= MakeStructArray(&rng
, kNumValues
, null_probability
);
403 auto array
= MakeStructArray(&rng
, {values1
, values2
}, null_probability
);
405 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
408 BENCHMARK(BM_ReadStructOfStructColumn
)->Apply(NestedReadArguments
);
410 static void BM_ReadStructOfListColumn(::benchmark::State
& state
) {
411 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
412 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
413 const bool nullable
= (null_probability
!= 0.0);
415 ARROW_CHECK_GE(null_probability
, 0.0);
417 ::arrow::random::RandomArrayGenerator
rng(42);
419 const int64_t kBytesPerValue
= sizeof(int32_t) + sizeof(int64_t);
421 auto values1
= rng
.Int32(kNumValues
, -5, 5, null_probability
);
423 rng
.Int64(kNumValues
, -12345678912345LL, 12345678912345LL, null_probability
);
424 auto list1
= rng
.List(*values1
, kNumValues
/ 10, null_probability
);
425 auto list2
= rng
.List(*values2
, kNumValues
/ 10, null_probability
);
426 auto array
= MakeStructArray(&rng
, {list1
, list2
}, null_probability
,
427 /*propagate_validity =*/true);
429 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
432 BENCHMARK(BM_ReadStructOfListColumn
)->Apply(NestedReadArguments
);
434 static void BM_ReadListColumn(::benchmark::State
& state
) {
435 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
436 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
437 const bool nullable
= (null_probability
!= 0.0);
439 ARROW_CHECK_GE(null_probability
, 0.0);
441 ::arrow::random::RandomArrayGenerator
rng(42);
443 auto values
= rng
.Int64(kNumValues
, /*min=*/-5, /*max=*/5, null_probability
);
444 const int64_t kBytesPerValue
= sizeof(int64_t);
446 auto array
= rng
.List(*values
, kNumValues
/ 10, null_probability
);
448 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
451 BENCHMARK(BM_ReadListColumn
)->Apply(NestedReadArguments
);
453 static void BM_ReadListOfStructColumn(::benchmark::State
& state
) {
454 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
455 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
456 const bool nullable
= (null_probability
!= 0.0);
458 ARROW_CHECK_GE(null_probability
, 0.0);
460 ::arrow::random::RandomArrayGenerator
rng(42);
462 auto values
= MakeStructArray(&rng
, kNumValues
, null_probability
);
463 const int64_t kBytesPerValue
= sizeof(int32_t) + sizeof(int64_t);
465 auto array
= rng
.List(*values
, kNumValues
/ 10, null_probability
);
467 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
470 BENCHMARK(BM_ReadListOfStructColumn
)->Apply(NestedReadArguments
);
472 static void BM_ReadListOfListColumn(::benchmark::State
& state
) {
473 constexpr int64_t kNumValues
= BENCHMARK_SIZE
/ 10;
474 const double null_probability
= static_cast<double>(state
.range(0)) / 100.0;
475 const bool nullable
= (null_probability
!= 0.0);
477 ARROW_CHECK_GE(null_probability
, 0.0);
479 ::arrow::random::RandomArrayGenerator
rng(42);
481 auto values
= rng
.Int64(kNumValues
, /*min=*/-5, /*max=*/5, null_probability
);
482 const int64_t kBytesPerValue
= sizeof(int64_t);
484 auto inner
= rng
.List(*values
, kNumValues
/ 10, null_probability
);
485 auto array
= rng
.List(*inner
, kNumValues
/ 100, null_probability
);
487 BenchmarkReadArray(state
, array
, nullable
, kNumValues
, kBytesPerValue
);
490 BENCHMARK(BM_ReadListOfListColumn
)->Apply(NestedReadArguments
);
493 // Benchmark different ways of reading select row groups
496 static void BM_ReadIndividualRowGroups(::benchmark::State
& state
) {
497 std::vector
<int64_t> values(BENCHMARK_SIZE
, 128);
498 std::shared_ptr
<::arrow::Table
> table
= TableFromVector
<Int64Type
>(values
, true);
499 auto output
= CreateOutputStream();
500 // This writes 10 RowGroups
502 WriteTable(*table
, ::arrow::default_memory_pool(), output
, BENCHMARK_SIZE
/ 10));
504 PARQUET_ASSIGN_OR_THROW(auto buffer
, output
->Finish());
506 while (state
.KeepRunning()) {
508 ParquetFileReader::Open(std::make_shared
<::arrow::io::BufferReader
>(buffer
));
509 std::unique_ptr
<FileReader
> arrow_reader
;
510 EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader
),
513 std::vector
<std::shared_ptr
<::arrow::Table
>> tables
;
514 for (int i
= 0; i
< arrow_reader
->num_row_groups(); i
++) {
515 // Only read the even numbered RowGroups
517 std::shared_ptr
<::arrow::Table
> table
;
518 EXIT_NOT_OK(arrow_reader
->RowGroup(i
)->ReadTable(&table
));
519 tables
.push_back(table
);
523 std::shared_ptr
<::arrow::Table
> final_table
;
524 PARQUET_ASSIGN_OR_THROW(final_table
, ConcatenateTables(tables
));
526 SetBytesProcessed
<true, Int64Type
>(state
);
529 BENCHMARK(BM_ReadIndividualRowGroups
);
531 static void BM_ReadMultipleRowGroups(::benchmark::State
& state
) {
532 std::vector
<int64_t> values(BENCHMARK_SIZE
, 128);
533 std::shared_ptr
<::arrow::Table
> table
= TableFromVector
<Int64Type
>(values
, true);
534 auto output
= CreateOutputStream();
535 // This writes 10 RowGroups
537 WriteTable(*table
, ::arrow::default_memory_pool(), output
, BENCHMARK_SIZE
/ 10));
538 PARQUET_ASSIGN_OR_THROW(auto buffer
, output
->Finish());
539 std::vector
<int> rgs
{0, 2, 4, 6, 8};
541 while (state
.KeepRunning()) {
543 ParquetFileReader::Open(std::make_shared
<::arrow::io::BufferReader
>(buffer
));
544 std::unique_ptr
<FileReader
> arrow_reader
;
545 EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader
),
547 std::shared_ptr
<::arrow::Table
> table
;
548 EXIT_NOT_OK(arrow_reader
->ReadRowGroups(rgs
, &table
));
550 SetBytesProcessed
<true, Int64Type
>(state
);
553 BENCHMARK(BM_ReadMultipleRowGroups
);
555 static void BM_ReadMultipleRowGroupsGenerator(::benchmark::State
& state
) {
556 std::vector
<int64_t> values(BENCHMARK_SIZE
, 128);
557 std::shared_ptr
<::arrow::Table
> table
= TableFromVector
<Int64Type
>(values
, true);
558 auto output
= CreateOutputStream();
559 // This writes 10 RowGroups
561 WriteTable(*table
, ::arrow::default_memory_pool(), output
, BENCHMARK_SIZE
/ 10));
562 PARQUET_ASSIGN_OR_THROW(auto buffer
, output
->Finish());
563 std::vector
<int> rgs
{0, 2, 4, 6, 8};
565 while (state
.KeepRunning()) {
567 ParquetFileReader::Open(std::make_shared
<::arrow::io::BufferReader
>(buffer
));
568 std::unique_ptr
<FileReader
> unique_reader
;
569 EXIT_NOT_OK(FileReader::Make(::arrow::default_memory_pool(), std::move(reader
),
571 std::shared_ptr
<FileReader
> arrow_reader
= std::move(unique_reader
);
572 ASSIGN_OR_ABORT(auto generator
,
573 arrow_reader
->GetRecordBatchGenerator(arrow_reader
, rgs
, {0}));
574 auto fut
= ::arrow::CollectAsyncGenerator(generator
);
575 ASSIGN_OR_ABORT(auto batches
, fut
.result());
576 ASSIGN_OR_ABORT(auto actual
, ::arrow::Table::FromRecordBatches(std::move(batches
)));
578 SetBytesProcessed
<true, Int64Type
>(state
);
581 BENCHMARK(BM_ReadMultipleRowGroupsGenerator
);
583 } // namespace benchmark
585 } // namespace parquet