]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "benchmark/benchmark.h" | |
19 | ||
20 | #include "arrow/compute/exec/expression.h" | |
21 | #include "arrow/dataset/discovery.h" | |
22 | #include "arrow/dataset/file_base.h" | |
23 | #include "arrow/dataset/file_ipc.h" | |
24 | #include "arrow/dataset/partition.h" | |
25 | #include "arrow/filesystem/mockfs.h" | |
26 | #include "arrow/filesystem/test_util.h" | |
27 | #include "arrow/testing/gtest_util.h" | |
28 | #include "arrow/util/iterator.h" | |
29 | ||
30 | namespace arrow { | |
31 | namespace dataset { | |
32 | ||
33 | static std::shared_ptr<Dataset> GetDataset() { | |
34 | std::vector<fs::FileInfo> files; | |
35 | std::vector<std::string> paths; | |
36 | for (int a = 0; a < 100; a++) { | |
37 | for (int b = 0; b < 100; b++) { | |
38 | auto path = "a=" + std::to_string(a) + "/b=" + std::to_string(b) + "/data.feather"; | |
39 | files.push_back(fs::File(path)); | |
40 | paths.push_back(path); | |
41 | } | |
42 | } | |
43 | EXPECT_OK_AND_ASSIGN(auto fs, | |
44 | arrow::fs::internal::MockFileSystem::Make(fs::kNoTime, files)); | |
45 | auto format = std::make_shared<IpcFileFormat>(); | |
46 | FileSystemFactoryOptions options; | |
47 | options.partitioning = HivePartitioning::MakeFactory(); | |
48 | EXPECT_OK_AND_ASSIGN(auto factory, | |
49 | FileSystemDatasetFactory::Make(fs, paths, format, options)); | |
50 | FinishOptions finish_options; | |
51 | finish_options.inspect_options.fragments = 0; | |
52 | EXPECT_OK_AND_ASSIGN(auto dataset, factory->Finish(finish_options)); | |
53 | return dataset; | |
54 | } | |
55 | ||
56 | // Benchmarks of listing and filtering the fragments of a dataset. | |
57 | static void GetAllFragments(benchmark::State& state) { | |
58 | auto dataset = GetDataset(); | |
59 | for (auto _ : state) { | |
60 | ASSERT_OK_AND_ASSIGN(auto fragments, dataset->GetFragments()); | |
61 | ABORT_NOT_OK(fragments.Visit([](std::shared_ptr<Fragment>) { return Status::OK(); })); | |
62 | } | |
63 | } | |
64 | ||
65 | static void GetFilteredFragments(benchmark::State& state, compute::Expression filter) { | |
66 | auto dataset = GetDataset(); | |
67 | ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*dataset->schema())); | |
68 | for (auto _ : state) { | |
69 | ASSERT_OK_AND_ASSIGN(auto fragments, dataset->GetFragments(filter)); | |
70 | ABORT_NOT_OK(fragments.Visit([](std::shared_ptr<Fragment>) { return Status::OK(); })); | |
71 | } | |
72 | } | |
73 | ||
// Expression builders used to spell the filters passed to the benchmarks
// registered below.
using compute::field_ref;
using compute::literal;

// Baseline: list the fragments without any filter.
BENCHMARK(GetAllFragments);
// Drill down to a subtree: filter is a == 90.
BENCHMARK_CAPTURE(GetFilteredFragments, single_dir, equal(field_ref("a"), literal(90)));
// Drill down, but not to a subtree: filter is b == 90.
BENCHMARK_CAPTURE(GetFilteredFragments, multi_dir, equal(field_ref("b"), literal(90)));
// Drill down to a single file: filter is a == 90 and b == 90.
BENCHMARK_CAPTURE(GetFilteredFragments, single_file,
                  and_(equal(field_ref("a"), literal(90)),
                       equal(field_ref("b"), literal(90))));
// Apply a filter, but keep most of the files: filter is a > 1.
BENCHMARK_CAPTURE(GetFilteredFragments, range, greater(field_ref("a"), literal(1)));
88 | ||
89 | } // namespace dataset | |
90 | } // namespace arrow |