]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <list>
#include <memory>
#include <string>
#include <vector>

#include "parquet/api/reader.h"
24 | ||
25 | int main(int argc, char** argv) { | |
26 | if (argc > 4 || argc < 1) { | |
27 | std::cerr << "Usage: parquet-scan [--batch-size=] [--columns=...] <file>" | |
28 | << std::endl; | |
29 | return -1; | |
30 | } | |
31 | ||
32 | std::string filename; | |
33 | ||
34 | // Read command-line options | |
35 | int batch_size = 256; | |
36 | const std::string COLUMNS_PREFIX = "--columns="; | |
37 | const std::string BATCH_SIZE_PREFIX = "--batch-size="; | |
38 | std::vector<int> columns; | |
39 | int num_columns = 0; | |
40 | ||
41 | char *param, *value; | |
42 | for (int i = 1; i < argc; i++) { | |
43 | if ((param = std::strstr(argv[i], COLUMNS_PREFIX.c_str()))) { | |
44 | value = std::strtok(param + COLUMNS_PREFIX.length(), ","); | |
45 | while (value) { | |
46 | columns.push_back(std::atoi(value)); | |
47 | value = std::strtok(nullptr, ","); | |
48 | num_columns++; | |
49 | } | |
50 | } else if ((param = std::strstr(argv[i], BATCH_SIZE_PREFIX.c_str()))) { | |
51 | value = std::strtok(param + BATCH_SIZE_PREFIX.length(), " "); | |
52 | if (value) { | |
53 | batch_size = std::atoi(value); | |
54 | } | |
55 | } else { | |
56 | filename = argv[i]; | |
57 | } | |
58 | } | |
59 | ||
60 | try { | |
61 | double total_time; | |
62 | std::clock_t start_time = std::clock(); | |
63 | std::unique_ptr<parquet::ParquetFileReader> reader = | |
64 | parquet::ParquetFileReader::OpenFile(filename); | |
65 | ||
66 | int64_t total_rows = parquet::ScanFileContents(columns, batch_size, reader.get()); | |
67 | ||
68 | total_time = static_cast<double>(std::clock() - start_time) / | |
69 | static_cast<double>(CLOCKS_PER_SEC); | |
70 | std::cout << total_rows << " rows scanned in " << total_time << " seconds." | |
71 | << std::endl; | |
72 | } catch (const std::exception& e) { | |
73 | std::cerr << "Parquet error: " << e.what() << std::endl; | |
74 | return -1; | |
75 | } | |
76 | ||
77 | return 0; | |
78 | } |