]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include <arrow/api.h> | |
19 | #include <arrow/compute/api_aggregate.h> | |
20 | #include <arrow/csv/api.h> | |
21 | #include <arrow/csv/writer.h> | |
22 | #include <arrow/io/api.h> | |
23 | #include <arrow/result.h> | |
24 | #include <arrow/status.h> | |
25 | ||
26 | #include <iostream> | |
27 | #include <vector> | |
28 | ||
29 | // Many operations in Apache Arrow operate on | |
30 | // columns of data, and the columns of data are | |
31 | // assembled into a table. In this example, we | |
32 | // examine how to compare two arrays which are | |
33 | // combined to form a table that is then written | |
34 | // out to a CSV file. | |
35 | // | |
36 | // To run this example you can use | |
37 | // ./compute_and_write_csv_example | |
38 | // | |
39 | // The program writes its output to the file | |
40 | // compute_and_write_output.csv | |
41 | // in the current directory. | |
42 | ||
43 | arrow::Status RunMain(int argc, char** argv) { | |
44 | // Make Arrays | |
45 | arrow::NumericBuilder<arrow::Int64Type> int64_builder; | |
46 | arrow::BooleanBuilder boolean_builder; | |
47 | ||
48 | // Make place for 8 values in total | |
49 | ARROW_RETURN_NOT_OK(int64_builder.Resize(8)); | |
50 | ARROW_RETURN_NOT_OK(boolean_builder.Resize(8)); | |
51 | ||
52 | // Bulk append the given values | |
53 | std::vector<int64_t> int64_values = {1, 2, 3, 4, 5, 6, 7, 8}; | |
54 | ARROW_RETURN_NOT_OK(int64_builder.AppendValues(int64_values)); | |
55 | std::shared_ptr<arrow::Array> array_a; | |
56 | ARROW_RETURN_NOT_OK(int64_builder.Finish(&array_a)); | |
57 | int64_builder.Reset(); | |
58 | int64_values = {2, 5, 1, 3, 6, 2, 7, 4}; | |
59 | std::shared_ptr<arrow::Array> array_b; | |
60 | ARROW_RETURN_NOT_OK(int64_builder.AppendValues(int64_values)); | |
61 | ARROW_RETURN_NOT_OK(int64_builder.Finish(&array_b)); | |
62 | ||
63 | // Cast the arrays to their actual types | |
64 | auto int64_array_a = std::static_pointer_cast<arrow::Int64Array>(array_a); | |
65 | auto int64_array_b = std::static_pointer_cast<arrow::Int64Array>(array_b); | |
66 | // Explicit comparison of values using a loop | |
67 | for (int64_t i = 0; i < 8; i++) { | |
68 | if ((!int64_array_a->IsNull(i)) && (!int64_array_b->IsNull(i))) { | |
69 | bool comparison_result = int64_array_a->Value(i) > int64_array_b->Value(i); | |
70 | boolean_builder.UnsafeAppend(comparison_result); | |
71 | } else { | |
72 | boolean_builder.UnsafeAppendNull(); | |
73 | } | |
74 | } | |
75 | std::shared_ptr<arrow::Array> array_a_gt_b_self; | |
76 | ARROW_RETURN_NOT_OK(boolean_builder.Finish(&array_a_gt_b_self)); | |
77 | std::cout << "Array explicitly compared" << std::endl; | |
78 | ||
79 | // Explicit comparison of values using a compute function | |
80 | ARROW_ASSIGN_OR_RAISE(arrow::Datum compared_datum, | |
81 | arrow::compute::CallFunction("greater", {array_a, array_b})); | |
82 | auto array_a_gt_b_compute = compared_datum.make_array(); | |
83 | std::cout << "Arrays compared using a compute function" << std::endl; | |
84 | ||
85 | // Create a table for the output | |
86 | auto schema = | |
87 | arrow::schema({arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()), | |
88 | arrow::field("a>b? (self written)", arrow::boolean()), | |
89 | arrow::field("a>b? (arrow)", arrow::boolean())}); | |
90 | std::shared_ptr<arrow::Table> my_table = arrow::Table::Make( | |
91 | schema, {array_a, array_b, array_a_gt_b_self, array_a_gt_b_compute}); | |
92 | ||
93 | std::cout << "Table created" << std::endl; | |
94 | ||
95 | // Write table to CSV file | |
96 | auto csv_filename = "compute_and_write_output.csv"; | |
97 | ARROW_ASSIGN_OR_RAISE(auto outstream, arrow::io::FileOutputStream::Open(csv_filename)); | |
98 | ||
99 | std::cout << "Writing CSV file" << std::endl; | |
100 | ARROW_RETURN_NOT_OK(arrow::csv::WriteCSV( | |
101 | *my_table, arrow::csv::WriteOptions::Defaults(), outstream.get())); | |
102 | ||
103 | return arrow::Status::OK(); | |
104 | } | |
105 | ||
106 | int main(int argc, char** argv) { | |
107 | arrow::Status status = RunMain(argc, argv); | |
108 | if (!status.ok()) { | |
109 | std::cerr << status << std::endl; | |
110 | return EXIT_FAILURE; | |
111 | } | |
112 | return EXIT_SUCCESS; | |
113 | } |