git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/csv/writer_test.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / csv / writer_test.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "gtest/gtest.h"
19
20 #include <memory>
21 #include <vector>
22
23 #include "arrow/buffer.h"
24 #include "arrow/csv/writer.h"
25 #include "arrow/io/memory.h"
26 #include "arrow/ipc/writer.h"
27 #include "arrow/record_batch.h"
28 #include "arrow/result_internal.h"
29 #include "arrow/testing/gtest_util.h"
30 #include "arrow/type.h"
31 #include "arrow/type_fwd.h"
32
33 namespace arrow {
34 namespace csv {
35
36 struct WriterTestParams {
37 std::shared_ptr<Schema> schema;
38 std::string batch_data;
39 WriteOptions options;
40 std::string expected_output;
41 };
42
43 // Avoid Valgrind failures with GTest trying to represent a WriterTestParams
44 void PrintTo(const WriterTestParams& p, std::ostream* os) {
45 *os << "WriterTestParams(" << reinterpret_cast<const void*>(&p) << ")";
46 }
47
48 WriteOptions DefaultTestOptions(bool include_header) {
49 WriteOptions options;
50 options.batch_size = 5;
51 options.include_header = include_header;
52 return options;
53 }
54
55 std::vector<WriterTestParams> GenerateTestCases() {
56 auto abc_schema = schema({
57 {field("a", uint64())},
58 {field("b\"", utf8())},
59 {field("c ", int32())},
60 {field("d", date32())},
61 {field("e", date64())},
62 });
63 auto populated_batch = R"([{"a": 1, "c ": -1},
64 { "a": 1, "b\"": "abc\"efg", "c ": 2324},
65 { "b\"": "abcd", "c ": 5467},
66 { },
67 { "a": 546, "b\"": "", "c ": 517 },
68 { "a": 124, "b\"": "a\"\"b\"" },
69 { "d": 0 },
70 { "e": 86400000 }])";
71 std::string expected_without_header = std::string("1,,-1,,") + "\n" + // line 1
72 R"(1,"abc""efg",2324,,)" + "\n" + // line 2
73 R"(,"abcd",5467,,)" + "\n" + // line 3
74 R"(,,,,)" + "\n" + // line 4
75 R"(546,"",517,,)" + "\n" + // line 5
76 R"(124,"a""""b""",,,)" + "\n" + // line 6
77 R"(,,,1970-01-01,)" + "\n" + // line 7
78 R"(,,,,1970-01-02)" + "\n"; // line 8
79 std::string expected_header = std::string(R"("a","b""","c ","d","e")") + "\n";
80
81 return std::vector<WriterTestParams>{
82 {abc_schema, "[]", DefaultTestOptions(/*header=*/false), ""},
83 {abc_schema, "[]", DefaultTestOptions(/*header=*/true), expected_header},
84 {abc_schema, populated_batch, DefaultTestOptions(/*header=*/false),
85 expected_without_header},
86 {abc_schema, populated_batch, DefaultTestOptions(/*header=*/true),
87 expected_header + expected_without_header}};
88 }
89
90 class TestWriteCSV : public ::testing::TestWithParam<WriterTestParams> {
91 protected:
92 template <typename Data>
93 Result<std::string> ToCsvString(const Data& data, const WriteOptions& options) {
94 std::shared_ptr<io::BufferOutputStream> out;
95 ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create());
96
97 RETURN_NOT_OK(WriteCSV(data, options, out.get()));
98 ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer, out->Finish());
99 return std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size());
100 }
101
102 Result<std::string> ToCsvStringUsingWriter(const Table& data,
103 const WriteOptions& options) {
104 std::shared_ptr<io::BufferOutputStream> out;
105 ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create());
106 // Write row-by-row
107 ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(out, data.schema(), options));
108 TableBatchReader reader(data);
109 reader.set_chunksize(1);
110 std::shared_ptr<RecordBatch> batch;
111 RETURN_NOT_OK(reader.ReadNext(&batch));
112 while (batch != nullptr) {
113 RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
114 RETURN_NOT_OK(reader.ReadNext(&batch));
115 }
116 RETURN_NOT_OK(writer->Close());
117 EXPECT_EQ(data.num_rows(), writer->stats().num_record_batches);
118 ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buffer, out->Finish());
119 return std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size());
120 }
121 };
122
123 TEST_P(TestWriteCSV, TestWrite) {
124 ASSERT_OK_AND_ASSIGN(std::shared_ptr<io::BufferOutputStream> out,
125 io::BufferOutputStream::Create());
126 WriteOptions options = GetParam().options;
127 std::string csv;
128 auto record_batch = RecordBatchFromJSON(GetParam().schema, GetParam().batch_data);
129 ASSERT_OK_AND_ASSIGN(csv, ToCsvString(*record_batch, options));
130 EXPECT_EQ(csv, GetParam().expected_output);
131
132 // Batch size shouldn't matter.
133 options.batch_size /= 2;
134 ASSERT_OK_AND_ASSIGN(csv, ToCsvString(*record_batch, options));
135 EXPECT_EQ(csv, GetParam().expected_output);
136
137 // Table and Record batch should work identically.
138 ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> table,
139 Table::FromRecordBatches({record_batch}));
140 ASSERT_OK_AND_ASSIGN(csv, ToCsvString(*table, options));
141 EXPECT_EQ(csv, GetParam().expected_output);
142
143 // The writer should work identically.
144 ASSERT_OK_AND_ASSIGN(csv, ToCsvStringUsingWriter(*table, options));
145 EXPECT_EQ(csv, GetParam().expected_output);
146 }
147
148 INSTANTIATE_TEST_SUITE_P(MultiColumnWriteCSVTest, TestWriteCSV,
149 ::testing::ValuesIn(GenerateTestCases()));
150
151 INSTANTIATE_TEST_SUITE_P(SingleColumnWriteCSVTest, TestWriteCSV,
152 ::testing::Values(WriterTestParams{
153 schema({field("int64", int64())}),
154 R"([{ "int64": 9999}, {}, { "int64": -15}])", WriteOptions(),
155 R"("int64")"
156 "\n9999\n\n-15\n"}));
157
158 } // namespace csv
159 } // namespace arrow