]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/csv/test_common.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / csv / test_common.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include "arrow/csv/test_common.h"
19
20 #include "arrow/testing/gtest_util.h"
21
22 namespace arrow {
23 namespace csv {
24
/// \brief Concatenate pieces of CSV text into a single string.
///
/// The elements are appended verbatim and in order. No separator or line
/// terminator is inserted between them, so every element must already carry
/// its own newline if one is wanted.
///
/// \param lines the pieces of CSV text to join
/// \return the concatenation of all elements of `lines` (empty if `lines` is empty)
std::string MakeCSVData(std::vector<std::string> lines) {
  // Measure first so that the result buffer is allocated exactly once instead
  // of being regrown while appending.
  size_t total_size = 0;
  for (const auto& line : lines) {
    total_size += line.size();
  }

  std::string s;
  s.reserve(total_size);
  for (const auto& line : lines) {
    s += line;
  }
  return s;
}
32
33 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, int32_t num_cols,
34 std::shared_ptr<BlockParser>* out) {
35 auto csv = MakeCSVData(lines);
36 auto parser = std::make_shared<BlockParser>(options, num_cols);
37 uint32_t out_size;
38 ASSERT_OK(parser->Parse(util::string_view(csv), &out_size));
39 ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed";
40 *out = parser;
41 }
42
43 void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
44 std::shared_ptr<BlockParser>* out) {
45 return MakeCSVParser(lines, options, -1, out);
46 }
47
48 void MakeCSVParser(std::vector<std::string> lines, std::shared_ptr<BlockParser>* out) {
49 MakeCSVParser(lines, ParseOptions::Defaults(), out);
50 }
51
52 void MakeColumnParser(std::vector<std::string> items, std::shared_ptr<BlockParser>* out) {
53 auto options = ParseOptions::Defaults();
54 // Need this to test for null (empty) values
55 options.ignore_empty_lines = false;
56 std::vector<std::string> lines;
57 for (const auto& item : items) {
58 lines.push_back(item + '\n');
59 }
60 MakeCSVParser(lines, options, 1, out);
61 ASSERT_EQ((*out)->num_cols(), 1) << "Should have seen only 1 CSV column";
62 ASSERT_EQ((*out)->num_rows(), items.size());
63 }
64
namespace {

// Sample cell values, one list per column of the generated CSV. The lists have
// different lengths; GetCell() cycles through each one independently.
const std::vector<std::string> int64_rows = {"123", "4", "-317005557", "", "N/A", "0"};
const std::vector<std::string> float_rows = {"0", "123.456", "-3170.55766", "", "N/A"};
const std::vector<std::string> decimal128_rows = {"0",  "123.456", "-3170.55766",
                                                  "",   "N/A",     "1233456789.123456789"};
const std::vector<std::string> iso8601_rows = {"1917-10-17", "2018-09-13",
                                               "1941-06-22 04:00", "1945-05-09 09:45:38"};
const std::vector<std::string> strptime_rows = {"10/17/1917", "9/13/2018", "9/5/1945"};

// Write the header line naming the five sample columns.
static void WriteHeader(std::ostream& writer) {
  // '\n' rather than std::endl: no need to flush the stream after every line.
  writer << "Int64,Float,Decimal128,ISO8601,Strptime" << '\n';
}

// Return the sample value for `row_index`, wrapping around the end of
// `base_rows`. `base_rows` must be non-empty (the index is taken modulo its
// size). The result refers to an element of `base_rows`; no copy is made.
static const std::string& GetCell(const std::vector<std::string>& base_rows,
                                  size_t row_index) {
  return base_rows[row_index % base_rows.size()];
}

// Write one comma-separated data row, taking one cell from each sample column
// in header order, followed by a newline.
static void WriteRow(std::ostream& writer, size_t row_index) {
  writer << GetCell(int64_rows, row_index) << ',';
  writer << GetCell(float_rows, row_index) << ',';
  writer << GetCell(decimal128_rows, row_index) << ',';
  writer << GetCell(iso8601_rows, row_index) << ',';
  writer << GetCell(strptime_rows, row_index) << '\n';
}

// Write the text used for an "invalid" row: a double quote, a newline, a
// double quote, and a final newline. The row index is accepted for symmetry
// with WriteRow but does not influence the output.
static void WriteInvalidRow(std::ostream& writer, size_t /*row_index*/) {
  writer << "\"" << '\n' << "\"";
  writer << '\n';
}
}  // namespace
101
102 Result<std::shared_ptr<Buffer>> MakeSampleCsvBuffer(
103 size_t num_rows, std::function<bool(size_t)> is_valid) {
104 std::stringstream writer;
105
106 WriteHeader(writer);
107 for (size_t i = 0; i < num_rows; ++i) {
108 if (!is_valid || is_valid(i)) {
109 WriteRow(writer, i);
110 } else {
111 WriteInvalidRow(writer, i);
112 }
113 }
114
115 auto table_str = writer.str();
116 auto table_buffer = std::make_shared<Buffer>(table_str);
117 return MemoryManager::CopyBuffer(table_buffer, default_cpu_memory_manager());
118 }
119
120 } // namespace csv
121 } // namespace arrow