]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file | |
3 | // distributed with this work for additional information | |
4 | // regarding copyright ownership. The ASF licenses this file | |
5 | // to you under the Apache License, Version 2.0 (the | |
6 | // "License"); you may not use this file except in compliance | |
7 | // with the License. You may obtain a copy of the License at | |
8 | // | |
9 | // http://www.apache.org/licenses/LICENSE-2.0 | |
10 | // | |
11 | // Unless required by applicable law or agreed to in writing, | |
12 | // software distributed under the License is distributed on an | |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | // KIND, either express or implied. See the License for the | |
15 | // specific language governing permissions and limitations | |
16 | // under the License. | |
17 | ||
18 | #include "arrow/csv/test_common.h" | |
19 | ||
20 | #include "arrow/testing/gtest_util.h" | |
21 | ||
22 | namespace arrow { | |
23 | namespace csv { | |
24 | ||
// Concatenate the given lines into one CSV string.
// The lines are joined verbatim: no separator or newline is inserted between
// them, so each line must already carry its own terminator if one is wanted.
std::string MakeCSVData(std::vector<std::string> lines) {
  std::string joined;
  for (auto it = lines.cbegin(); it != lines.cend(); ++it) {
    joined.append(*it);
  }
  return joined;
}
32 | ||
33 | void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, int32_t num_cols, | |
34 | std::shared_ptr<BlockParser>* out) { | |
35 | auto csv = MakeCSVData(lines); | |
36 | auto parser = std::make_shared<BlockParser>(options, num_cols); | |
37 | uint32_t out_size; | |
38 | ASSERT_OK(parser->Parse(util::string_view(csv), &out_size)); | |
39 | ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed"; | |
40 | *out = parser; | |
41 | } | |
42 | ||
43 | void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, | |
44 | std::shared_ptr<BlockParser>* out) { | |
45 | return MakeCSVParser(lines, options, -1, out); | |
46 | } | |
47 | ||
48 | void MakeCSVParser(std::vector<std::string> lines, std::shared_ptr<BlockParser>* out) { | |
49 | MakeCSVParser(lines, ParseOptions::Defaults(), out); | |
50 | } | |
51 | ||
52 | void MakeColumnParser(std::vector<std::string> items, std::shared_ptr<BlockParser>* out) { | |
53 | auto options = ParseOptions::Defaults(); | |
54 | // Need this to test for null (empty) values | |
55 | options.ignore_empty_lines = false; | |
56 | std::vector<std::string> lines; | |
57 | for (const auto& item : items) { | |
58 | lines.push_back(item + '\n'); | |
59 | } | |
60 | MakeCSVParser(lines, options, 1, out); | |
61 | ASSERT_EQ((*out)->num_cols(), 1) << "Should have seen only 1 CSV column"; | |
62 | ASSERT_EQ((*out)->num_rows(), items.size()); | |
63 | } | |
64 | ||
65 | namespace { | |
66 | ||
// Sample cell values, one list per column of the generated CSV (see WriteRow).
// Each list is indexed modulo its own size (see GetCell); the lists have
// different lengths, so the columns cycle with different periods.
// The "" and "N/A" entries presumably exercise null-like values -- TODO confirm
// against the tests that consume the generated buffer.
const std::vector<std::string> int64_rows = {"123", "4", "-317005557", "", "N/A", "0"};
const std::vector<std::string> float_rows = {"0", "123.456", "-3170.55766", "", "N/A"};
const std::vector<std::string> decimal128_rows = {"0", "123.456", "-3170.55766",
                                                  "", "N/A", "1233456789.123456789"};
// Date-only values plus date-and-time values, with and without seconds.
const std::vector<std::string> iso8601_rows = {"1917-10-17", "2018-09-13",
                                               "1941-06-22 04:00", "1945-05-09 09:45:38"};
// Month/day/year values without zero padding.
const std::vector<std::string> strptime_rows = {"10/17/1917", "9/13/2018", "9/5/1945"};
74 | ||
// Write the header line naming the five sample columns, followed by a newline.
static void WriteHeader(std::ostream& writer) {
  // '\n' rather than std::endl: same output, without forcing a flush.
  writer << "Int64,Float,Decimal128,ISO8601,Strptime" << '\n';
}
78 | ||
// Return a copy of the entry of `base_rows` selected by `row_index`, wrapping
// around when the index exceeds the number of entries.
// `base_rows` must not be empty (the index is taken modulo its size).
static std::string GetCell(const std::vector<std::string>& base_rows, size_t row_index) {
  const size_t wrapped_index = row_index % base_rows.size();
  return base_rows[wrapped_index];
}
82 | ||
83 | static void WriteRow(std::ostream& writer, size_t row_index) { | |
84 | writer << GetCell(int64_rows, row_index); | |
85 | writer << ','; | |
86 | writer << GetCell(float_rows, row_index); | |
87 | writer << ','; | |
88 | writer << GetCell(decimal128_rows, row_index); | |
89 | writer << ','; | |
90 | writer << GetCell(iso8601_rows, row_index); | |
91 | writer << ','; | |
92 | writer << GetCell(strptime_rows, row_index); | |
93 | writer << std::endl; | |
94 | } | |
95 | ||
// Write a row that does not match the five-column layout of WriteRow: a double
// quote, a newline, a second double quote and a final newline.
// NOTE(review): presumably meant to be read as a single quoted cell holding an
// embedded newline -- confirm against the parser options used by callers.
// The row index is accepted for signature parity with WriteRow but is unused.
static void WriteInvalidRow(std::ostream& writer, size_t /*row_index*/) {
  // '\n' rather than std::endl: same output, without forcing a flush.
  writer << "\"" << '\n' << "\"";
  writer << '\n';
}
100 | } // namespace | |
101 | ||
102 | Result<std::shared_ptr<Buffer>> MakeSampleCsvBuffer( | |
103 | size_t num_rows, std::function<bool(size_t)> is_valid) { | |
104 | std::stringstream writer; | |
105 | ||
106 | WriteHeader(writer); | |
107 | for (size_t i = 0; i < num_rows; ++i) { | |
108 | if (!is_valid || is_valid(i)) { | |
109 | WriteRow(writer, i); | |
110 | } else { | |
111 | WriteInvalidRow(writer, i); | |
112 | } | |
113 | } | |
114 | ||
115 | auto table_str = writer.str(); | |
116 | auto table_buffer = std::make_shared<Buffer>(table_str); | |
117 | return MemoryManager::CopyBuffer(table_buffer, default_cpu_memory_manager()); | |
118 | } | |
119 | ||
120 | } // namespace csv | |
121 | } // namespace arrow |