]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/csv/test_common.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / csv / test_common.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include "arrow/csv/test_common.h"
19
20#include "arrow/testing/gtest_util.h"
21
22namespace arrow {
23namespace csv {
24
// Concatenates every element of `lines`, in order, into one string.
//
// Nothing is inserted between elements: callers that want line terminators must
// include them in each element. An empty vector yields an empty string.
std::string MakeCSVData(std::vector<std::string> lines) {
  std::string joined;
  for (auto it = lines.cbegin(); it != lines.cend(); ++it) {
    joined.append(*it);
  }
  return joined;
}
32
33void MakeCSVParser(std::vector<std::string> lines, ParseOptions options, int32_t num_cols,
34 std::shared_ptr<BlockParser>* out) {
35 auto csv = MakeCSVData(lines);
36 auto parser = std::make_shared<BlockParser>(options, num_cols);
37 uint32_t out_size;
38 ASSERT_OK(parser->Parse(util::string_view(csv), &out_size));
39 ASSERT_EQ(out_size, csv.size()) << "trailing CSV data not parsed";
40 *out = parser;
41}
42
43void MakeCSVParser(std::vector<std::string> lines, ParseOptions options,
44 std::shared_ptr<BlockParser>* out) {
45 return MakeCSVParser(lines, options, -1, out);
46}
47
48void MakeCSVParser(std::vector<std::string> lines, std::shared_ptr<BlockParser>* out) {
49 MakeCSVParser(lines, ParseOptions::Defaults(), out);
50}
51
52void MakeColumnParser(std::vector<std::string> items, std::shared_ptr<BlockParser>* out) {
53 auto options = ParseOptions::Defaults();
54 // Need this to test for null (empty) values
55 options.ignore_empty_lines = false;
56 std::vector<std::string> lines;
57 for (const auto& item : items) {
58 lines.push_back(item + '\n');
59 }
60 MakeCSVParser(lines, options, 1, out);
61 ASSERT_EQ((*out)->num_cols(), 1) << "Should have seen only 1 CSV column";
62 ASSERT_EQ((*out)->num_rows(), items.size());
63}
64
namespace {

// Pools of sample cell text, one pool per column of the generated CSV. Rows cycle
// through each pool independently (see GetCell), so pools of different lengths
// produce varying combinations. Several pools include "" and "N/A" entries.
const std::vector<std::string> int64_rows = {"123", "4", "-317005557", "", "N/A", "0"};
const std::vector<std::string> float_rows = {"0", "123.456", "-3170.55766", "", "N/A"};
const std::vector<std::string> decimal128_rows = {"0",   "123.456", "-3170.55766",
                                                  "",    "N/A",     "1233456789.123456789"};
const std::vector<std::string> iso8601_rows = {"1917-10-17", "2018-09-13",
                                               "1941-06-22 04:00", "1945-05-09 09:45:38"};
const std::vector<std::string> strptime_rows = {"10/17/1917", "9/13/2018", "9/5/1945"};

// Writes the header line naming the five generated columns.
void WriteHeader(std::ostream& writer) {
  // '\n' rather than std::endl: same bytes written, without forcing a flush.
  writer << "Int64,Float,Decimal128,ISO8601,Strptime" << '\n';
}

// Returns the entry of `base_rows` for `row_index`, wrapping around at the end of
// the pool. `base_rows` must be non-empty and must outlive the returned reference
// (callers in this file only pass the namespace-scope pools above).
const std::string& GetCell(const std::vector<std::string>& base_rows, size_t row_index) {
  return base_rows[row_index % base_rows.size()];
}

// Writes one comma-separated data row for `row_index`, taking one cell from each
// pool in header order, followed by a newline.
void WriteRow(std::ostream& writer, size_t row_index) {
  writer << GetCell(int64_rows, row_index) << ',';
  writer << GetCell(float_rows, row_index) << ',';
  writer << GetCell(decimal128_rows, row_index) << ',';
  writer << GetCell(iso8601_rows, row_index) << ',';
  writer << GetCell(strptime_rows, row_index) << '\n';
}

// Writes the fixed text: a double quote, a newline, a double quote, a newline.
// The row index is accepted for signature symmetry with WriteRow but is not used.
// NOTE(review): presumably the parser rejects this as a quoted value spanning a
// line break -- confirm against the ParseOptions used by callers.
void WriteInvalidRow(std::ostream& writer, size_t /*row_index*/) {
  writer << "\"" << '\n' << "\"";
  writer << '\n';
}

}  // namespace
101
102Result<std::shared_ptr<Buffer>> MakeSampleCsvBuffer(
103 size_t num_rows, std::function<bool(size_t)> is_valid) {
104 std::stringstream writer;
105
106 WriteHeader(writer);
107 for (size_t i = 0; i < num_rows; ++i) {
108 if (!is_valid || is_valid(i)) {
109 WriteRow(writer, i);
110 } else {
111 WriteInvalidRow(writer, i);
112 }
113 }
114
115 auto table_str = writer.str();
116 auto table_buffer = std::make_shared<Buffer>(table_str);
117 return MemoryManager::CopyBuffer(table_buffer, default_cpu_memory_manager());
118}
119
120} // namespace csv
121} // namespace arrow